From 00920afbce80c4836a14cf35aedf7daa71b47446 Mon Sep 17 00:00:00 2001
From: bharnden
Date: Wed, 26 Sep 2018 14:20:19 -0700
Subject: [PATCH] #193 updates to service validation, will retry validation for
validation_timer period, instead of just waiting, using configurable
validation_period
---
daemon/core/service.py | 44 ++++++++++++-----
daemon/examples/myservices/sample.py | 7 ++-
docs/exampleservice.html | 72 +++++++++++++++++-----------
3 files changed, 79 insertions(+), 44 deletions(-)
diff --git a/daemon/core/service.py b/daemon/core/service.py
index ec1c700c..5c2b67d9 100644
--- a/daemon/core/service.py
+++ b/daemon/core/service.py
@@ -269,7 +269,7 @@ class ServiceManager(object):
cls.add(service)
except ValueError as e:
service_errors.append(service.name)
- logger.warn("not loading service: %s", e.message)
+ logger.warn("not loading service: %s", e)
return service_errors
@@ -462,7 +462,8 @@ class CoreServices(object):
:param CoreService service: service to start
:return: nothing
"""
- logger.info("starting node(%s) service(%s)", node.name, service.name)
+ logger.info("starting node(%s) service(%s) validation(%s)", node.name, service.name,
+ service.validation_mode.name)
# create service directories
for directory in service.dirs:
@@ -477,14 +478,26 @@ class CoreServices(object):
if status:
raise ServiceBootError("node(%s) service(%s) error during startup" % (node.name, service.name))
- # wait for time if provided, default to a time previously used to provide a small buffer
- time.sleep(0.125)
- if service.validation_timer:
- time.sleep(service.validation_timer)
+ # blocking mode is finished
+ if wait:
+ return
+
+ # timer mode, sleep for validation_timer seconds, no validation commands are run
+ if service.validation_mode == ServiceMode.TIMER:
+ time.sleep(service.validation_timer)
+ # non-blocking, retry validation every validation_period seconds, for up to validation_timer seconds
+ elif service.validation_mode == ServiceMode.NON_BLOCKING:
+ start = time.time()
+ while True:
+ status = self.validate_service(node, service)
+ if not status:
+ break
+
+ if time.time() - start > service.validation_timer:
+ break
+
+ time.sleep(service.validation_period)
- # run validation commands, if present and not timer mode
- if service.validation_mode != ServiceMode.TIMER:
- status = self.validate_service(node, service)
if status:
raise ServiceBootError("node(%s) service(%s) failed validation" % (node.name, service.name))
@@ -528,9 +541,11 @@ class CoreServices(object):
logger.debug("validating service(%s) using: %s", service.name, cmd)
try:
node.check_cmd(cmd)
- except CoreCommandError:
- logger.exception("node(%s) service(%s) validate command failed", node.name, service.name)
+ except CoreCommandError as e:
+ logger.error("node(%s) service(%s) validate failed", node.name, service.name)
+ logger.error("cmd(%s): %s", e.cmd, e.output)
status = -1
+ break
return status
@@ -753,8 +768,11 @@ class CoreService(object):
# validation mode, used to determine startup success
validation_mode = ServiceMode.NON_BLOCKING
- # time to wait for determining if service started successfully
- validation_timer = 0
+ # time to wait in seconds for determining if service started successfully
+ validation_timer = 5
+
+ # validation period in seconds, how frequently validation is attempted
+ validation_period = 0.5
# metadata associated with this service
meta = None
diff --git a/daemon/examples/myservices/sample.py b/daemon/examples/myservices/sample.py
index a9c0a362..843e59ef 100644
--- a/daemon/examples/myservices/sample.py
+++ b/daemon/examples/myservices/sample.py
@@ -28,12 +28,15 @@ class MyService(CoreService):
# Commands used to validate that a service was started, any non-zero exit code will cause a failure.
validate = ()
# Validation mode, used to determine startup success.
+ #
# * NON_BLOCKING - runs startup commands, and validates success with validation commands
# * BLOCKING - runs startup commands, and validates success with the startup commands themselves
# * TIMER - runs startup commands, and validates success by waiting for "validation_timer" alone
validation_mode = ServiceMode.NON_BLOCKING
- # Time for a service to wait before running validation commands or determining success in TIMER mode.
- validation_timer = 0
+ # Time in seconds for a service to wait for validation, before determining success in TIMER/NON_BLOCKING modes.
+ validation_timer = 5
+ # Period in seconds to wait before retrying validation, only used in NON_BLOCKING mode.
+ validation_period = 0.5
# Shutdown commands to stop this service.
shutdown = ()
diff --git a/docs/exampleservice.html b/docs/exampleservice.html
index 50b6e2bb..cddb18d4 100644
--- a/docs/exampleservice.html
+++ b/docs/exampleservice.html
@@ -114,11 +114,11 @@
- Files that this service will generate, without a full path this file goes in the node's directory.
+
Files that this service will generate, without a full path this file goes in the node’s directory.
e.g. /tmp/pycore.12345/n1.conf/myfile
-
configs = ("sh myservice1.sh", "sh myservice2.sh")
+
configs = ("myservice1.sh", "myservice2.sh")
@@ -151,10 +151,12 @@ e.g. /tmp/pycore.12345/n1.conf/myfile
- Validation mode, used to determine startup success.
- NON_BLOCKING - runs startup commands, and validates success with validation commands
- BLOCKING - runs startup commands, and validates success with the startup commands themselves
-* TIMER - runs startup commands, and validates success by waiting for "validation_timer" alone
+ Validation mode, used to determine startup success.
+
+- NON_BLOCKING - runs startup commands, and validates success with validation commands
+- BLOCKING - runs startup commands, and validates success with the startup commands themselves
+- TIMER - runs startup commands, and validates success by waiting for “validation_timer” alone
+
validation_mode = ServiceMode.NON_BLOCKING
@@ -166,10 +168,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile
-
Time for a service to wait before running validation commands or determining success in TIMER mode.
+
Time in seconds for a service to wait for validation, before determining success in TIMER/NON_BLOCKING modes.
@@ -178,10 +180,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile
- Shutdown commands to stop this service.
+ Period in seconds to wait before retrying validation, only used in NON_BLOCKING mode.
@@ -190,6 +192,18 @@ e.g. /tmp/pycore.12345/n1.conf/myfile
+ Shutdown commands to stop this service.
+
+
+
+
+
+
@@ -198,10 +212,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile
-
+
Provides a way to run some arbitrary logic when the service is loaded, possibly to help facilitate
dynamic settings for the environment.
@@ -211,10 +225,10 @@ dynamic settings for the environment.
-
+
@@ -224,10 +238,10 @@ dynamic settings for the environment.
-
+
Provides a way to dynamically generate the config files from the node a service will run.
Defaults to the class definition and can be left out entirely if not needed.
@@ -237,10 +251,10 @@ Defaults to the class definition and can be left out entirely if not needed.
-
+
@@ -250,13 +264,13 @@ Defaults to the class definition and can be left out entirely if not needed.
-
+
Returns a string representation for a file, given the node the service is starting on the config filename
-that this information will be used for. This must be defined, if "configs" are defined.
+that this information will be used for. This must be defined, if “configs” are defined.
cfg = "#!/bin/sh\n"
@@ -272,10 +286,10 @@ that this information will be used for. This must be defined, if "configs" are d
-
+
@@ -285,10 +299,10 @@ that this information will be used for. This must be defined, if "configs" are d
-
+
Provides a way to dynamically generate the startup commands from the node a service will run.
Defaults to the class definition and can be left out entirely if not needed.
@@ -298,10 +312,10 @@ Defaults to the class definition and can be left out entirely if not needed.
-
+
@@ -311,10 +325,10 @@ Defaults to the class definition and can be left out entirely if not needed.
-
+
Provides a way to dynamically generate the validate commands from the node a service will run.
Defaults to the class definition and can be left out entirely if not needed.