From 00920afbce80c4836a14cf35aedf7daa71b47446 Mon Sep 17 00:00:00 2001 From: bharnden Date: Wed, 26 Sep 2018 14:20:19 -0700 Subject: [PATCH] #193 updates to service validation, will retry validation for validation_timer period, instead of just waiting, using configurable validation_period --- daemon/core/service.py | 44 ++++++++++++----- daemon/examples/myservices/sample.py | 7 ++- docs/exampleservice.html | 72 +++++++++++++++++----------- 3 files changed, 79 insertions(+), 44 deletions(-) diff --git a/daemon/core/service.py b/daemon/core/service.py index ec1c700c..5c2b67d9 100644 --- a/daemon/core/service.py +++ b/daemon/core/service.py @@ -269,7 +269,7 @@ class ServiceManager(object): cls.add(service) except ValueError as e: service_errors.append(service.name) - logger.warn("not loading service: %s", e.message) + logger.warn("not loading service: %s", e) return service_errors @@ -462,7 +462,8 @@ class CoreServices(object): :param CoreService service: service to start :return: nothing """ - logger.info("starting node(%s) service(%s)", node.name, service.name) + logger.info("starting node(%s) service(%s) validation(%s)", node.name, service.name, + service.validation_mode.name) # create service directories for directory in service.dirs: @@ -477,14 +478,26 @@ class CoreServices(object): if status: raise ServiceBootError("node(%s) service(%s) error during startup" % (node.name, service.name)) - # wait for time if provided, default to a time previously used to provide a small buffer - time.sleep(0.125) - if service.validation_timer: - time.sleep(service.validation_timer) + # blocking mode is finished + if wait: + return + + # timer mode, sleep and return + if service.validation_mode == ServiceMode.TIMER: + time.sleep(service.validation_timer) + # non-blocking, attempt to validate periodically, up to validation_timer time + elif service.validation_mode == ServiceMode.NON_BLOCKING: + start = time.time() + while True: + status = self.validate_service(node, service) + if 
not status: + break + + if time.time() - start > service.validation_timer: + break + + time.sleep(service.validation_period) - # run validation commands, if present and not timer mode - if service.validation_mode != ServiceMode.TIMER: - status = self.validate_service(node, service) if status: raise ServiceBootError("node(%s) service(%s) failed validation" % (node.name, service.name)) @@ -528,9 +541,11 @@ class CoreServices(object): logger.debug("validating service(%s) using: %s", service.name, cmd) try: node.check_cmd(cmd) - except CoreCommandError: - logger.exception("node(%s) service(%s) validate command failed", node.name, service.name) + except CoreCommandError as e: + logger.error("node(%s) service(%s) validate failed", node.name, service.name) + logger.error("cmd(%s): %s", e.cmd, e.output) status = -1 + break return status @@ -753,8 +768,11 @@ class CoreService(object): # validation mode, used to determine startup success validation_mode = ServiceMode.NON_BLOCKING - # time to wait for determining if service started successfully - validation_timer = 0 + # time to wait in seconds for determining if service started successfully + validation_timer = 5 + + # validation period in seconds, how frequent validation is attempted + validation_period = 0.5 # metadata associated with this service meta = None diff --git a/daemon/examples/myservices/sample.py b/daemon/examples/myservices/sample.py index a9c0a362..843e59ef 100644 --- a/daemon/examples/myservices/sample.py +++ b/daemon/examples/myservices/sample.py @@ -28,12 +28,15 @@ class MyService(CoreService): # Commands used to validate that a service was started, any non-zero exit code will cause a failure. validate = () # Validation mode, used to determine startup success. 
+ # # * NON_BLOCKING - runs startup commands, and validates success with validation commands # * BLOCKING - runs startup commands, and validates success with the startup commands themselves # * TIMER - runs startup commands, and validates success by waiting for "validation_timer" alone validation_mode = ServiceMode.NON_BLOCKING - # Time for a service to wait before running validation commands or determining success in TIMER mode. - validation_timer = 0 + # Time in seconds for a service to wait for validation, before determining success in TIMER/NON_BLOCKING modes. + validation_timer = 5 + # Period in seconds to wait before retrying validation, only used in NON_BLOCKING mode. + validation_period = 0.5 # Shutdown commands to stop this service. shutdown = () diff --git a/docs/exampleservice.html b/docs/exampleservice.html index 50b6e2bb..cddb18d4 100644 --- a/docs/exampleservice.html +++ b/docs/exampleservice.html @@ -114,11 +114,11 @@
#
-

Files that this service will generate, without a full path this file goes in the node's directory. +

Files that this service will generate. Without a full path, a file goes in the node’s directory, e.g. /tmp/pycore.12345/n1.conf/myfile

-
    configs = ("sh myservice1.sh", "sh myservice2.sh")
+
    configs = ("myservice1.sh", "myservice2.sh")
@@ -151,10 +151,12 @@ e.g. /tmp/pycore.12345/n1.conf/myfile

#
-

Validation mode, used to determine startup success. - NON_BLOCKING - runs startup commands, and validates success with validation commands - BLOCKING - runs startup commands, and validates success with the startup commands themselves -* TIMER - runs startup commands, and validates success by waiting for "validation_timer" alone

+

Validation mode, used to determine startup success.

+
    validation_mode = ServiceMode.NON_BLOCKING
@@ -166,10 +168,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile

#
-

Time for a service to wait before running validation commands or determining success in TIMER mode.

+

Time in seconds for a service to wait for validation, before determining success in TIMER/NON_BLOCKING modes.

-
    validation_timer = 0
+
    validation_timer = 5
@@ -178,10 +180,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile

#
-

Shutdown commands to stop this service.

+

Period in seconds to wait before retrying validation, only used in NON_BLOCKING mode.

-
    shutdown = ()
+
    validation_period = 0.5
@@ -190,6 +192,18 @@ e.g. /tmp/pycore.12345/n1.conf/myfile

#
+

Shutdown commands to stop this service.

+ +
+
    shutdown = ()
+
+ +
+
+
+
+ # +

On Load

@@ -198,10 +212,10 @@ e.g. /tmp/pycore.12345/n1.conf/myfile

-
+
- # + #

Provides a way to run some arbitrary logic when the service is loaded, possibly to help facilitate dynamic settings for the environment.

@@ -211,10 +225,10 @@ dynamic settings for the environment.

-
+
- # + #

Get Configs

@@ -224,10 +238,10 @@ dynamic settings for the environment.

-
+
- # + #

Provides a way to dynamically generate the config files from the node a service will run. Defaults to the class definition and can be left out entirely if not needed.

@@ -237,10 +251,10 @@ Defaults to the class definition and can be left out entirely if not needed.

-
+
- # + #

Generate Config

@@ -250,13 +264,13 @@ Defaults to the class definition and can be left out entirely if not needed.

-
+
- # + #

Returns a string representation for a file, given the node the service is starting on the config filename -that this information will be used for. This must be defined, if "configs" are defined.

+that this information will be used for. This must be defined, if “configs” are defined.

        cfg = "#!/bin/sh\n"
@@ -272,10 +286,10 @@ that this information will be used for. This must be defined, if "configs" are d
     
-
+
- # + #

Get Startup

@@ -285,10 +299,10 @@ that this information will be used for. This must be defined, if "configs" are d
-
+
- # + #

Provides a way to dynamically generate the startup commands from the node a service will run. Defaults to the class definition and can be left out entirely if not needed.

@@ -298,10 +312,10 @@ Defaults to the class definition and can be left out entirely if not needed.

-
+
- # + #

Get Validate

@@ -311,10 +325,10 @@ Defaults to the class definition and can be left out entirely if not needed.

-
+
- # + #

Provides a way to dynamically generate the validate commands from the node a service will run. Defaults to the class definition and can be left out entirely if not needed.