Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reload: fix hotreload when running on windows as service. #9908

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/fluent-bit/flb_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define FLB_CONFIG_H

#include <time.h>
#include <signal.h>

#include <fluent-bit/flb_info.h>
#include <fluent-bit/flb_pipe.h>
Expand Down Expand Up @@ -266,6 +267,8 @@ struct flb_config {
int enable_hot_reload;
int ensure_thread_safety_on_hot_reloading;
unsigned int hot_reloaded_count;
unsigned int hot_reloaded_failures_count;
volatile sig_atomic_t bin_restarting;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally this code was used to improve the reliability of the reload API so that it could be used in testing:

  • bin_restarting was refactored out of the src/fluent-bit.c file to expose it to the reload API to better track the reload state
  • hot_reloaded_failures_count was added to track failed reloads.

The hot_reloaded_failures_count is not crucial to this patch and can be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@edsiper should I drop hot_reloaded_failures_count and the relevant code?

int shutdown_by_hot_reloading;
int hot_reloading;

Expand Down
11 changes: 9 additions & 2 deletions plugins/in_calyptia_fleet/in_calyptia_fleet.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,12 +468,19 @@ static void *do_reload(void *data)
}
reload->flb->config->conf_path_file = reload->cfg_path;

flb_free(reload);
sleep(5);
#ifndef FLB_SYSTEM_WINDOWS
flb_free(reload);
kill(getpid(), SIGHUP);
#else
GenerateConsoleCtrlEvent(1 /* CTRL_BREAK_EVENT_1 */, 0);
/* using the refactor that placed `bin_restarting` inside
* `flb_config` to use it as a messaging mechanism instead of
* GenerateConsoleCtrlEvent to overcome the fact that windows
* services do not have a console and therefore cannot
* react to console events or handle them (fixes sc-112185).
*/
reload->flb->config->bin_restarting = FLB_RELOAD_IN_PROGRESS;
flb_free(reload);
#endif
return NULL;
}
Expand Down
21 changes: 21 additions & 0 deletions src/flb_reload.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,14 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)

if (ctx == NULL) {
flb_error("[reload] given flb context is NULL");
ctx->config->hot_reloaded_failures_count++;
return FLB_RELOAD_INVALID_CONTEXT;
}

old_config = ctx->config;
if (old_config->enable_hot_reload != FLB_TRUE) {
flb_warn("[reload] hot reloading is not enabled");
old_config->hot_reloaded_failures_count++;
return FLB_RELOAD_NOT_ENABLED;
}

Expand All @@ -404,6 +406,7 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
*/
new_cf = flb_cf_create();
if (!new_cf) {
old_config->hot_reloaded_failures_count++;
return FLB_RELOAD_HALTED;
}

Expand All @@ -421,6 +424,7 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
}
flb_cf_destroy(new_cf);
flb_error("[reload] reconstruct cf failed");
old_config->hot_reloaded_failures_count++;
return FLB_RELOAD_HALTED;
}
}
Expand All @@ -434,6 +438,7 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
flb_cf_destroy(new_cf);
flb_error("[reload] creating flb context is failed. Reloading is halted");

old_config->hot_reloaded_failures_count++;
return FLB_RELOAD_HALTED;
}

Expand Down Expand Up @@ -462,6 +467,10 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)

if (!new_cf) {
flb_sds_destroy(file);
flb_destroy(new_ctx);

old_config->hot_reloading = FLB_FALSE;
old_config->hot_reloaded_failures_count++;

return FLB_RELOAD_HALTED;
}
Expand All @@ -476,6 +485,10 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
}
flb_cf_destroy(new_cf);
flb_destroy(new_ctx);

old_config->hot_reloading = FLB_FALSE;
old_config->hot_reloaded_failures_count++;

flb_error("[reload] reloaded config is invalid. Reloading is halted");

return FLB_RELOAD_HALTED;
Expand All @@ -489,6 +502,9 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
flb_cf_destroy(new_cf);
flb_destroy(new_ctx);

old_config->hot_reloading = FLB_FALSE;
old_config->hot_reloaded_failures_count++;

flb_error("[reload] reloaded config format is invalid. Reloading is halted");

return FLB_RELOAD_HALTED;
Expand All @@ -501,6 +517,9 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)
flb_cf_destroy(new_cf);
flb_destroy(new_ctx);

old_config->hot_reloading = FLB_FALSE;
old_config->hot_reloaded_failures_count++;

flb_error("[reload] reloaded config is invalid. Reloading is halted");

return FLB_RELOAD_HALTED;
Expand Down Expand Up @@ -528,6 +547,8 @@ int flb_reload(flb_ctx_t *ctx, struct flb_cf *cf_opts)

if (ret != 0) {
flb_destroy(new_ctx);
old_config->hot_reloading = FLB_FALSE;
old_config->hot_reloaded_failures_count++;

flb_error("[reload] loaded configuration contains error(s). Reloading is aborted");

Expand Down
71 changes: 43 additions & 28 deletions src/fluent-bit.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ extern void win32_started(void);
flb_ctx_t *ctx;
struct flb_config *config;
volatile sig_atomic_t exit_signal = 0;
volatile sig_atomic_t flb_bin_restarting = FLB_RELOAD_IDLE;
pthread_mutex_t reload_mtx;

#ifdef FLB_HAVE_LIBBACKTRACE
struct flb_stacktrace flb_st;
Expand Down Expand Up @@ -606,11 +606,14 @@ static void flb_signal_handler(int signal)
break;
#ifndef FLB_HAVE_STATIC_CONF
case SIGHUP:
if (flb_bin_restarting == FLB_RELOAD_IDLE) {
flb_bin_restarting = FLB_RELOAD_IN_PROGRESS;
}
else {
flb_utils_error(FLB_ERR_RELOADING_IN_PROGRESS);
if (pthread_mutex_trylock(&reload_mtx) == 0) {
if (config->bin_restarting == FLB_RELOAD_IDLE) {
config->bin_restarting = FLB_RELOAD_IN_PROGRESS;
}
else {
flb_utils_error(FLB_ERR_RELOADING_IN_PROGRESS);
}
pthread_mutex_unlock(&reload_mtx);
}
break;
#endif
Expand Down Expand Up @@ -647,16 +650,19 @@ static BOOL WINAPI flb_console_handler(DWORD evType)
handler_signal = 2;
break;
case 1 /* CTRL_BREAK_EVENT_1 */:
if (flb_bin_restarting == FLB_RELOAD_IDLE) {
flb_bin_restarting = FLB_RELOAD_IN_PROGRESS;
/* signal the main loop to execute reload. this is necessary since
* all signal handlers in win32 are executed on their own thread.
*/
handler_signal = 1;
flb_bin_restarting = FLB_RELOAD_IDLE;
}
else {
flb_utils_error(FLB_ERR_RELOADING_IN_PROGRESS);
if (pthread_mutex_trylock(&reload_mtx) == 0) {
if (config->bin_restarting == FLB_RELOAD_IDLE) {
config->bin_restarting = FLB_RELOAD_IN_PROGRESS;
/* signal the main loop to execute reload. this is necessary since
* all signal handlers in win32 are executed on their own thread.
*/
handler_signal = 1;
config->bin_restarting = FLB_RELOAD_IDLE;
}
else {
flb_utils_error(FLB_ERR_RELOADING_IN_PROGRESS);
}
pthread_mutex_unlock(&reload_mtx);
}
break;
}
Expand Down Expand Up @@ -1384,6 +1390,8 @@ int flb_main(int argc, char **argv)
}
#endif

pthread_mutex_init(&reload_mtx, NULL);

while (ctx->status == FLB_LIB_OK && exit_signal == 0) {
sleep(1);

Expand All @@ -1404,30 +1412,35 @@ int flb_main(int argc, char **argv)
#ifdef FLB_SYSTEM_WINDOWS
flb_console_handler_set_ctx(ctx, cf_opts);
#endif
if (flb_bin_restarting == FLB_RELOAD_IN_PROGRESS) {
pthread_mutex_lock(&reload_mtx);
if (config->bin_restarting == FLB_RELOAD_IN_PROGRESS) {
/* reload by using same config files/path */
ret = flb_reload(ctx, cf_opts);
if (ret == 0) {
ctx = flb_context_get();
flb_bin_restarting = FLB_RELOAD_IDLE;
config = ctx->config;
config->bin_restarting = FLB_RELOAD_IDLE;
}
else {
flb_bin_restarting = ret;
config->bin_restarting = ret;
}
}

if (flb_bin_restarting == FLB_RELOAD_HALTED) {
if (config->bin_restarting == FLB_RELOAD_HALTED) {
sleep(1);
flb_bin_restarting = FLB_RELOAD_IDLE;
config->bin_restarting = FLB_RELOAD_IDLE;
}
pthread_mutex_unlock(&reload_mtx);
}

if (exit_signal) {
flb_signal_exit(exit_signal);
}
if (flb_bin_restarting != FLB_RELOAD_ABORTED) {
pthread_mutex_lock(&reload_mtx);
if (config->bin_restarting != FLB_RELOAD_ABORTED) {
ret = ctx->config->exit_status_code;
}
pthread_mutex_unlock(&reload_mtx);

cf_opts = flb_cf_context_get();

Expand All @@ -1449,13 +1462,15 @@ int flb_main(int argc, char **argv)
}
#endif

if (flb_bin_restarting == FLB_RELOAD_ABORTED) {
pthread_mutex_lock(&reload_mtx);
if (config->bin_restarting == FLB_RELOAD_ABORTED) {
fprintf(stderr, "reloading is aborted and exit\n");
}
else {
flb_stop(ctx);
flb_destroy(ctx);
}
}
else {
flb_stop(ctx);
flb_destroy(ctx);
}
pthread_mutex_unlock(&reload_mtx);

return ret;
}
Expand Down
12 changes: 6 additions & 6 deletions src/http_server/api/v2/reload.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ static void handle_reload_request(mk_request_t *request, struct flb_config *conf
http_status = 400;
}
else {
ret = GenerateConsoleCtrlEvent(1 /* CTRL_BREAK_EVENT_1 */, 0);
if (ret == 0) {
mk_http_status(request, 500);
mk_http_done(request);
return;
}
/* we are using the `bin_restarting` property inside `flb_config` to
* use it as a messaging mechanism instead of GenerateConsoleCtrlEvent
* to overcome the fact that windows services do not have a console and
* therefore cannot react to console events or handle them.
*/
config->bin_restarting = FLB_RELOAD_IN_PROGRESS;

msgpack_pack_str(&mp_pck, 4);
msgpack_pack_str_body(&mp_pck, "done", 4);
Expand Down
Loading