From 6be9a266ea73466840fa6571f86807069e63340d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 27 May 2022 13:08:04 +0200 Subject: [PATCH 1/2] testbench: add testcase for frequent imfile input file change PoC test, yet incomplete This patch not only contains the new test but also supporting changes to testbench tooling. see also: https://github.com/rsyslog/rsyslog/issues/4797 --- tests/Makefile.am | 2 + tests/imfile-logrotate-async.sh | 79 +++++++++++++++++++++++++ tests/inputfilegen.c | 102 ++++++++++++++++++++++++++++++-- 3 files changed, 178 insertions(+), 5 deletions(-) create mode 100755 tests/imfile-logrotate-async.sh diff --git a/tests/Makefile.am b/tests/Makefile.am index a807133fb5..cd3b5b254f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1565,6 +1565,7 @@ TESTS += \ imfile-symlink.sh \ imfile-symlink-multi.sh \ imfile-logrotate.sh \ + imfile-logrotate-async.sh \ imfile-logrotate-multiple.sh \ imfile-logrotate-copytruncate.sh \ imfile-logrotate-nocopytruncate.sh \ @@ -2575,6 +2576,7 @@ EXTRA_DIST= \ imfile-symlink.sh \ imfile-symlink-multi.sh \ imfile-logrotate.sh \ + imfile-logrotate-async.sh \ imfile-logrotate-copytruncate.sh \ imfile-logrotate-nocopytruncate.sh \ imfile-logrotate-multiple.sh \ diff --git a/tests/imfile-logrotate-async.sh b/tests/imfile-logrotate-async.sh new file mode 100755 index 0000000000..1210b201c5 --- /dev/null +++ b/tests/imfile-logrotate-async.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# This is part of the rsyslog testbench, licensed under ASL 2.0 +. $srcdir/diag.sh check-inotify-only +. ${srcdir:=.}/diag.sh init +check_command_available logrotate +export NUMMESSAGES=10000 +export RETRIES=50 + +# Write logrotate config file +echo '"./'$RSYSLOG_DYNNAME'.input*.log" +{ + #daily + rotate 60 + missingok + notifempty + sharedscripts + postrotate + kill -HUP $(cat '$RSYSLOG_DYNNAME'.inputfilegen_pid) + endscript + #olddir /logs/old + +}' > $RSYSLOG_DYNNAME.logrotate + + +generate_conf +add_conf ' +$WorkDirectory '$RSYSLOG_DYNNAME'.spool + +global( debug.whitelist="off" + debug.files=["rainerscript.c", "ratelimit.c", "ruleset.c", "main Q", "msg.c", "../action.c"] + ) + +module(load="../plugins/imfile/.libs/imfile" mode="inotify" PollingInterval="2") + +input(type="imfile" File="./'$RSYSLOG_DYNNAME'.input*.log" Tag="file:" + Severity="error" Facility="local7" addMetadata="on" reopenOnTruncate="on") + +$template outfmt,"%msg:F,58:2%\n" +if $msg contains "msgnum:" then + action(type="omfile" file="'$RSYSLOG_OUT_LOG'" template="outfmt") +' +startup + +./inputfilegen -m $NUMMESSAGES -S 5 -B 500 -f $RSYSLOG_DYNNAME.input.log & +INPUTFILEGEN_PID=$! +echo "$INPUTFILEGEN_PID" > $RSYSLOG_DYNNAME.inputfilegen_pid + + + +./msleep 1 +logrotate --state $RSYSLOG_DYNNAME.logrotate.state -f $RSYSLOG_DYNNAME.logrotate +./msleep 20 +echo INPUT FILES: +ls -li $RSYSLOG_DYNNAME.input* +logrotate --state $RSYSLOG_DYNNAME.logrotate.state -f $RSYSLOG_DYNNAME.logrotate +./msleep 20 +echo INPUT FILES: +ls -li $RSYSLOG_DYNNAME.input* +logrotate --state $RSYSLOG_DYNNAME.logrotate.state -f $RSYSLOG_DYNNAME.logrotate +echo INPUT FILES: +ls -li $RSYSLOG_DYNNAME.input* +echo ls ${RSYSLOG_DYNNAME}.spool: +ls -li ${RSYSLOG_DYNNAME}.spool +echo INPUT FILES: +ls -li $RSYSLOG_DYNNAME.input* + +# generate more input after logrotate into new logfile +#./inputfilegen -m $TESTMESSAGES -i $TESTMESSAGES >> $RSYSLOG_DYNNAME.input.1.log +#ls -l $RSYSLOG_DYNNAME.input* + +#msgcount=$((2* TESTMESSAGES)) +#wait_file_lines $RSYSLOG_OUT_LOG $msgcount $RETRIES +wait_file_lines + +shutdown_when_empty +wait_shutdown +seq_check +#seq_check 0 $TESTMESSAGESFULL +exit_test diff --git a/tests/inputfilegen.c b/tests/inputfilegen.c index 523f82a7a9..ef61229ab1 100644 --- a/tests/inputfilegen.c +++ b/tests/inputfilegen.c @@ -1,5 +1,5 @@ /* generate an input file suitable for use by the testbench - * Copyright (C) 2018 by Rainer Gerhards and Adiscon GmbH. + * Copyright (C) 2018-2022 by Rainer Gerhards and Adiscon GmbH. * Copyright (C) 2016-2018 by Pascal Withopf and Adiscon GmbH. * * usage: ./inputfilegen num-lines > file @@ -14,9 +14,20 @@ * This is especially useful with -s, as the testbench otherwise does * not know how to do a seq_check. To keep things flexible, can also be * used with -m (this may aid testbench framework generalization). + * -f outputfile + * Permits to write data to file "outputfile" instead of stdout. Also + * enables support for SIGHUP. + * -S sleep time + * ms to sleep between sending message bulks (bulks size given by -B) + * -B number of messages in bulk + * number of messages to send without sleeping as specified in -S. + * IGNORED IF -S is not also given! * Part of rsyslog, licensed under ASL 2.0 */ +#include "config.h" #include +#include +#include #include #include #include @@ -25,10 +36,60 @@ #else #include #endif +#if defined(__FreeBSD__) +#include +#else +#include +#endif +#if defined(HAVE_SYS_SELECT_H) +#include +#endif #define DEFMSGS 5 #define NOEXTRADATA -1 +static volatile int bHadHUP = 0; +static void +hdlr_sighup(int sig) +{ + fprintf(stderr, "had hup, sig %d\n", sig); + bHadHUP = 1; +} +static void +sighup_enable() +{ + struct sigaction sigAct; + memset(&sigAct, 0, sizeof (sigAct)); + sigemptyset(&sigAct.sa_mask); + sigAct.sa_handler = hdlr_sighup; + sigaction(SIGHUP, &sigAct, NULL); +} + +void msleep(const int sleepTime) +{ + struct timeval tvSelectTimeout; + + tvSelectTimeout.tv_sec = sleepTime / 1000; + tvSelectTimeout.tv_usec = (sleepTime % 1000) * 1000; /* micro seconds */ + if(select(0, NULL, NULL, NULL, &tvSelectTimeout) == -1) { + if(errno != EINTR) { + perror("select"); + exit(1); + } + } +} + +static FILE * +open_output(const char *fn) +{ + FILE *fh_output = fopen(fn, "w"); + if(fh_output == NULL) { + perror(fn); + exit(1); + } + return fh_output; +} + int main(int argc, char* argv[]) { int c, i; @@ -39,8 +100,13 @@ int main(int argc, char* argv[]) long long filesize = -1; char *extradata = NULL; const char *msgcntfile = NULL; + const char *outputfile = "-"; + FILE *fh_output; + int sleep_ms = 0; + int sleep_msgs = 0; /* messages to xmit between sleeps (if configured) */ + int ctr = 0; - while((c=getopt(argc, argv, "m:M:i:d:s:")) != -1) { + while((c=getopt(argc, argv, "m:M:i:d:s:f:S:B:")) != -1) { switch(c) { case 'm': nmsgs = atoi(optarg); @@ -57,6 +123,16 @@ int main(int argc, char* argv[]) case 's': filesize = atoll(optarg); break; + case 'S': + sleep_ms = atoi(optarg); + break; + case 'B': + sleep_msgs = atoi(optarg); + break; + case 'f': + outputfile = optarg; + sighup_enable(); + break; case ':': fprintf(stderr, "Option -%c requires an operand\n", optopt); errflg++; @@ -97,17 +173,33 @@ int main(int argc, char* argv[]) fclose(fh); } + if(strcmp(outputfile, "-")) { + fh_output = open_output(outputfile); + } else { + fh_output = stdout; + } + if(nchars != NOEXTRADATA) { extradata = (char *)malloc(nchars + 1); memset(extradata, 'X', nchars); extradata[nchars] = '\0'; } for(i = nmsgstart; i < (nmsgs+nmsgstart); ++i) { - printf("msgnum:%8.8d:", i); + if(sleep_ms > 0 && ctr++ >= sleep_msgs) { + msleep(sleep_ms); + ctr = 0; + } + if(bHadHUP) { + fclose(fh_output); + fh_output = open_output(outputfile); + fprintf(stderr, "%s reopened\n", outputfile); + bHadHUP = 0; + } + fprintf(fh_output, "msgnum:%8.8d:", i); if(nchars != NOEXTRADATA) { - printf("%s", extradata); + fprintf(fh_output, "%s", extradata); } - printf("\n"); + fprintf(fh_output, "\n"); } free(extradata); return 0; From 5520373959c4513fc60a1be8785ea45f926231b5 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 31 May 2022 12:03:18 +0200 Subject: [PATCH 2/2] imfile bugfix: message loss/duplication when monitored file is rotated When a to-be-monitored file is being rotated, some messages may be lost or duplicated. In case of duplication, many file lines may be duplicated depending on actual timing. The whole bug was primarily timing depenedent in general. It most often was visible in practice when the monitored file was very frequently rotated (we had some report with every few seconds). Note that while we try hard to not lose any messages, input file rotation always has some loss potential. This is inevitable if the monitored file is being truncated. Also note that this bugfix affects imfile, only. It has nothing to do and no relation to rsyslog output files being rotated on HUP. closes: https://github.com/rsyslog/rsyslog/issues/4797 --- plugins/imfile/imfile.c | 49 +++++++++++++++++++++------- runtime/stream.c | 58 +++------------------------------ runtime/stream.h | 5 --- tests/imfile-logrotate-async.sh | 7 ++-- 4 files changed, 47 insertions(+), 72 deletions(-) diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c index 78f1cba0f9..3e28192844 100644 --- a/plugins/imfile/imfile.c +++ b/plugins/imfile/imfile.c @@ -84,6 +84,7 @@ MODULE_CNFNAME("imfile") /* defines */ #define FILE_ID_HASH_SIZE 20 /* max size of a file_id hash */ #define FILE_ID_SIZE 512 /* how many bytes are used for file-id? */ +#define FILE_DELETE_DELAY 5 /* how many seconds to wait before finally deleting a gone file */ /* Module static data */ DEF_IMOD_STATIC_DATA /* must be present, starts static data */ @@ -209,6 +210,7 @@ struct act_obj_s { ratelimit_t *ratelimiter; multi_submit_t multiSub; int is_symlink; + time_t time_to_delete; /* Helper variable to DELAY the actual file delete in act_obj_unlink */ }; struct fs_edge_s { fs_node_t *parent; /* node pointing to this edge */ @@ -774,6 +776,7 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file, act->file_id_prev[0] = '\0'; act->is_symlink = is_symlink; act->ratelimiter = NULL; + act->time_to_delete = 0; if (source) { /* we are target of symlink */ CHKmalloc(act->source_name = strdup(source)); } else { @@ -827,26 +830,49 @@ detect_updates(fs_edge_t *const edge) for(act = edge->active ; act != NULL ; act = act->next) { DBGPRINTF("detect_updates checking active obj '%s'\n", act->name); - const int r = lstat(act->name, &fileInfo); + // lstat() has the disadvantage, that we get "deleted" when the name has changed + // but inode is still the same (like with logrotate) + int r = lstat(act->name, &fileInfo); if(r == -1) { /* object gone away? */ - DBGPRINTF("object gone away, unlinking: '%s'\n", act->name); - act_obj_unlink(act); - restart = 1; + /* now let's see if the file itself already exist (e.g. rotated away) */ + /* NOTE: this will NOT stall the file. The reason is that when a new file + * with the same name is detected, we will not run into this code. + TODO: check the full implications, there are for sure some! + e.g. file has been closed, so we will never have old inode (but + why was it closed then? --> check) + */ + r = fstat(act->ino, &fileInfo); + if(r == -1) { + time_t ttNow; + time(&ttNow); + if (act->time_to_delete == 0) { + act->time_to_delete = ttNow; + } + /* First time we run into this code, we need to give imfile a little time to process + * the old file in case a process is still writing into it until the FILE_DELETE_DELAY + * is reached OR the inode has changed (see elseif below). In most cases, the + * delay will never be reached and the file will be closed when the inode has changed. + */ + if (act->time_to_delete + FILE_DELETE_DELAY < ttNow) { + DBGPRINTF("detect_updates obj gone away, unlinking: '%s', ttDelete: %ld/%ld\n", + act->name, act->time_to_delete, ttNow); + act_obj_unlink(act); + restart = 1; + } else { + DBGPRINTF("detect_updates obj gone away, keep '%s' open: %ld/%ld/%lds!\n", + act->name, act->time_to_delete, ttNow, ttNow - act->time_to_delete); + pollFile(act); + } + } break; } else if(fileInfo.st_ino != act->ino) { DBGPRINTF("file '%s' inode changed from %llu to %llu, unlinking from " "internal lists\n", act->name, (long long unsigned) act->ino, (long long unsigned) fileInfo.st_ino); - if(act->pStrm != NULL) { - /* we do no need to re-set later, as act_obj_unlink - * will destroy the strm obj */ - strmSet_checkRotation(act->pStrm, STRM_ROTATION_DO_NOT_CHECK); - } act_obj_unlink(act); restart = 1; break; } - } if (restart) { @@ -1103,7 +1129,8 @@ chk_active(const act_obj_t *act, const act_obj_t *const deleted) static void ATTR_NONNULL() act_obj_unlink(act_obj_t *act) { - DBGPRINTF("act_obj_unlink %p: %s, pStrm %p\n", act, act->name, act->pStrm); + DBGPRINTF("act_obj_unlink %p: %s, pStrm %p, ttDelete: %ld\n", + act, act->name, act->pStrm, act->time_to_delete); if(act->prev == NULL) { act->edge->active = act->next; } else { diff --git a/runtime/stream.c b/runtime/stream.c index 9cadcc6bab..e471a110f2 100644 --- a/runtime/stream.c +++ b/runtime/stream.c @@ -587,47 +587,6 @@ strmNextFile(strm_t *pThis) } -/* handle the eof case for monitored files. - * If we are monitoring a file, someone may have rotated it. In this case, we - * also need to close it and reopen it under the same name. - * rgerhards, 2008-02-13 - * The previous code also did a check for file truncation, in which case the - * file was considered rewritten. However, this potential border case turned - * out to be a big trouble spot on busy systems. It caused massive message - * duplication (I guess stat() can return a too-low number under some - * circumstances). So starting as of now, we only check the inode number and - * a file change is detected only if the inode changes. -- rgerhards, 2011-01-10 - */ -static rsRetVal ATTR_NONNULL() -strmHandleEOFMonitor(strm_t *const pThis) -{ - DEFiRet; - struct stat statName; - - ISOBJ_TYPE_assert(pThis, strm); - if(stat((char*) pThis->pszCurrFName, &statName) == -1) - ABORT_FINALIZE(RS_RET_IO_ERROR); - DBGPRINTF("strmHandleEOFMonitor: stream checking for file change on '%s', inode %u/%u size %llu/%llu\n", - pThis->pszCurrFName, (unsigned) pThis->inode, (unsigned) statName.st_ino, - (long long unsigned) pThis->iCurrOffs, (long long unsigned) statName.st_size); - - /* Inode unchanged but file size on disk is less than current offset - * means file was truncated, we also reopen if 'reopenOnTruncate' is on - */ - if (pThis->inode != statName.st_ino - || (pThis->bReopenOnTruncate && statName.st_size < pThis->iCurrOffs)) { - DBGPRINTF("we had a file change on '%s'\n", pThis->pszCurrFName); - CHKiRet(strmCloseFile(pThis)); - CHKiRet(strmOpenFile(pThis)); - } else { - ABORT_FINALIZE(RS_RET_EOF); - } - -finalize_it: - RETiRet; -} - - /* handle the EOF case of a stream * The EOF case is somewhat complicated, as the proper action depends on the * mode the stream is in. If there are multiple files (circular logs, most @@ -655,11 +614,8 @@ strmHandleEOF(strm_t *const pThis) case STREAMTYPE_FILE_MONITOR: DBGOPRINT((obj_t*) pThis, "file '%s' (%d) EOF, rotationCheck %d\n", pThis->pszCurrFName, pThis->fd, pThis->rotationCheck); - if(pThis->rotationCheck == STRM_ROTATION_DO_CHECK) { - CHKiRet(strmHandleEOFMonitor(pThis)); - } else { - ABORT_FINALIZE(RS_RET_EOF); - } +DBGPRINTF("RGER: EOF!\n"); + ABORT_FINALIZE(RS_RET_EOF); break; } @@ -776,6 +732,7 @@ strmReadBuf(strm_t *pThis, int *padBytes) } iLenRead = read(pThis->fd, pThis->pIOBuf, toRead); DBGOPRINT((obj_t*) pThis, "file %d read %ld bytes\n", pThis->fd, iLenRead); + DBGOPRINT((obj_t*) pThis, "file %d read %*s\n", pThis->fd, (unsigned) iLenRead, (char*) pThis->pIOBuf); /* end crypto */ if(iLenRead == 0) { CHKiRet(strmHandleEOF(pThis)); @@ -1025,6 +982,7 @@ strmReadLine(strm_t *const pThis, cstr_t **ppCStr, uint8_t mode, sbool bEscapeLF } pThis->strtOffs = pThis->iCurrOffs; /* we are at begin of next line */ } else { +DBGPRINTF("RGER: strmReadLine iRet %d\n", iRet); if(*ppCStr != NULL) { if(cstrLen(*ppCStr) > 0) { /* we may have an empty string in an unsuccesfull poll or after restart! */ @@ -2161,14 +2119,6 @@ strmSetReadTimeout(strm_t *const __restrict__ pThis, const int val) pThis->readTimeout = val; } -void ATTR_NONNULL() -strmSet_checkRotation(strm_t *const pThis, const int val) { - ISOBJ_TYPE_assert(pThis, strm); - assert(val == STRM_ROTATION_DO_CHECK || val == STRM_ROTATION_DO_NOT_CHECK); - pThis->rotationCheck = val; -} - - static rsRetVal ATTR_NONNULL() strmSetbDeleteOnClose(strm_t *const pThis, const int val) { diff --git a/runtime/stream.h b/runtime/stream.h index 4af3d3345b..38db1aba0c 100644 --- a/runtime/stream.h +++ b/runtime/stream.h @@ -97,10 +97,6 @@ typedef enum { STRM_COMPRESS_ZSTD = 1 } strm_compressionDriver_t; -/* settings for stream rotation (applies not to all processing modes!) */ -#define STRM_ROTATION_DO_CHECK 0 -#define STRM_ROTATION_DO_NOT_CHECK 1 - #define STREAM_ASYNC_NUMBUFS 2 /* must be a power of 2 -- TODO: make configurable */ /* The strm_t data structure */ struct strm_s { @@ -254,6 +250,5 @@ void strmSetReadTimeout(strm_t *const __restrict__ pThis, const int val); const uchar * ATTR_NONNULL() strmGetPrevLineSegment(strm_t *const pThis); const uchar * ATTR_NONNULL() strmGetPrevMsgSegment(strm_t *const pThis); int ATTR_NONNULL() strmGetPrevWasNL(const strm_t *const pThis); -void ATTR_NONNULL() strmSet_checkRotation(strm_t *const pThis, const int val); #endif /* #ifndef STREAM_H_INCLUDED */ diff --git a/tests/imfile-logrotate-async.sh b/tests/imfile-logrotate-async.sh index 1210b201c5..c15a69d24b 100755 --- a/tests/imfile-logrotate-async.sh +++ b/tests/imfile-logrotate-async.sh @@ -26,8 +26,8 @@ generate_conf add_conf ' $WorkDirectory '$RSYSLOG_DYNNAME'.spool -global( debug.whitelist="off" - debug.files=["rainerscript.c", "ratelimit.c", "ruleset.c", "main Q", "msg.c", "../action.c"] +global( debug.whitelist="on" + debug.files=["imfile.c", "stream.c"] ) module(load="../plugins/imfile/.libs/imfile" mode="inotify" PollingInterval="2") @@ -72,6 +72,9 @@ ls -li $RSYSLOG_DYNNAME.input* #wait_file_lines $RSYSLOG_OUT_LOG $msgcount $RETRIES wait_file_lines +touch $RSYSLOG_DYNNAME.input.log +./msleep 1000 + shutdown_when_empty wait_shutdown seq_check