Skip to content

Commit

Permalink
logger.c: Retry file lock also after ENOLCK
Browse files Browse the repository at this point in the history
ENOLCK means "No locks available".  We now handle it just like
EAGAIN, retrying after a random delay and hoping for recovery.
Also adds diagnostic output of the lock delay (for both cases)
once the lock is finally obtained, to help track down the problem.

The srand(NODE) call is made unconditional and moved to john.c.
Without it, all nodes are seeded with 1.
  • Loading branch information
magnumripper committed Feb 14, 2025
1 parent dd770ca commit 3b418ed
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
2 changes: 2 additions & 0 deletions src/john.c
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,8 @@ static void john_load(void)
if (mpi_p > 1)
john_set_mpi();
#endif
/* Without this, all nodes get the same PRNG sequence. */
srand(NODE);
}
#if HAVE_OPENCL
/*
Expand Down
34 changes: 28 additions & 6 deletions src/logger.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "cracker.h"
#include "signals.h"
#include "logger.h"
#include "timer.h"

static int cfg_beep;
static int cfg_log_passwords;
Expand Down Expand Up @@ -136,24 +137,36 @@ int log_lock(int fd, int cmd, int type, const char *name,
: cmd == F_UNLCK ? "F_UNLCK" : "");
#endif

uint64_t lock_start = 0;
int warned = 0;
int retries = 0;

lock.l_type = type;
while (fcntl(fd, cmd, &lock)) {
if (errno == EAGAIN) {
static int warned;
if (errno == EAGAIN || errno == ENOLCK) {
unsigned int warn_bit = (errno == EAGAIN ? 1 : 2);
struct timeval t;

if (cmd == F_SETLK)
return -1;

if (!warned++) {
log_event("Got EAGAIN despite F_SETLKW (only logged once per node) %s:%d %s", source_file, line, function);
fprintf(stderr, "Node %d: File locking apparently exhausted, check ulimits and any NFS server limits. This is recoverable but will harm performance (muting further of these messages from same node)\n", NODE);
srand(NODE);
if (!(warned & warn_bit)) {
if (!lock_start)
lock_start = john_get_nano();

log_event("- Got %s despite F_SETLKW trying to lock %s", errno == EAGAIN ? "EAGAIN" : "ENOLCK", name);
if (options.node_count)
fprintf(stderr, "%u: ", NODE);
fprintf(stderr, "File locking apparently exhausted (\"%s\" trying to lock %s). Check ulimits and any NFS server limits. Retrying...\n", strerror(errno), name);

warned |= warn_bit; /* Do not print again while retrying */
}

/* Sleep for a random time of max. ~260 ms */
t.tv_sec = 0; t.tv_usec = (rand() & 1023) << 8;
select(0, NULL, NULL, NULL, &t);

retries++;
continue;
} else if (errno != EINTR)
pexit("%s:%d %s() fcntl(%s, %s, %s)",
Expand All @@ -164,6 +177,15 @@ int log_lock(int fd, int cmd, int type, const char *name,
: cmd == F_UNLCK ? "F_UNLCK" : "");
}

if (retries) {
char *delay_str = human_prefix_small((john_get_nano() - lock_start) / 1E9);

log_event("+ Got a lock after %d retr%s, %ss", retries, retries > 1 ? "ies" : "y", delay_str);
if (options.node_count)
fprintf(stderr, "%u: ", NODE);
fprintf(stderr, "Got a lock after %d retr%s, %ss\n", retries, retries > 1 ? "ies" : "y", delay_str);
}

#ifdef LOCK_DEBUG
fprintf(stderr, "%u: %s(): Locked %s\n", NODE, function, name);
#endif
Expand Down

0 comments on commit 3b418ed

Please sign in to comment.