From 94c40964df92cd19393a15d18c9704bec4020362 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Tue, 31 Dec 2024 17:27:40 +0000 Subject: [PATCH] REQMOD stuck when adapted request (body) is not forwarded (#1966) Squid forwards request bodies using BodyPipe objects. A BodyPipe object has two associated agents: producer and consumer. Those agents are set independently, at different processing stages. If BodyPipe consumer is not set, the producer may get stuck waiting for BodyPipe buffer space. When producer creates a BodyPipe, it effectively relies on some code somewhere to register a consumer (or declare a registration failure), but automatically tracking that expectation fulfillment is impractical For REQMOD transactions involving adapted request bodies, including ICAP 204 transactions, Client::startRequestBodyFlow() sets body consumer. If that method is not called, there will be no consumer, and REQMOD may get stuck. Many `if` statements can block request forwarding altogether or block a being-forwarded request from reaching that method. For example, adapted_http_access and miss_access denials block request forwarding Without REQMOD, request processing can probably get stuck for similar lack-of-consumer reasons, but regular requests ought to be killed by various I/O or forwarding timeouts. There are no such timeouts for those REQMOD transactions that are only waiting for request body consumer to clear adapted BodyPipe space (e.g., after all ICAP 204 I/Os are over). Relying on timeouts is also very inefficient For a `mgr:mem` observer, stuck REQMOD transactions look like a ModXact memory leak. A `mgr:jobs` report shows ModXact jobs with RBS(1) status --- src/BodyPipe.cc | 24 ++++++++++++------------ src/errorpage.cc | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/BodyPipe.cc b/src/BodyPipe.cc index dfdf53ae1c9..3b5a8fbe4ba 100644 --- a/src/BodyPipe.cc +++ b/src/BodyPipe.cc @@ -266,18 +266,18 @@ BodyPipe::clearConsumer() void BodyPipe::expectNoConsumption() { - // We may be called multiple times because multiple jobs on the consumption - // chain may realize that there will be no more setConsumer() calls (e.g., - // consuming code and retrying code). It is both difficult and not really - // necessary for them to coordinate their expectNoConsumption() calls. - - // As a consequence, we may be called when we are auto-consuming already. - - if (!abortedConsumption && !exhausted()) { - // Before we abort, any regular consumption should be over and auto - // consumption must not be started. - Must(!theConsumer); - + // We and enableAutoConsumption() may be called multiple times because + // multiple jobs on the consumption chain may realize that there will be no + // more setConsumer() calls (e.g., consuming code and retrying code). It is + // both difficult and unnecessary for them to coordinate their calls. + + // As a consequence, we may be called when already auto-consuming, including + // cases where abortedConsumption is still false. We could try to harden + // this by also aborting consumption from enableAutoConsumption() when there + // is no consumer, but see errorAppendEntry() TODO for a better plan. + + debugs(91, 7, status()); + if (!abortedConsumption && !exhausted() && !theConsumer) { AsyncCall::Pointer call= asyncCall(91, 7, "BodyProducer::noteBodyConsumerAborted", BodyProducerDialer(theProducer, diff --git a/src/errorpage.cc b/src/errorpage.cc index de019667325..bbbfc7acde5 100644 --- a/src/errorpage.cc +++ b/src/errorpage.cc @@ -741,6 +741,29 @@ errorAppendEntry(StoreEntry * entry, ErrorState * err) assert (entry->isEmpty()); debugs(4, 4, "storing " << err << " in " << *entry); + if (const auto &request = err->request) { + if (const auto &bodyPipe = request->body_pipe) { + // We cannot expectNoConsumption() here (yet): This request may be a + // virgin request being consumed by adaptation that should continue + // even in error-handling cases. startAutoConsumptionIfNeeded() call + // triggered by enableAutoConsumption() below skips such requests. + // + // Today, we also cannot enableAutoConsumption() earlier because it + // could result in premature consumption in BodyPipe::postAppend() + // followed by an unwanted setConsumerIfNotLate() failure. + // + // TODO: Simplify BodyPipe auto-consumption by automatically + // enabling it when no new consumers are expected, removing the need + // for explicit enableAutoConsumption() calls like the one below. + // + // Code like clientReplyContext::sendClientOldEntry() might use + // another StoreEntry for this master transaction, but we want to + // consume this request body even in those hypothetical error cases + // to prevent stuck (client-Squid or REQMOD) transactions. + bodyPipe->enableAutoConsumption(); + } + } + if (entry->store_status != STORE_PENDING) { debugs(4, 2, "Skipping error page due to store_status: " << entry->store_status); /*