diff --git a/src/FwdState.cc b/src/FwdState.cc index 94d96104370..d43a1ced411 100644 --- a/src/FwdState.cc +++ b/src/FwdState.cc @@ -128,7 +128,8 @@ FwdState::FwdState(const Comm::ConnectionPointer &client, StoreEntry * e, HttpRe waitingForDispatched(false), destinations(new ResolvedPeers()), pconnRace(raceImpossible), - storedWholeReply_(nullptr) + storedWholeReply_(nullptr), + peeringTimer(r) { debugs(17, 2, "Forwarding client request " << client << ", url=" << e->url()); HTTPMSGLOCK(request); @@ -186,6 +187,8 @@ FwdState::stopAndDestroy(const char *reason) cancelStep(reason); + peeringTimer.stop(); + PeerSelectionInitiator::subscribed = false; // may already be false self = nullptr; // we hope refcounting destroys us soon; may already be nil /* do not place any code here as this object may be gone by now */ @@ -259,8 +262,6 @@ FwdState::completed() flags.forward_completed = true; - request->hier.stopPeerClock(false); - if (EBIT_TEST(entry->flags, ENTRY_ABORTED)) { debugs(17, 3, "entry aborted"); return ; @@ -780,8 +781,6 @@ FwdState::retryOrBail() // TODO: should we call completed() here and move doneWithRetries there? doneWithRetries(); - request->hier.stopPeerClock(false); - if (self != nullptr && !err && shutting_down && entry->isEmpty()) { const auto anErr = new ErrorState(ERR_SHUTTING_DOWN, Http::scServiceUnavailable, request, al); errorAppendEntry(entry, anErr); @@ -1128,8 +1127,6 @@ FwdState::connectStart() err = nullptr; request->clearError(); - request->hier.startPeerClock(); - const auto callback = asyncCallback(17, 5, FwdState::noteConnection, this); HttpRequest::Pointer cause = request; const auto cs = new HappyConnOpener(destinations, callback, cause, start_t, n_tries, al); @@ -1572,3 +1569,25 @@ ResetMarkingsToServer(HttpRequest * request, Comm::Connection &conn) Ip::Qos::setSockNfmark(&conn, conn.nfmark); } +/* PeeringActivityTimer */ + +// The simple methods below are not inlined to avoid exposing some of the +// current FwdState.h users to a full HttpRequest definition they do not need. + +PeeringActivityTimer::PeeringActivityTimer(const HttpRequestPointer &r): request(r) +{ + Assure(request); + timer().resume(); +} + +PeeringActivityTimer::~PeeringActivityTimer() +{ + stop(); +} + +Stopwatch & +PeeringActivityTimer::timer() +{ + return request->hier.totalPeeringTime; +} + diff --git a/src/FwdState.h b/src/FwdState.h index c435609e3bc..316243c9030 100644 --- a/src/FwdState.h +++ b/src/FwdState.h @@ -49,6 +49,40 @@ void ResetMarkingsToServer(HttpRequest *, Comm::Connection &); class HelperReply; +/// Eliminates excessive Stopwatch pause() calls in a task with multiple code +/// locations that pause a stopwatch. Ideally, there would be just one such +/// location (e.g., a task class destructor), but current code idiosyncrasies +/// necessitate this state. For simplicity sake, this class currently manages a +/// Stopwatch at a hard-coded location: HttpRequest::hier.totalPeeringTime. +class PeeringActivityTimer +{ +public: + PeeringActivityTimer(const HttpRequestPointer &); ///< resumes timer + ~PeeringActivityTimer(); ///< \copydoc stop() + + /// pauses timer if stop() has not been called + void stop() + { + if (!stopped) { + timer().pause(); + stopped = true; + } + } + +private: + /// managed Stopwatch object within HierarchyLogEntry + Stopwatch &timer(); + + /// the owner of managed HierarchyLogEntry + HttpRequestPointer request; + + // We cannot rely on timer().ran(): This class eliminates excessive calls + // within a single task (e.g., an AsyncJob) while the timer (and its ran() + // state) may be shared/affected by multiple concurrent tasks. + /// Whether the task is done participating in the managed activity. + bool stopped = false; +}; + class FwdState: public RefCountable, public PeerSelectionInitiator { CBDATA_CHILD(FwdState); @@ -212,6 +246,9 @@ class FwdState: public RefCountable, public PeerSelectionInitiator /// Whether the entire reply (including any body) was written to Store. /// The string literal value is only used for debugging. const char *storedWholeReply_; + + /// Measures time spent on selecting and communicating with peers. + PeeringActivityTimer peeringTimer; }; class acl_tos; diff --git a/src/HierarchyLogEntry.h b/src/HierarchyLogEntry.h index 5f5d8b13f41..053cf9b134a 100644 --- a/src/HierarchyLogEntry.h +++ b/src/HierarchyLogEntry.h @@ -9,6 +9,7 @@ #ifndef SQUID_SRC_HIERARCHYLOGENTRY_H #define SQUID_SRC_HIERARCHYLOGENTRY_H +#include "base/Stopwatch.h" #include "comm/Connection.h" #include "enums.h" #include "hier_code.h" @@ -37,22 +38,10 @@ class HierarchyLogEntry /// Call this after each peer socket write(2), including failed ones. void notePeerWrite(); - /// Start recording total time spent communicating with peers - void startPeerClock(); - /** - * Record total time spent communicating with peers - * \param force whether to overwrite old recorded value if any - */ - void stopPeerClock(const bool force); - /// Estimates response generation and sending delay at the last peer. /// \returns whether the estimate (stored in `responseTime`) is available. bool peerResponseTime(struct timeval &responseTime); - /// Estimates the total time spent communicating with peers. - /// \returns whether the estimate (stored in `responseTime`) is available. - bool totalResponseTime(struct timeval &responseTime); - public: hier_code code; char host[SQUIDHOSTNAMELEN]; @@ -70,13 +59,14 @@ class HierarchyLogEntry Comm::ConnectionPointer tcpServer; ///< TCP/IP level details of the last peer/server connection int64_t bodyBytesRead; ///< number of body bytes received from the next hop or -1 + /// cumulative time spent (so far) communicating with all peers (see %hier.stopPeerClock(true); if (requestBodySource != nullptr) stopConsumingFrom(requestBodySource); diff --git a/src/format/Format.cc b/src/format/Format.cc index cdce6a75b42..ab25dd03e95 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -635,9 +635,16 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS break; case LFT_TOTAL_SERVER_SIDE_RESPONSE_TIME: { - struct timeval totalResponseTime; - if (al->hier.totalResponseTime(totalResponseTime)) { - outtv = totalResponseTime; + // XXX: al->hier.totalPeeringTime is not updated until prepareLogWithRequestDetails(). + // TODO: Avoid the need for updates by keeping totalPeeringTime (or even ALE::hier) in one place. + const auto &timer = (!al->hier.totalPeeringTime.ran() && al->request) ? + al->request->hier.totalPeeringTime : al->hier.totalPeeringTime; + if (timer.ran()) { + using namespace std::chrono_literals; + const auto duration = timer.total(); + outtv.tv_sec = std::chrono::duration_cast(duration).count(); + const auto totalUsec = std::chrono::duration_cast(duration); + outtv.tv_usec = (totalUsec % std::chrono::microseconds(1s)).count(); doMsec = 1; } } diff --git a/src/log/access_log.cc b/src/log/access_log.cc index ff01005f465..1a53bf59412 100644 --- a/src/log/access_log.cc +++ b/src/log/access_log.cc @@ -187,12 +187,6 @@ HierarchyLogEntry::HierarchyLogEntry() : store_complete_stop.tv_usec =0; clearPeerNotes(); - - totalResponseTime_.tv_sec = -1; - totalResponseTime_.tv_usec = 0; - - firstConnStart_.tv_sec = 0; - firstConnStart_.tv_usec = 0; } void @@ -241,26 +235,6 @@ HierarchyLogEntry::notePeerWrite() peer_last_write_ = current_time; } -void -HierarchyLogEntry::startPeerClock() -{ - if (!firstConnStart_.tv_sec) - firstConnStart_ = current_time; -} - -void -HierarchyLogEntry::stopPeerClock(const bool force) -{ - debugs(46, 5, "First connection started: " << firstConnStart_ << - ", current total response time value: " << (totalResponseTime_.tv_sec * 1000 + totalResponseTime_.tv_usec/1000) << - (force ? ", force fixing" : "")); - if (!force && totalResponseTime_.tv_sec != -1) - return; - - if (firstConnStart_.tv_sec) - tvSub(totalResponseTime_, firstConnStart_, current_time); -} - bool HierarchyLogEntry::peerResponseTime(struct timeval &responseTime) { @@ -288,19 +262,6 @@ HierarchyLogEntry::peerResponseTime(struct timeval &responseTime) return true; } -bool -HierarchyLogEntry::totalResponseTime(struct timeval &responseTime) -{ - // This should not really happen, but there may be rare code - // paths that lead to FwdState discarded (or transaction logged) - // without (or before) a stopPeerClock() call. - if (firstConnStart_.tv_sec && totalResponseTime_.tv_sec == -1) - stopPeerClock(false); - - responseTime = totalResponseTime_; - return responseTime.tv_sec >= 0 && responseTime.tv_usec >= 0; -} - static void accessLogRegisterWithCacheManager(void) { diff --git a/src/tests/stub_access_log.cc b/src/tests/stub_access_log.cc index 93bfd75174f..8dbed03787f 100644 --- a/src/tests/stub_access_log.cc +++ b/src/tests/stub_access_log.cc @@ -17,7 +17,6 @@ HierarchyLogEntry::HierarchyLogEntry() STUB void HierarchyLogEntry::notePeerRead() STUB void HierarchyLogEntry::notePeerWrite() STUB bool HierarchyLogEntry::peerResponseTime(struct timeval &) STUB_RETVAL(false) -bool HierarchyLogEntry::totalResponseTime(struct timeval &) STUB_RETVAL(false) ping_data::ping_data() : n_sent(0), diff --git a/src/tunnel.cc b/src/tunnel.cc index 81cd72cbb76..993daab861b 100644 --- a/src/tunnel.cc +++ b/src/tunnel.cc @@ -211,6 +211,9 @@ class TunnelStateData: public PeerSelectionInitiator /// over the (encrypted, if needed) transport connection to that cache_peer JobWait peerWait; + /// Measures time spent on selecting and communicating with peers. + PeeringActivityTimer peeringTimer; + void copyRead(Connection &from, IOCB *completion); /// continue to set up connection to a peer, going async for SSL peers @@ -310,7 +313,7 @@ TunnelStateData::serverClosed() { server.noteClosure(); - request->hier.stopPeerClock(false); + peeringTimer.stop(); finishWritingAndDelete(client); } @@ -386,6 +389,16 @@ TunnelStateData::deleteThis() delete this; } +// TODO: Replace with a reusable API guaranteeing non-nil pointer forwarding. +/// safely extracts HttpRequest from a never-nil ClientHttpRequest pointer +static auto & +guaranteedRequest(const ClientHttpRequest * const cr) +{ + Assure(cr); + Assure(cr->request); + return *cr->request; +} + TunnelStateData::TunnelStateData(ClientHttpRequest *clientRequest) : startTime(squid_curtime), destinations(new ResolvedPeers()), @@ -393,7 +406,8 @@ TunnelStateData::TunnelStateData(ClientHttpRequest *clientRequest) : committedToServer(false), n_tries(0), banRetries(nullptr), - codeContext(CodeContext::Current()) + codeContext(CodeContext::Current()), + peeringTimer(&guaranteedRequest(clientRequest)) { debugs(26, 3, "TunnelStateData constructed this=" << this); client.readPendingFunc = &tunnelDelayedClientRead; @@ -477,8 +491,7 @@ TunnelStateData::retryOrBail(const char *context) /* bail */ - if (request) - request->hier.stopPeerClock(false); + peeringTimer.stop(); // TODO: Add sendSavedErrorOr(err_type type, Http::StatusCode, context). // Then, the remaining method code (below) should become the common part of @@ -1403,8 +1416,7 @@ TunnelStateData::sendError(ErrorState *finalError, const char *reason) { debugs(26, 3, "aborting transaction for " << reason); - if (request) - request->hier.stopPeerClock(false); + peeringTimer.stop(); cancelStep(reason); @@ -1438,15 +1450,12 @@ TunnelStateData::cancelStep(const char *reason) void TunnelStateData::startConnecting() { - if (request) - request->hier.startPeerClock(); - assert(!destinations->empty()); assert(!transporting()); delete savedError; // may still be nil savedError = nullptr; - request->hier.peer_reply_status = Http::scNone; // TODO: Move to startPeerClock()? + request->hier.peer_reply_status = Http::scNone; const auto callback = asyncCallback(17, 5, TunnelStateData::noteConnection, this); const auto cs = new HappyConnOpener(destinations, callback, request, startTime, n_tries, al);