From c4b9f67b6dc7347fb99c233f27e824c97344bfbd Mon Sep 17 00:00:00 2001
From: Thejas-bhat <thejas.orkombu@couchbase.com>
Date: Tue, 3 Dec 2024 16:09:10 +0530
Subject: [PATCH] code comments

---
 index/scorch/introducer.go |  1 +
 index/scorch/merge.go      |  8 +++++++-
 index/scorch/persister.go  | 15 +++++++--------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go
index 0d472ac8b..d7864ddb8 100644
--- a/index/scorch/introducer.go
+++ b/index/scorch/introducer.go
@@ -429,6 +429,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 	}
 
 	skipped := true
+	// make the newly merged segments part of the newSnapshot being constructed
 	for i, newMergedSegment := range nextMerge.new {
 		// checking if this newly merged segment is worth keeping based on
 		// obsoleted doc count since the merge intro started
diff --git a/index/scorch/merge.go b/index/scorch/merge.go
index be6de3863..1e318237d 100644
--- a/index/scorch/merge.go
+++ b/index/scorch/merge.go
@@ -443,6 +443,9 @@ type mergeTaskIntroStatus struct {
 	skipped bool
 }
 
+// this matters when multiple merged segments are introduced in a single
+// introducer channel push: it gives us a check to ensure that the file
+// count doesn't explode during the index's lifetime.
 type mergedSegmentHistory struct {
 	workerID     uint64
 	oldNewDocIDs []uint64
@@ -501,6 +504,9 @@ func (s *Scorch) mergeSegmentBasesParallel(snapshot *IndexSnapshot, flushableObj
 		newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
 		filename := zapFileName(newSegmentID)
 		path := s.path + string(os.PathSeparator) + filename
+
+		// the newly merged segment is already flushed out to disk; it just
+		// needs to be opened using mmap.
 		newDocNums, _, err := s.segPlugin.Merge(segsBatch, dropsBatch, path, s.closeCh, s)
 		if err != nil {
@@ -527,7 +533,7 @@
 		// close the new merged segments
 		_ = closeNewMergedSegments(newMergedSegments)
-		// tbd: need a better way to handle error
+		// tbd: need a better way to consolidate errors
 		return nil, nil, errs[0]
 	}
diff --git a/index/scorch/persister.go b/index/scorch/persister.go
index c2d6754af..38eb468a4 100644
--- a/index/scorch/persister.go
+++ b/index/scorch/persister.go
@@ -369,11 +369,13 @@ type flushable struct {
 	totDocs uint64
 }
 
-var DefaultNumPersisterWorkers = 1
+// number of workers which, in parallel, perform an in-memory merge of the
+// segments followed by a flush operation.
+var DefaultNumPersisterWorkers = 4
 
 // maximum size of data that a single worker is allowed to perform the in-memory
 // merge operation.
-var DefaultMaxSizeInMemoryMerge = 0
+var DefaultMaxSizeInMemoryMerge = 200 * 1024 * 1024
 
 func legacyFlushBehaviour() bool {
 	// DefaultMaxSizeInMemoryMerge = 0 is a special value to preserve the leagcy
@@ -417,6 +419,8 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
 		flushSet = append(flushSet, val)
 	} else {
+		// constructs a flushSet where each flushable object contains a set of
+		// segments to be merged and flushed out to disk.
 		for i, snapshot := range snapshot.segment {
 			if totSize >= DefaultMaxSizeInMemoryMerge {
 				if len(sbs) >= DefaultMinSegmentsForInMemoryMerge {
@@ -480,12 +484,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
 		return false, nil
 	}
 
-	// deploy the workers, have a wait group which waits for the flush set to complete
-	// each worker
-	// 1. merges the segments using mergeSegmentBases()
-	// wait for group to finish
-	//
-	// construct equiv snapshot and do a persistSnapshotDirect()
+	// drains out (after merging in memory) the segments in the flushSet, in parallel
 	newSnapshot, newSegmentIDs, err := s.mergeSegmentBasesParallel(snapshot, flushSet)
 	if err != nil {
 		return false, err
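For context, the pattern the persister comments describe (deploy one worker per flushable, merge its segments in memory, flush the result to disk, and wait on the whole flushSet before building the new snapshot) looks roughly like the sketch below. This is a minimal illustration only and is not part of the patch: flushTask and mergeAndFlush are hypothetical stand-ins, not scorch APIs, and the real mergeSegmentBasesParallel operates on segment bases and returns a new snapshot plus segment IDs rather than strings.

package main

import (
	"fmt"
	"sync"
)

// flushTask is a hypothetical stand-in for a "flushable": a batch of in-memory
// segments that one persister worker merges and flushes to disk.
type flushTask struct {
	segments []string
}

// mergeAndFlush is a hypothetical stand-in for the merge + flush step each
// worker performs; it returns an identifier for the newly persisted segment.
func mergeAndFlush(workerID int, t flushTask) (string, error) {
	return fmt.Sprintf("worker-%d-merged-%d-segments", workerID, len(t.segments)), nil
}

func main() {
	flushSet := []flushTask{
		{segments: []string{"a", "b"}},
		{segments: []string{"c", "d", "e"}},
		{segments: []string{"f"}},
	}

	newSegments := make([]string, len(flushSet))
	errs := make([]error, len(flushSet))

	// deploy one worker per flushable and wait for all of them to finish,
	// mirroring the "drain the flushSet in parallel" behaviour in the patch.
	var wg sync.WaitGroup
	for i, task := range flushSet {
		wg.Add(1)
		go func(i int, task flushTask) {
			defer wg.Done()
			newSegments[i], errs[i] = mergeAndFlush(i, task)
		}(i, task)
	}
	wg.Wait()

	// the patched code currently returns the first error it finds; the comment
	// notes that a better way to consolidate errors is still tbd.
	for i, err := range errs {
		if err != nil {
			fmt.Println("worker", i, "failed:", err)
			return
		}
	}

	// with all workers done, the caller would construct the equivalent snapshot
	// and persist it (persistSnapshotDirect in the real code).
	fmt.Println("persisted segments:", newSegments)
}

In this sketch each goroutine writes into its own slot of the result slices, which avoids any locking around shared state while the workers run.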