Skip to content

Commit

Permalink
field cardinality temp save
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Dec 6, 2024
1 parent 3b1eb20 commit 5bbe43e
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 25 deletions.
14 changes: 10 additions & 4 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator

index int
docs *roaring.Bitmap
cardinality int
index int
docs *roaring.Bitmap

postings segment.PostingsList

Expand Down Expand Up @@ -146,6 +147,7 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,

results := make(chan *asynchSegmentResult)
var totalBytesRead uint64
var fieldCardinality int64
for _, s := range is.segment {
go func(s *SegmentSnapshot) {
dict, err := s.segment.Dictionary(field)
Expand All @@ -155,6 +157,8 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,
if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
atomic.AddUint64(&totalBytesRead, dictStats.BytesRead())
}

atomic.AddInt64(&fieldCardinality, int64(dict.Cardinality()))

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 161 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else {
Expand All @@ -166,9 +170,11 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,

var err error
rv := &IndexSnapshotFieldDict{
snapshot: is,
cursors: make([]*segmentDictCursor, 0, len(is.segment)),
snapshot: is,
cursors: make([]*segmentDictCursor, 0, len(is.segment)),
cardinality: int(fieldCardinality),
}

for count := 0; count < len(is.segment); count++ {
asr := <-results
if asr.err != nil && err == nil {
Expand Down
10 changes: 6 additions & 4 deletions index/scorch/snapshot_index_dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ type segmentDictCursor struct {
}

type IndexSnapshotFieldDict struct {
snapshot *IndexSnapshot
cursors []*segmentDictCursor
entry index.DictEntry
bytesRead uint64
cardinality int
bytesRead uint64

snapshot *IndexSnapshot
cursors []*segmentDictCursor
entry index.DictEntry
}

func (i *IndexSnapshotFieldDict) BytesRead() uint64 {
Expand Down
4 changes: 2 additions & 2 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,8 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
Total: 1,
Successful: 1,
},
Hits: knnHits,
totalDocCount: count,
Hits: knnHits,
docCount: count,
}, nil
}

Expand Down
13 changes: 10 additions & 3 deletions pre_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,23 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {

// -----------------------------------------------------------------------------
type bm25PreSearchResultProcessor struct {
docCount uint64 // bm25 specific stats
docCount uint64 // bm25 specific stats
fieldCardinality map[string]uint64
}

func newBM25PreSearchResultProcessor() *bm25PreSearchResultProcessor {
return &bm25PreSearchResultProcessor{}
return &bm25PreSearchResultProcessor{
fieldCardinality: make(map[string]uint64),
}
}

// TODO How will this work for queries other than term queries?
func (b *bm25PreSearchResultProcessor) add(sr *SearchResult, indexName string) {
b.docCount += (sr.totalDocCount)
b.docCount += (sr.docCount)

for field, cardinality := range sr.fieldCardinality {
b.fieldCardinality[field] += cardinality
}
}

func (b *bm25PreSearchResultProcessor) finalize(sr *SearchResult) {
Expand Down
5 changes: 3 additions & 2 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,10 @@ type SearchResult struct {
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`

// The following fields are applicable to BM25 preSearch
// todo add more fields beyond docCount
totalDocCount uint64
docCount uint64
fieldCardinality map[string]uint64 // search_field -> cardinality
}

func (sr *SearchResult) Size() int {
Expand Down
24 changes: 14 additions & 10 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,22 @@ func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, te

func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
count, ok := ctx.Value(search.BM25PreSearchDataKey).(uint64)
if !ok {
var err error
count, err = indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
var count uint64
if ctx != nil {
ctxCount, ok := ctx.Value(search.BM25PreSearchDataKey).(uint64)
if !ok {
var err error
ctxCount, err = indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
} else {
fmt.Printf("fetched from ctx \n")
}
} else {
fmt.Printf("fetched from ctx \n")
}
count = ctxCount

}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
Expand Down

0 comments on commit 5bbe43e

Please sign in to comment.