379 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			379 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
| //  Copyright (c) 2014 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package search
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"reflect"
 | |
| 	"sort"
 | |
| 
 | |
| 	"github.com/blevesearch/bleve/v2/size"
 | |
| 	index "github.com/blevesearch/bleve_index_api"
 | |
| )
 | |
| 
 | |
| var reflectStaticSizeDocumentMatch int
 | |
| var reflectStaticSizeSearchContext int
 | |
| var reflectStaticSizeLocation int
 | |
| 
 | |
| func init() {
 | |
| 	var dm DocumentMatch
 | |
| 	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
 | |
| 	var sc SearchContext
 | |
| 	reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
 | |
| 	var l Location
 | |
| 	reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
 | |
| }
 | |
| 
 | |
| type ArrayPositions []uint64
 | |
| 
 | |
| func (ap ArrayPositions) Equals(other ArrayPositions) bool {
 | |
| 	if len(ap) != len(other) {
 | |
| 		return false
 | |
| 	}
 | |
| 	for i := range ap {
 | |
| 		if ap[i] != other[i] {
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func (ap ArrayPositions) Compare(other ArrayPositions) int {
 | |
| 	for i, p := range ap {
 | |
| 		if i >= len(other) {
 | |
| 			return 1
 | |
| 		}
 | |
| 		if p < other[i] {
 | |
| 			return -1
 | |
| 		}
 | |
| 		if p > other[i] {
 | |
| 			return 1
 | |
| 		}
 | |
| 	}
 | |
| 	if len(ap) < len(other) {
 | |
| 		return -1
 | |
| 	}
 | |
| 	return 0
 | |
| }
 | |
| 
 | |
| type Location struct {
 | |
| 	// Pos is the position of the term within the field, starting at 1
 | |
| 	Pos uint64 `json:"pos"`
 | |
| 
 | |
| 	// Start and End are the byte offsets of the term in the field
 | |
| 	Start uint64 `json:"start"`
 | |
| 	End   uint64 `json:"end"`
 | |
| 
 | |
| 	// ArrayPositions contains the positions of the term within any elements.
 | |
| 	ArrayPositions ArrayPositions `json:"array_positions"`
 | |
| }
 | |
| 
 | |
| func (l *Location) Size() int {
 | |
| 	return reflectStaticSizeLocation + size.SizeOfPtr +
 | |
| 		len(l.ArrayPositions)*size.SizeOfUint64
 | |
| }
 | |
| 
 | |
| type Locations []*Location
 | |
| 
 | |
| func (p Locations) Len() int      { return len(p) }
 | |
| func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
 | |
| 
 | |
| func (p Locations) Less(i, j int) bool {
 | |
| 	c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
 | |
| 	if c < 0 {
 | |
| 		return true
 | |
| 	}
 | |
| 	if c > 0 {
 | |
| 		return false
 | |
| 	}
 | |
| 	return p[i].Pos < p[j].Pos
 | |
| }
 | |
| 
 | |
| func (p Locations) Dedupe() Locations { // destructive!
 | |
| 	if len(p) <= 1 {
 | |
| 		return p
 | |
| 	}
 | |
| 
 | |
| 	sort.Sort(p)
 | |
| 
 | |
| 	slow := 0
 | |
| 
 | |
| 	for _, pfast := range p {
 | |
| 		pslow := p[slow]
 | |
| 		if pslow.Pos == pfast.Pos &&
 | |
| 			pslow.Start == pfast.Start &&
 | |
| 			pslow.End == pfast.End &&
 | |
| 			pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
 | |
| 			continue // duplicate, so only move fast ahead
 | |
| 		}
 | |
| 
 | |
| 		slow++
 | |
| 
 | |
| 		p[slow] = pfast
 | |
| 	}
 | |
| 
 | |
| 	return p[:slow+1]
 | |
| }
 | |
| 
 | |
| type TermLocationMap map[string]Locations
 | |
| 
 | |
| func (t TermLocationMap) AddLocation(term string, location *Location) {
 | |
| 	t[term] = append(t[term], location)
 | |
| }
 | |
| 
 | |
| type FieldTermLocationMap map[string]TermLocationMap
 | |
| 
 | |
| type FieldTermLocation struct {
 | |
| 	Field    string
 | |
| 	Term     string
 | |
| 	Location Location
 | |
| }
 | |
| 
 | |
| type FieldFragmentMap map[string][]string
 | |
| 
 | |
| type DocumentMatch struct {
 | |
| 	Index           string                `json:"index,omitempty"`
 | |
| 	ID              string                `json:"id"`
 | |
| 	IndexInternalID index.IndexInternalID `json:"-"`
 | |
| 	Score           float64               `json:"score"`
 | |
| 	Expl            *Explanation          `json:"explanation,omitempty"`
 | |
| 	Locations       FieldTermLocationMap  `json:"locations,omitempty"`
 | |
| 	Fragments       FieldFragmentMap      `json:"fragments,omitempty"`
 | |
| 	Sort            []string              `json:"sort,omitempty"`
 | |
| 
 | |
| 	// Fields contains the values for document fields listed in
 | |
| 	// SearchRequest.Fields. Text fields are returned as strings, numeric
 | |
| 	// fields as float64s and date fields as time.RFC3339 formatted strings.
 | |
| 	Fields map[string]interface{} `json:"fields,omitempty"`
 | |
| 
 | |
| 	// used to maintain natural index order
 | |
| 	HitNumber uint64 `json:"-"`
 | |
| 
 | |
| 	// used to temporarily hold field term location information during
 | |
| 	// search processing in an efficient, recycle-friendly manner, to
 | |
| 	// be later incorporated into the Locations map when search
 | |
| 	// results are completed
 | |
| 	FieldTermLocations []FieldTermLocation `json:"-"`
 | |
| }
 | |
| 
 | |
| func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
 | |
| 	if dm.Fields == nil {
 | |
| 		dm.Fields = make(map[string]interface{})
 | |
| 	}
 | |
| 	existingVal, ok := dm.Fields[name]
 | |
| 	if !ok {
 | |
| 		dm.Fields[name] = value
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	valSlice, ok := existingVal.([]interface{})
 | |
| 	if ok {
 | |
| 		// already a slice, append to it
 | |
| 		valSlice = append(valSlice, value)
 | |
| 	} else {
 | |
| 		// create a slice
 | |
| 		valSlice = []interface{}{existingVal, value}
 | |
| 	}
 | |
| 	dm.Fields[name] = valSlice
 | |
| }
 | |
| 
 | |
| // Reset allows an already allocated DocumentMatch to be reused
 | |
| func (dm *DocumentMatch) Reset() *DocumentMatch {
 | |
| 	// remember the []byte used for the IndexInternalID
 | |
| 	indexInternalID := dm.IndexInternalID
 | |
| 	// remember the []interface{} used for sort
 | |
| 	sort := dm.Sort
 | |
| 	// remember the FieldTermLocations backing array
 | |
| 	ftls := dm.FieldTermLocations
 | |
| 	for i := range ftls { // recycle the ArrayPositions of each location
 | |
| 		ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
 | |
| 	}
 | |
| 	// idiom to copy over from empty DocumentMatch (0 allocations)
 | |
| 	*dm = DocumentMatch{}
 | |
| 	// reuse the []byte already allocated (and reset len to 0)
 | |
| 	dm.IndexInternalID = indexInternalID[:0]
 | |
| 	// reuse the []interface{} already allocated (and reset len to 0)
 | |
| 	dm.Sort = sort[:0]
 | |
| 	// reuse the FieldTermLocations already allocated (and reset len to 0)
 | |
| 	dm.FieldTermLocations = ftls[:0]
 | |
| 	return dm
 | |
| }
 | |
| 
 | |
| func (dm *DocumentMatch) Size() int {
 | |
| 	sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
 | |
| 		len(dm.Index) +
 | |
| 		len(dm.ID) +
 | |
| 		len(dm.IndexInternalID)
 | |
| 
 | |
| 	if dm.Expl != nil {
 | |
| 		sizeInBytes += dm.Expl.Size()
 | |
| 	}
 | |
| 
 | |
| 	for k, v := range dm.Locations {
 | |
| 		sizeInBytes += size.SizeOfString + len(k)
 | |
| 		for k1, v1 := range v {
 | |
| 			sizeInBytes += size.SizeOfString + len(k1) +
 | |
| 				size.SizeOfSlice
 | |
| 			for _, entry := range v1 {
 | |
| 				sizeInBytes += entry.Size()
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	for k, v := range dm.Fragments {
 | |
| 		sizeInBytes += size.SizeOfString + len(k) +
 | |
| 			size.SizeOfSlice
 | |
| 
 | |
| 		for _, entry := range v {
 | |
| 			sizeInBytes += size.SizeOfString + len(entry)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	for _, entry := range dm.Sort {
 | |
| 		sizeInBytes += size.SizeOfString + len(entry)
 | |
| 	}
 | |
| 
 | |
| 	for k, _ := range dm.Fields {
 | |
| 		sizeInBytes += size.SizeOfString + len(k) +
 | |
| 			size.SizeOfPtr
 | |
| 	}
 | |
| 
 | |
| 	return sizeInBytes
 | |
| }
 | |
| 
 | |
| // Complete performs final preparation & transformation of the
 | |
| // DocumentMatch at the end of search processing, also allowing the
 | |
| // caller to provide an optional preallocated locations slice
 | |
| func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
 | |
| 	// transform the FieldTermLocations slice into the Locations map
 | |
| 	nlocs := len(dm.FieldTermLocations)
 | |
| 	if nlocs > 0 {
 | |
| 		if cap(prealloc) < nlocs {
 | |
| 			prealloc = make([]Location, nlocs)
 | |
| 		}
 | |
| 		prealloc = prealloc[:nlocs]
 | |
| 
 | |
| 		var lastField string
 | |
| 		var tlm TermLocationMap
 | |
| 		var needsDedupe bool
 | |
| 
 | |
| 		for i, ftl := range dm.FieldTermLocations {
 | |
| 			if lastField != ftl.Field {
 | |
| 				lastField = ftl.Field
 | |
| 
 | |
| 				if dm.Locations == nil {
 | |
| 					dm.Locations = make(FieldTermLocationMap)
 | |
| 				}
 | |
| 
 | |
| 				tlm = dm.Locations[ftl.Field]
 | |
| 				if tlm == nil {
 | |
| 					tlm = make(TermLocationMap)
 | |
| 					dm.Locations[ftl.Field] = tlm
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			loc := &prealloc[i]
 | |
| 			*loc = ftl.Location
 | |
| 
 | |
| 			if len(loc.ArrayPositions) > 0 { // copy
 | |
| 				loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
 | |
| 			}
 | |
| 
 | |
| 			locs := tlm[ftl.Term]
 | |
| 
 | |
| 			// if the loc is before or at the last location, then there
 | |
| 			// might be duplicates that need to be deduplicated
 | |
| 			if !needsDedupe && len(locs) > 0 {
 | |
| 				last := locs[len(locs)-1]
 | |
| 				cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
 | |
| 				if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
 | |
| 					needsDedupe = true
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			tlm[ftl.Term] = append(locs, loc)
 | |
| 
 | |
| 			dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
 | |
| 				Location: Location{
 | |
| 					ArrayPositions: ftl.Location.ArrayPositions[:0],
 | |
| 				},
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if needsDedupe {
 | |
| 			for _, tlm := range dm.Locations {
 | |
| 				for term, locs := range tlm {
 | |
| 					tlm[term] = locs.Dedupe()
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
 | |
| 
 | |
| 	return prealloc
 | |
| }
 | |
| 
 | |
| func (dm *DocumentMatch) String() string {
 | |
| 	return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
 | |
| }
 | |
| 
 | |
| type DocumentMatchCollection []*DocumentMatch
 | |
| 
 | |
| func (c DocumentMatchCollection) Len() int           { return len(c) }
 | |
| func (c DocumentMatchCollection) Swap(i, j int)      { c[i], c[j] = c[j], c[i] }
 | |
| func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
 | |
| 
 | |
| type Searcher interface {
 | |
| 	Next(ctx *SearchContext) (*DocumentMatch, error)
 | |
| 	Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
 | |
| 	Close() error
 | |
| 	Weight() float64
 | |
| 	SetQueryNorm(float64)
 | |
| 	Count() uint64
 | |
| 	Min() int
 | |
| 	Size() int
 | |
| 
 | |
| 	DocumentMatchPoolSize() int
 | |
| }
 | |
| 
 | |
| type SearcherOptions struct {
 | |
| 	Explain            bool
 | |
| 	IncludeTermVectors bool
 | |
| 	Score              string
 | |
| }
 | |
| 
 | |
| // SearchContext represents the context around a single search
 | |
| type SearchContext struct {
 | |
| 	DocumentMatchPool *DocumentMatchPool
 | |
| 	Collector         Collector
 | |
| 	IndexReader       index.IndexReader
 | |
| }
 | |
| 
 | |
| func (sc *SearchContext) Size() int {
 | |
| 	sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
 | |
| 		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
 | |
| 
 | |
| 	if sc.DocumentMatchPool != nil {
 | |
| 		for _, entry := range sc.DocumentMatchPool.avail {
 | |
| 			if entry != nil {
 | |
| 				sizeInBytes += entry.Size()
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return sizeInBytes
 | |
| }
 |