mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			130 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			130 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
//  Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 | 
						|
 | 
						|
package upsidedown
 | 
						|
 | 
						|
import (
 | 
						|
	index "github.com/blevesearch/bleve_index_api"
 | 
						|
)
 | 
						|
 | 
						|
// IndexRow is the contract for a single row produced during analysis.
// A row exposes a key and a value, each available in three forms: its size,
// a "To" variant that takes a caller-supplied byte slice, and a variant
// that returns a byte slice directly.
//
// NOTE(review): presumably KeyTo/ValueTo serialize into the supplied buffer
// and return the number of bytes written, with KeySize/ValueSize giving the
// buffer size required; confirm against the concrete row implementations.
type IndexRow interface {
	// Key accessors.
	KeySize() int
	KeyTo([]byte) (int, error)
	Key() []byte

	// Value accessors.
	ValueSize() int
	ValueTo([]byte) (int, error)
	Value() []byte
}
 | 
						|
 | 
						|
type AnalysisResult struct {
 | 
						|
	DocID string
 | 
						|
	Rows  []IndexRow
 | 
						|
}
 | 
						|
 | 
						|
func (udc *UpsideDownCouch) Analyze(d index.Document) *AnalysisResult {
 | 
						|
	return udc.analyze(d)
 | 
						|
}
 | 
						|
 | 
						|
func (udc *UpsideDownCouch) analyze(d index.Document) *AnalysisResult {
 | 
						|
	rv := &AnalysisResult{
 | 
						|
		DocID: d.ID(),
 | 
						|
		Rows:  make([]IndexRow, 0, 100),
 | 
						|
	}
 | 
						|
 | 
						|
	docIDBytes := []byte(d.ID())
 | 
						|
 | 
						|
	// track our back index entries
 | 
						|
	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
 | 
						|
 | 
						|
	// information we collate as we merge fields with same name
 | 
						|
	fieldTermFreqs := make(map[uint16]index.TokenFrequencies)
 | 
						|
	fieldLengths := make(map[uint16]int)
 | 
						|
	fieldIncludeTermVectors := make(map[uint16]bool)
 | 
						|
	fieldNames := make(map[uint16]string)
 | 
						|
 | 
						|
	analyzeField := func(field index.Field, storable bool) {
 | 
						|
		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
 | 
						|
		if newFieldRow != nil {
 | 
						|
			rv.Rows = append(rv.Rows, newFieldRow)
 | 
						|
		}
 | 
						|
		fieldNames[fieldIndex] = field.Name()
 | 
						|
 | 
						|
		if field.Options().IsIndexed() {
 | 
						|
			field.Analyze()
 | 
						|
			fieldLength := field.AnalyzedLength()
 | 
						|
			tokenFreqs := field.AnalyzedTokenFrequencies()
 | 
						|
			existingFreqs := fieldTermFreqs[fieldIndex]
 | 
						|
			if existingFreqs == nil {
 | 
						|
				fieldTermFreqs[fieldIndex] = tokenFreqs
 | 
						|
			} else {
 | 
						|
				existingFreqs.MergeAll(field.Name(), tokenFreqs)
 | 
						|
				fieldTermFreqs[fieldIndex] = existingFreqs
 | 
						|
			}
 | 
						|
			fieldLengths[fieldIndex] += fieldLength
 | 
						|
			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
 | 
						|
		}
 | 
						|
 | 
						|
		if storable && field.Options().IsStored() {
 | 
						|
			rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// walk all the fields, record stored fields now
 | 
						|
	// place information about indexed fields into map
 | 
						|
	// this collates information across fields with
 | 
						|
	// same names (arrays)
 | 
						|
	d.VisitFields(func(field index.Field) {
 | 
						|
		analyzeField(field, true)
 | 
						|
	})
 | 
						|
 | 
						|
	if d.HasComposite() {
 | 
						|
		for fieldIndex, tokenFreqs := range fieldTermFreqs {
 | 
						|
			// see if any of the composite fields need this
 | 
						|
			d.VisitComposite(func(field index.CompositeField) {
 | 
						|
				field.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
 | 
						|
			})
 | 
						|
		}
 | 
						|
 | 
						|
		d.VisitComposite(func(field index.CompositeField) {
 | 
						|
			analyzeField(field, false)
 | 
						|
		})
 | 
						|
	}
 | 
						|
 | 
						|
	rowsCapNeeded := len(rv.Rows) + 1
 | 
						|
	for _, tokenFreqs := range fieldTermFreqs {
 | 
						|
		rowsCapNeeded += len(tokenFreqs)
 | 
						|
	}
 | 
						|
 | 
						|
	rv.Rows = append(make([]IndexRow, 0, rowsCapNeeded), rv.Rows...)
 | 
						|
 | 
						|
	backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
 | 
						|
 | 
						|
	// walk through the collated information and process
 | 
						|
	// once for each indexed field (unique name)
 | 
						|
	for fieldIndex, tokenFreqs := range fieldTermFreqs {
 | 
						|
		fieldLength := fieldLengths[fieldIndex]
 | 
						|
		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
 | 
						|
 | 
						|
		// encode this field
 | 
						|
		rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
 | 
						|
	}
 | 
						|
 | 
						|
	// build the back index row
 | 
						|
	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
 | 
						|
	rv.Rows = append(rv.Rows, backIndexRow)
 | 
						|
 | 
						|
	return rv
 | 
						|
}
 |