1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Improve issue search (#2387)

* Improve issue indexer

* Fix new issue sqlite bug

* Different test indexer paths for each db

* Add integration indexer paths to make clean
This commit is contained in:
Ethan Koenig
2017-09-16 13:16:21 -07:00
committed by Lauris BH
parent 52e11b24bf
commit b0f7457d9e
122 changed files with 15280 additions and 1458 deletions

View File

@@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024
var VersionKey = []byte{'v'}
const Version uint8 = 5
const Version uint8 = 7
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
@@ -499,44 +499,65 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
addRows = make([]UpsideDownCouchRow, 0, len(rows))
if backIndexRow == nil {
addRows = addRows[0:len(rows)]
for i, row := range rows {
addRows[i] = row
}
return addRows, nil, nil
}
updateRows = make([]UpsideDownCouchRow, 0, len(rows))
deleteRows = make([]UpsideDownCouchRow, 0, len(rows))
existingTermKeys := make(map[string]bool)
for _, key := range backIndexRow.AllTermKeys() {
existingTermKeys[string(key)] = true
var existingTermKeys map[string]struct{}
backIndexTermKeys := backIndexRow.AllTermKeys()
if len(backIndexTermKeys) > 0 {
existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys))
for _, key := range backIndexTermKeys {
existingTermKeys[string(key)] = struct{}{}
}
}
existingStoredKeys := make(map[string]bool)
for _, key := range backIndexRow.AllStoredKeys() {
existingStoredKeys[string(key)] = true
var existingStoredKeys map[string]struct{}
backIndexStoredKeys := backIndexRow.AllStoredKeys()
if len(backIndexStoredKeys) > 0 {
existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys))
for _, key := range backIndexStoredKeys {
existingStoredKeys[string(key)] = struct{}{}
}
}
keyBuf := GetRowBuffer()
for _, row := range rows {
switch row := row.(type) {
case *TermFrequencyRow:
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
} else {
addRows = append(addRows, row)
if existingTermKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
continue
}
}
addRows = append(addRows, row)
case *StoredRow:
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
} else {
addRows = append(addRows, row)
if existingStoredKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
continue
}
}
addRows = append(addRows, row)
default:
updateRows = append(updateRows, row)
}
@@ -583,33 +604,41 @@ func encodeFieldType(f document.Field) byte {
fieldType = 'd'
case *document.BooleanField:
fieldType = 'b'
case *document.GeoPointField:
fieldType = 'g'
case *document.CompositeField:
fieldType = 'c'
}
return fieldType
}
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) {
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) {
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
termFreqRowsUsed := 0
terms := make([]string, 0, len(tokenFreqs))
for k, tf := range tokenFreqs {
var termFreqRow *TermFrequencyRow
termFreqRow := &termFreqRows[termFreqRowsUsed]
termFreqRowsUsed++
InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID,
uint64(frequencyFromTokenFreq(tf)), fieldNorm)
if includeTermVectors {
var tv []*TermVector
tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
} else {
termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
}
// record the back index entry
backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))}
backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry)
terms = append(terms, k)
rows = append(rows, termFreqRow)
}
backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms}
backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry)
return rows, backIndexTermEntries
return rows, backIndexTermsEntries
}
func (udc *UpsideDownCouch) Delete(id string) (err error) {
@@ -682,9 +711,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
idBytes := []byte(id)
for _, backIndexEntry := range backIndexRow.termEntries {
tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0)
deleteRows = append(deleteRows, tfr)
for _, backIndexEntry := range backIndexRow.termsEntries {
for i := range backIndexEntry.Terms {
tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0)
deleteRows = append(deleteRows, tfr)
}
}
for _, se := range backIndexRow.storedEntries {
sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
@@ -706,6 +737,8 @@ func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document
return document.NewDateTimeFieldFromBytes(name, pos, value)
case 'b':
return document.NewBooleanFieldFromBytes(name, pos, value)
case 'g':
return document.NewGeoPointFieldFromBytes(name, pos, value)
}
return nil
}
@@ -715,6 +748,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
}
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
a := make([]TermVector, len(tf.Locations))
rv := make([]*TermVector, len(tf.Locations))
for i, l := range tf.Locations {
@@ -727,14 +761,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
rows = append(rows, newFieldRow)
}
}
tv := TermVector{
a[i] = TermVector{
field: fieldIndex,
arrayPositions: l.ArrayPositions,
pos: uint64(l.Position),
start: uint64(l.Start),
end: uint64(l.End),
}
rv[i] = &tv
rv[i] = &a[i]
}
return rv, rows
@@ -745,18 +779,19 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
return nil
}
a := make([]index.TermFieldVector, len(in))
rv := make([]*index.TermFieldVector, len(in))
for i, tv := range in {
fieldName := udc.fieldCache.FieldIndexed(tv.field)
tfv := index.TermFieldVector{
a[i] = index.TermFieldVector{
Field: fieldName,
ArrayPositions: tv.arrayPositions,
Pos: tv.pos,
Start: tv.start,
End: tv.end,
}
rv[i] = &tfv
rv[i] = &a[i]
}
return rv
}
@@ -1008,7 +1043,7 @@ func init() {
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) {
// use a temporary row structure to build key
tempRow := &BackIndexRow{
tempRow := BackIndexRow{
doc: docID,
}