1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Search bar for issues/pulls (#530)

This commit is contained in:
Ethan Koenig
2017-01-24 21:43:02 -05:00
committed by Lunny Xiao
parent 8bc431952f
commit 833f8b94c2
195 changed files with 221830 additions and 60 deletions

View File

@@ -0,0 +1,110 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
)
func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult {
rv := &index.AnalysisResult{
DocID: d.ID,
Rows: make([]index.IndexRow, 0, 100),
}
docIDBytes := []byte(d.ID)
// track our back index entries
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
// information we collate as we merge fields with same name
fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
fieldLengths := make(map[uint16]int)
fieldIncludeTermVectors := make(map[uint16]bool)
fieldNames := make(map[uint16]string)
analyzeField := func(field document.Field, storable bool) {
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
if newFieldRow != nil {
rv.Rows = append(rv.Rows, newFieldRow)
}
fieldNames[fieldIndex] = field.Name()
if field.Options().IsIndexed() {
fieldLength, tokenFreqs := field.Analyze()
existingFreqs := fieldTermFreqs[fieldIndex]
if existingFreqs == nil {
fieldTermFreqs[fieldIndex] = tokenFreqs
} else {
existingFreqs.MergeAll(field.Name(), tokenFreqs)
fieldTermFreqs[fieldIndex] = existingFreqs
}
fieldLengths[fieldIndex] += fieldLength
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
}
if storable && field.Options().IsStored() {
rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
}
}
// walk all the fields, record stored fields now
// place information about indexed fields into map
// this collates information across fields with
// same names (arrays)
for _, field := range d.Fields {
analyzeField(field, true)
}
if len(d.CompositeFields) > 0 {
for fieldIndex, tokenFreqs := range fieldTermFreqs {
// see if any of the composite fields need this
for _, compositeField := range d.CompositeFields {
compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
}
}
for _, compositeField := range d.CompositeFields {
analyzeField(compositeField, false)
}
}
rowsCapNeeded := len(rv.Rows) + 1
for _, tokenFreqs := range fieldTermFreqs {
rowsCapNeeded += len(tokenFreqs)
}
rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)
// walk through the collated information and process
// once for each indexed field (unique name)
for fieldIndex, tokenFreqs := range fieldTermFreqs {
fieldLength := fieldLengths[fieldIndex]
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
// encode this field
rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
}
// build the back index row
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
rv.Rows = append(rv.Rows, backIndexRow)
return rv
}

View File

@@ -0,0 +1,8 @@
#!/bin/sh
BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//`
for BENCHMARK in $BENCHMARKS
do
go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
done

View File

@@ -0,0 +1,172 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"sort"
"github.com/blevesearch/bleve/index/store"
)
// the functions in this file are only intended to be used by
// the bleve_dump utility and the debug http handlers
// if your application relies on them, you're doing something wrong
// they may change or be removed at any time
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) {
start := prefix
if start == nil {
start = []byte{0}
}
it := kvreader.PrefixIterator(start)
defer func() {
cerr := it.Close()
if cerr != nil {
rv <- cerr
}
}()
key, val, valid := it.Current()
for valid {
ck := make([]byte, len(key))
copy(ck, key)
cv := make([]byte, len(val))
copy(cv, val)
row, err := ParseFromKeyValue(ck, cv)
if err != nil {
rv <- err
return
}
rv <- row
it.Next()
key, val, valid = it.Current()
}
}
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) {
it := kvreader.RangeIterator(start, end)
defer func() {
cerr := it.Close()
if cerr != nil {
rv <- cerr
}
}()
key, val, valid := it.Current()
for valid {
ck := make([]byte, len(key))
copy(ck, key)
cv := make([]byte, len(val))
copy(cv, val)
row, err := ParseFromKeyValue(ck, cv)
if err != nil {
rv <- err
return
}
rv <- row
it.Next()
key, val, valid = it.Current()
}
}
func (i *IndexReader) DumpAll() chan interface{} {
rv := make(chan interface{})
go func() {
defer close(rv)
dumpRange(i.kvreader, rv, nil, nil)
}()
return rv
}
func (i *IndexReader) DumpFields() chan interface{} {
rv := make(chan interface{})
go func() {
defer close(rv)
dumpPrefix(i.kvreader, rv, []byte{'f'})
}()
return rv
}
type keyset [][]byte
func (k keyset) Len() int { return len(k) }
func (k keyset) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 }
// DumpDoc returns all rows in the index related to this doc id
func (i *IndexReader) DumpDoc(id string) chan interface{} {
idBytes := []byte(id)
rv := make(chan interface{})
go func() {
defer close(rv)
back, err := backIndexRowForDoc(i.kvreader, []byte(id))
if err != nil {
rv <- err
return
}
// no such doc
if back == nil {
return
}
// build sorted list of term keys
keys := make(keyset, 0)
for _, entry := range back.termEntries {
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0)
key := tfr.Key()
keys = append(keys, key)
}
sort.Sort(keys)
// first add all the stored rows
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
dumpPrefix(i.kvreader, rv, storedRowPrefix)
// now walk term keys in order and add them as well
if len(keys) > 0 {
it := i.kvreader.RangeIterator(keys[0], nil)
defer func() {
cerr := it.Close()
if cerr != nil {
rv <- cerr
}
}()
for _, key := range keys {
it.Seek(key)
rkey, rval, valid := it.Current()
if !valid {
break
}
rck := make([]byte, len(rkey))
copy(rck, key)
rcv := make([]byte, len(rval))
copy(rcv, rval)
row, err := ParseFromKeyValue(rck, rcv)
if err != nil {
rv <- err
return
}
rv <- row
}
}
}()
return rv
}

View File

@@ -0,0 +1,78 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
type UpsideDownCouchFieldDict struct {
indexReader *IndexReader
iterator store.KVIterator
dictRow *DictionaryRow
dictEntry *index.DictEntry
field uint16
}
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {
startKey := NewDictionaryRow(startTerm, field, 0).Key()
if endTerm == nil {
endTerm = []byte{ByteSeparator}
} else {
endTerm = incrementBytes(endTerm)
}
endKey := NewDictionaryRow(endTerm, field, 0).Key()
it := indexReader.kvreader.RangeIterator(startKey, endKey)
return &UpsideDownCouchFieldDict{
indexReader: indexReader,
iterator: it,
dictRow: &DictionaryRow{}, // Pre-alloced, reused row.
dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry.
field: field,
}, nil
}
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
key, val, valid := r.iterator.Current()
if !valid {
return nil, nil
}
err := r.dictRow.parseDictionaryK(key)
if err != nil {
return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err)
}
err = r.dictRow.parseDictionaryV(val)
if err != nil {
return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err)
}
r.dictEntry.Term = string(r.dictRow.term)
r.dictEntry.Count = r.dictRow.count
// advance the iterator to the next term
r.iterator.Next()
return r.dictEntry, nil
}
func (r *UpsideDownCouchFieldDict) Close() error {
return r.iterator.Close()
}

View File

@@ -0,0 +1,189 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
type IndexReader struct {
index *UpsideDownCouch
kvreader store.KVReader
docCount uint64
}
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists {
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
}
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
}
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, nil, nil)
}
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists {
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
}
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
}
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, termPrefix, termPrefix)
}
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
return newUpsideDownCouchDocIDReader(i)
}
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
return newUpsideDownCouchDocIDReaderOnly(i, ids)
}
func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
// first hit the back index to confirm doc exists
var backIndexRow *BackIndexRow
backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
if err != nil {
return
}
if backIndexRow == nil {
return
}
doc = document.NewDocument(id)
storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
it := i.kvreader.PrefixIterator(storedRowScanPrefix)
defer func() {
if cerr := it.Close(); err == nil && cerr != nil {
err = cerr
}
}()
key, val, valid := it.Current()
for valid {
safeVal := make([]byte, len(val))
copy(safeVal, val)
var row *StoredRow
row, err = NewStoredRowKV(key, safeVal)
if err != nil {
doc = nil
return
}
if row != nil {
fieldName := i.index.fieldCache.FieldIndexed(row.field)
field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
if field != nil {
doc.AddField(field)
}
}
it.Next()
key, val, valid = it.Current()
}
return
}
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
if back == nil {
return nil, nil
}
rv := make(index.FieldTerms, len(fields))
fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false)
if ok {
fieldsMap[id] = f
}
}
for _, entry := range back.termEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
rv[field] = append(rv[field], *entry.Term)
}
}
return rv, nil
}
func (i *IndexReader) Fields() (fields []string, err error) {
fields = make([]string, 0)
it := i.kvreader.PrefixIterator([]byte{'f'})
defer func() {
if cerr := it.Close(); err == nil && cerr != nil {
err = cerr
}
}()
key, val, valid := it.Current()
for valid {
var row UpsideDownCouchRow
row, err = ParseFromKeyValue(key, val)
if err != nil {
fields = nil
return
}
if row != nil {
fieldRow, ok := row.(*FieldRow)
if ok {
fields = append(fields, fieldRow.name)
}
}
it.Next()
key, val, valid = it.Current()
}
return
}
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
internalRow := NewInternalRow(key, nil)
return i.kvreader.Get(internalRow.Key())
}
func (i *IndexReader) DocCount() (uint64, error) {
return i.docCount, nil
}
func (i *IndexReader) Close() error {
return i.kvreader.Close()
}
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
return string(id), nil
}
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
return index.IndexInternalID(id), nil
}
func incrementBytes(in []byte) []byte {
rv := make([]byte, len(in))
copy(rv, in)
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
// didn't overflow, so stop
break
}
}
return rv
}

View File

@@ -0,0 +1,325 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"sort"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
type UpsideDownCouchTermFieldReader struct {
count uint64
indexReader *IndexReader
iterator store.KVIterator
term []byte
tfrNext *TermFrequencyRow
keyBuf []byte
field uint16
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0)
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
if err != nil {
return nil, err
}
if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
tfrNext: &TermFrequencyRow{},
field: field,
}, nil
}
err = dictionaryRow.parseDictionaryV(val)
if err != nil {
return nil, err
}
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
it := indexReader.kvreader.PrefixIterator(tfr.Key())
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
indexReader: indexReader,
iterator: it,
count: dictionaryRow.count,
term: term,
field: field,
}, nil
}
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
return r.count
}
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
if r.iterator != nil {
// We treat tfrNext also like an initialization flag, which
// tells us whether we need to invoke the underlying
// iterator.Next(). The first time, don't call iterator.Next().
if r.tfrNext != nil {
r.iterator.Next()
} else {
r.tfrNext = &TermFrequencyRow{}
}
key, val, valid := r.iterator.Current()
if valid {
tfr := r.tfrNext
err := tfr.parseKDoc(key, r.term)
if err != nil {
return nil, err
}
err = tfr.parseV(val)
if err != nil {
return nil, err
}
rv := preAlloced
if rv == nil {
rv = &index.TermFieldDoc{}
}
rv.ID = append(rv.ID, tfr.doc...)
rv.Freq = tfr.freq
rv.Norm = float64(tfr.norm)
if tfr.vectors != nil {
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
}
return rv, nil
}
}
return nil, nil
}
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
if r.iterator != nil {
if r.tfrNext == nil {
r.tfrNext = &TermFrequencyRow{}
}
tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
if err != nil {
return nil, err
}
r.iterator.Seek(r.keyBuf)
key, val, valid := r.iterator.Current()
if valid {
err := tfr.parseKDoc(key, r.term)
if err != nil {
return nil, err
}
err = tfr.parseV(val)
if err != nil {
return nil, err
}
rv = preAlloced
if rv == nil {
rv = &index.TermFieldDoc{}
}
rv.ID = append(rv.ID, tfr.doc...)
rv.Freq = tfr.freq
rv.Norm = float64(tfr.norm)
if tfr.vectors != nil {
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
}
return rv, nil
}
}
return nil, nil
}
func (r *UpsideDownCouchTermFieldReader) Close() error {
if r.indexReader != nil {
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
}
if r.iterator != nil {
return r.iterator.Close()
}
return nil
}
type UpsideDownCouchDocIDReader struct {
indexReader *IndexReader
iterator store.KVIterator
only []string
onlyPos int
onlyMode bool
}
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
startBytes := []byte{0x0}
endBytes := []byte{0xff}
bisr := NewBackIndexRow(startBytes, nil, nil)
bier := NewBackIndexRow(endBytes, nil, nil)
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
return &UpsideDownCouchDocIDReader{
indexReader: indexReader,
iterator: it,
}, nil
}
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
// ensure ids are sorted
sort.Strings(ids)
startBytes := []byte{0x0}
if len(ids) > 0 {
startBytes = []byte(ids[0])
}
endBytes := []byte{0xff}
if len(ids) > 0 {
endBytes = incrementBytes([]byte(ids[len(ids)-1]))
}
bisr := NewBackIndexRow(startBytes, nil, nil)
bier := NewBackIndexRow(endBytes, nil, nil)
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
return &UpsideDownCouchDocIDReader{
indexReader: indexReader,
iterator: it,
only: ids,
onlyMode: true,
}, nil
}
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
key, val, valid := r.iterator.Current()
if r.onlyMode {
var rv index.IndexInternalID
for valid && r.onlyPos < len(r.only) {
br, err := NewBackIndexRowKV(key, val)
if err != nil {
return nil, err
}
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
ok := r.nextOnly()
if !ok {
return nil, nil
}
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
key, val, valid = r.iterator.Current()
continue
} else {
rv = append([]byte(nil), br.doc...)
break
}
}
if valid && r.onlyPos < len(r.only) {
ok := r.nextOnly()
if ok {
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
}
return rv, nil
}
} else {
if valid {
br, err := NewBackIndexRowKV(key, val)
if err != nil {
return nil, err
}
rv := append([]byte(nil), br.doc...)
r.iterator.Next()
return rv, nil
}
}
return nil, nil
}
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
if r.onlyMode {
r.onlyPos = sort.SearchStrings(r.only, string(docID))
if r.onlyPos >= len(r.only) {
// advanced to key after our last only key
return nil, nil
}
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
key, val, valid := r.iterator.Current()
var rv index.IndexInternalID
for valid && r.onlyPos < len(r.only) {
br, err := NewBackIndexRowKV(key, val)
if err != nil {
return nil, err
}
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
// the only key we seek'd to didn't exist
// now look for the closest key that did exist in only
r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
if r.onlyPos >= len(r.only) {
// advanced to key after our last only key
return nil, nil
}
// now seek to this new only key
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
key, val, valid = r.iterator.Current()
continue
} else {
rv = append([]byte(nil), br.doc...)
break
}
}
if valid && r.onlyPos < len(r.only) {
ok := r.nextOnly()
if ok {
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
}
return rv, nil
}
} else {
bir := NewBackIndexRow(docID, nil, nil)
r.iterator.Seek(bir.Key())
key, val, valid := r.iterator.Current()
if valid {
br, err := NewBackIndexRowKV(key, val)
if err != nil {
return nil, err
}
rv := append([]byte(nil), br.doc...)
r.iterator.Next()
return rv, nil
}
}
return nil, nil
}
func (r *UpsideDownCouchDocIDReader) Close() error {
return r.iterator.Close()
}
// move the r.only pos forward one, skipping duplicates
// return true if there is more data, or false if we got to the end of the list
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
// advance 1 position, until we see a different key
// it's already sorted, so this skips duplicates
start := r.onlyPos
r.onlyPos++
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
start = r.onlyPos
r.onlyPos++
}
// inidicate if we got to the end of the list
return r.onlyPos < len(r.only)
}

View File

@@ -0,0 +1,853 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"github.com/golang/protobuf/proto"
)
const ByteSeparator byte = 0xff
type UpsideDownCouchRowStream chan UpsideDownCouchRow
type UpsideDownCouchRow interface {
KeySize() int
KeyTo([]byte) (int, error)
Key() []byte
Value() []byte
ValueSize() int
ValueTo([]byte) (int, error)
}
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
if len(key) > 0 {
switch key[0] {
case 'v':
return NewVersionRowKV(key, value)
case 'f':
return NewFieldRowKV(key, value)
case 'd':
return NewDictionaryRowKV(key, value)
case 't':
return NewTermFrequencyRowKV(key, value)
case 'b':
return NewBackIndexRowKV(key, value)
case 's':
return NewStoredRowKV(key, value)
case 'i':
return NewInternalRowKV(key, value)
}
return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
}
return nil, fmt.Errorf("Invalid empty key")
}
// VERSION
type VersionRow struct {
version uint8
}
func (v *VersionRow) Key() []byte {
return []byte{'v'}
}
func (v *VersionRow) KeySize() int {
return 1
}
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'v'
return 1, nil
}
func (v *VersionRow) Value() []byte {
return []byte{byte(v.version)}
}
func (v *VersionRow) ValueSize() int {
return 1
}
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
buf[0] = v.version
return 1, nil
}
func (v *VersionRow) String() string {
return fmt.Sprintf("Version: %d", v.version)
}
func NewVersionRow(version uint8) *VersionRow {
return &VersionRow{
version: version,
}
}
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
rv := VersionRow{}
buf := bytes.NewBuffer(value)
err := binary.Read(buf, binary.LittleEndian, &rv.version)
if err != nil {
return nil, err
}
return &rv, nil
}
// INTERNAL STORAGE
type InternalRow struct {
key []byte
val []byte
}
func (i *InternalRow) Key() []byte {
buf := make([]byte, i.KeySize())
size, _ := i.KeyTo(buf)
return buf[:size]
}
func (i *InternalRow) KeySize() int {
return len(i.key) + 1
}
func (i *InternalRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'i'
actual := copy(buf[1:], i.key)
return 1 + actual, nil
}
func (i *InternalRow) Value() []byte {
return i.val
}
func (i *InternalRow) ValueSize() int {
return len(i.val)
}
func (i *InternalRow) ValueTo(buf []byte) (int, error) {
actual := copy(buf, i.val)
return actual, nil
}
func (i *InternalRow) String() string {
return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
}
func NewInternalRow(key, val []byte) *InternalRow {
return &InternalRow{
key: key,
val: val,
}
}
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
rv := InternalRow{}
rv.key = key[1:]
rv.val = value
return &rv, nil
}
// FIELD definition
type FieldRow struct {
index uint16
name string
}
func (f *FieldRow) Key() []byte {
buf := make([]byte, f.KeySize())
size, _ := f.KeyTo(buf)
return buf[:size]
}
func (f *FieldRow) KeySize() int {
return 3
}
func (f *FieldRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'f'
binary.LittleEndian.PutUint16(buf[1:3], f.index)
return 3, nil
}
func (f *FieldRow) Value() []byte {
return append([]byte(f.name), ByteSeparator)
}
func (f *FieldRow) ValueSize() int {
return len(f.name) + 1
}
func (f *FieldRow) ValueTo(buf []byte) (int, error) {
size := copy(buf, f.name)
buf[size] = ByteSeparator
return size + 1, nil
}
func (f *FieldRow) String() string {
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
}
func NewFieldRow(index uint16, name string) *FieldRow {
return &FieldRow{
index: index,
name: name,
}
}
func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
rv := FieldRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
err = binary.Read(buf, binary.LittleEndian, &rv.index)
if err != nil {
return nil, err
}
buf = bytes.NewBuffer(value)
rv.name, err = buf.ReadString(ByteSeparator)
if err != nil {
return nil, err
}
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
return &rv, nil
}
// DICTIONARY
const DictionaryRowMaxValueSize = binary.MaxVarintLen64
type DictionaryRow struct {
term []byte
count uint64
field uint16
}
func (dr *DictionaryRow) Key() []byte {
buf := make([]byte, dr.KeySize())
size, _ := dr.KeyTo(buf)
return buf[:size]
}
func (dr *DictionaryRow) KeySize() int {
return len(dr.term) + 3
}
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
size := copy(buf[3:], dr.term)
return size + 3, nil
}
func (dr *DictionaryRow) Value() []byte {
buf := make([]byte, dr.ValueSize())
size, _ := dr.ValueTo(buf)
return buf[:size]
}
func (dr *DictionaryRow) ValueSize() int {
return DictionaryRowMaxValueSize
}
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
used := binary.PutUvarint(buf, dr.count)
return used, nil
}
func (dr *DictionaryRow) String() string {
return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
return &DictionaryRow{
term: term,
field: field,
count: count,
}
}
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
rv, err := NewDictionaryRowK(key)
if err != nil {
return nil, err
}
err = rv.parseDictionaryV(value)
if err != nil {
return nil, err
}
return rv, nil
}
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
rv := &DictionaryRow{}
err := rv.parseDictionaryK(key)
if err != nil {
return nil, err
}
return rv, nil
}
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
dr.field = binary.LittleEndian.Uint16(key[1:3])
if dr.term != nil {
dr.term = dr.term[:0]
}
dr.term = append(dr.term, key[3:]...)
return nil
}
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
}
dr.count = count
return nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
field uint16
arrayPositions []uint64
pos uint64
start uint64
end uint64
}
func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
type TermFrequencyRow struct {
term []byte
doc []byte
freq uint64
vectors []*TermVector
norm float32
field uint16
}
func (tfr *TermFrequencyRow) Term() []byte {
return tfr.term
}
func (tfr *TermFrequencyRow) Freq() uint64 {
return tfr.freq
}
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
buf := make([]byte, 3)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
return buf
}
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
buf := make([]byte, 3+len(tfr.term))
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
copy(buf[3:], tfr.term)
return buf
}
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
buf := make([]byte, 3+len(tfr.term)+1)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
buf[3+termLen] = ByteSeparator
return buf
}
func (tfr *TermFrequencyRow) Key() []byte {
buf := make([]byte, tfr.KeySize())
size, _ := tfr.KeyTo(buf)
return buf[:size]
}
func (tfr *TermFrequencyRow) KeySize() int {
return 3 + len(tfr.term) + 1 + len(tfr.doc)
}
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], tfr.doc)
return 3 + termLen + 1 + docLen, nil
}
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
keySize := tfr.KeySize()
if cap(buf) < keySize {
buf = make([]byte, keySize)
}
actualSize, err := tfr.KeyTo(buf[0:keySize])
return buf[0:actualSize], err
}
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.Key()
}
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.KeySize()
}
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.KeyTo(buf)
}
func (tfr *TermFrequencyRow) Value() []byte {
buf := make([]byte, tfr.ValueSize())
size, _ := tfr.ValueTo(buf)
return buf[:size]
}
func (tfr *TermFrequencyRow) ValueSize() int {
bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
for _, vector := range tfr.vectors {
bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
}
return bufLen
}
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)
normuint32 := math.Float32bits(tfr.norm)
newbuf := buf[used : used+binary.MaxVarintLen64]
used += binary.PutUvarint(newbuf, uint64(normuint32))
for _, vector := range tfr.vectors {
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
for _, arrayPosition := range vector.arrayPositions {
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
}
}
return used, nil
}
func (tfr *TermFrequencyRow) String() string {
return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
}
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
tfr.term = term
tfr.field = field
tfr.doc = docID
tfr.freq = freq
tfr.norm = norm
return tfr
}
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
return &TermFrequencyRow{
term: term,
field: field,
doc: docID,
freq: freq,
norm: norm,
}
}
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
return &TermFrequencyRow{
term: term,
field: field,
doc: docID,
freq: freq,
norm: norm,
vectors: vectors,
}
}
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
rv := &TermFrequencyRow{}
err := rv.parseK(key)
if err != nil {
return nil, err
}
return rv, nil
}
func (tfr *TermFrequencyRow) parseK(key []byte) error {
keyLen := len(key)
if keyLen < 3 {
return fmt.Errorf("invalid term frequency key, no valid field")
}
tfr.field = binary.LittleEndian.Uint16(key[1:3])
termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
if termEndPos < 0 {
return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
}
tfr.term = key[3 : 3+termEndPos]
docLen := keyLen - (3 + termEndPos + 1)
if docLen < 1 {
return fmt.Errorf("invalid term frequency key, empty docid")
}
tfr.doc = key[3+termEndPos+1:]
return nil
}
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
tfr.doc = key[3+len(term)+1:]
if len(tfr.doc) <= 0 {
return fmt.Errorf("invalid term frequency key, empty docid")
}
return nil
}
func (tfr *TermFrequencyRow) parseV(value []byte) error {
var bytesRead int
tfr.freq, bytesRead = binary.Uvarint(value)
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, invalid frequency")
}
currOffset := bytesRead
var norm uint64
norm, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, no norm")
}
currOffset += bytesRead
tfr.norm = math.Float32frombits(uint32(norm))
tfr.vectors = nil
var field uint64
field, bytesRead = binary.Uvarint(value[currOffset:])
for bytesRead > 0 {
currOffset += bytesRead
tv := TermVector{}
tv.field = uint16(field)
// at this point we expect at least one term vector
if tfr.vectors == nil {
tfr.vectors = make([]*TermVector, 0)
}
tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no position")
}
currOffset += bytesRead
tv.start, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no start")
}
currOffset += bytesRead
tv.end, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no end")
}
currOffset += bytesRead
var arrayPositionsLen uint64 = 0
arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
}
currOffset += bytesRead
if arrayPositionsLen > 0 {
tv.arrayPositions = make([]uint64, arrayPositionsLen)
for i := 0; uint64(i) < arrayPositionsLen; i++ {
tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
if bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
}
currOffset += bytesRead
}
}
tfr.vectors = append(tfr.vectors, &tv)
// try to read next record (may not exist)
field, bytesRead = binary.Uvarint(value[currOffset:])
}
if len(value[currOffset:]) > 0 && bytesRead <= 0 {
return fmt.Errorf("invalid term frequency value, vector field invalid")
}
return nil
}
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
rv, err := NewTermFrequencyRowK(key)
if err != nil {
return nil, err
}
err = rv.parseV(value)
if err != nil {
return nil, err
}
return rv, nil
}
type BackIndexRow struct {
doc []byte
termEntries []*BackIndexTermEntry
storedEntries []*BackIndexStoreEntry
}
func (br *BackIndexRow) AllTermKeys() [][]byte {
if br == nil {
return nil
}
rv := make([][]byte, len(br.termEntries))
for i, termEntry := range br.termEntries {
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
rv[i] = termRow.Key()
}
return rv
}
func (br *BackIndexRow) AllStoredKeys() [][]byte {
if br == nil {
return nil
}
rv := make([][]byte, len(br.storedEntries))
for i, storedEntry := range br.storedEntries {
storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
rv[i] = storedRow.Key()
}
return rv
}
func (br *BackIndexRow) Key() []byte {
buf := make([]byte, br.KeySize())
size, _ := br.KeyTo(buf)
return buf[:size]
}
func (br *BackIndexRow) KeySize() int {
return len(br.doc) + 1
}
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) {
buf[0] = 'b'
used := copy(buf[1:], br.doc)
return used + 1, nil
}
func (br *BackIndexRow) Value() []byte {
buf := make([]byte, br.ValueSize())
size, _ := br.ValueTo(buf)
return buf[:size]
}
func (br *BackIndexRow) ValueSize() int {
birv := &BackIndexRowValue{
TermEntries: br.termEntries,
StoredEntries: br.storedEntries,
}
return birv.Size()
}
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
birv := &BackIndexRowValue{
TermEntries: br.termEntries,
StoredEntries: br.storedEntries,
}
return birv.MarshalTo(buf)
}
func (br *BackIndexRow) String() string {
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
}
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
return &BackIndexRow{
doc: docID,
termEntries: entries,
storedEntries: storedFields,
}
}
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
rv := BackIndexRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
rv.doc, err = buf.ReadBytes(ByteSeparator)
if err == io.EOF && len(rv.doc) < 1 {
err = fmt.Errorf("invalid doc length 0 - % x", key)
}
if err != nil && err != io.EOF {
return nil, err
} else if err == nil {
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
}
var birv BackIndexRowValue
err = proto.Unmarshal(value, &birv)
if err != nil {
return nil, err
}
rv.termEntries = birv.TermEntries
rv.storedEntries = birv.StoredEntries
return &rv, nil
}
// STORED
type StoredRow struct {
doc []byte
field uint16
arrayPositions []uint64
typ byte
value []byte
}
func (s *StoredRow) Key() []byte {
buf := make([]byte, s.KeySize())
size, _ := s.KeyTo(buf)
return buf[0:size]
}
func (s *StoredRow) KeySize() int {
return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
}
func (s *StoredRow) KeyTo(buf []byte) (int, error) {
docLen := len(s.doc)
buf[0] = 's'
copy(buf[1:], s.doc)
buf[1+docLen] = ByteSeparator
binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
bytesUsed := 1 + docLen + 1 + 2
for _, arrayPosition := range s.arrayPositions {
varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
bytesUsed += varbytes
}
return bytesUsed, nil
}
func (s *StoredRow) Value() []byte {
buf := make([]byte, s.ValueSize())
size, _ := s.ValueTo(buf)
return buf[:size]
}
func (s *StoredRow) ValueSize() int {
return len(s.value) + 1
}
func (s *StoredRow) ValueTo(buf []byte) (int, error) {
buf[0] = s.typ
used := copy(buf[1:], s.value)
return used + 1, nil
}
func (s *StoredRow) String() string {
return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
}
func (s *StoredRow) ScanPrefixForDoc() []byte {
docLen := len(s.doc)
buf := make([]byte, 1+docLen+1)
buf[0] = 's'
copy(buf[1:], s.doc)
buf[1+docLen] = ByteSeparator
return buf
}
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
return &StoredRow{
doc: docID,
field: field,
arrayPositions: arrayPositions,
typ: typ,
value: value,
}
}
func NewStoredRowK(key []byte) (*StoredRow, error) {
rv := StoredRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
rv.doc, err = buf.ReadBytes(ByteSeparator)
if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
err = fmt.Errorf("invalid doc length 0")
return nil, err
}
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
err = binary.Read(buf, binary.LittleEndian, &rv.field)
if err != nil {
return nil, err
}
rv.arrayPositions = make([]uint64, 0)
nextArrayPos, err := binary.ReadUvarint(buf)
for err == nil {
rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
nextArrayPos, err = binary.ReadUvarint(buf)
}
return &rv, nil
}
func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
rv, err := NewStoredRowK(key)
if err != nil {
return nil, err
}
rv.typ = value[0]
rv.value = value[1:]
return rv, nil
}

View File

@@ -0,0 +1,76 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"encoding/binary"
)
var mergeOperator upsideDownMerge
var dictionaryTermIncr []byte
var dictionaryTermDecr []byte
func init() {
dictionaryTermIncr = make([]byte, 8)
binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
dictionaryTermDecr = make([]byte, 8)
var negOne = int64(-1)
binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
}
type upsideDownMerge struct{}
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
// set up record based on key
dr, err := NewDictionaryRowK(key)
if err != nil {
return nil, false
}
if len(existingValue) > 0 {
// if existing value, parse it
err = dr.parseDictionaryV(existingValue)
if err != nil {
return nil, false
}
}
// now process operands
for _, operand := range operands {
next := int64(binary.LittleEndian.Uint64(operand))
if next < 0 && uint64(-next) > dr.count {
// subtracting next from existing would overflow
dr.count = 0
} else if next < 0 {
dr.count -= uint64(-next)
} else {
dr.count += uint64(next)
}
}
return dr.Value(), true
}
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
left := int64(binary.LittleEndian.Uint64(leftOperand))
right := int64(binary.LittleEndian.Uint64(rightOperand))
rv := make([]byte, 8)
binary.LittleEndian.PutUint64(rv, uint64(left+right))
return rv, true
}
func (m *upsideDownMerge) Name() string {
return "upsideDownMerge"
}

View File

@@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"encoding/json"
"sync/atomic"
"github.com/blevesearch/bleve/index/store"
)
type indexStat struct {
updates, deletes, batches, errors uint64
analysisTime, indexTime uint64
termSearchersStarted uint64
termSearchersFinished uint64
numPlainTextBytesIndexed uint64
i *UpsideDownCouch
}
func (i *indexStat) statsMap() map[string]interface{} {
m := map[string]interface{}{}
m["updates"] = atomic.LoadUint64(&i.updates)
m["deletes"] = atomic.LoadUint64(&i.deletes)
m["batches"] = atomic.LoadUint64(&i.batches)
m["errors"] = atomic.LoadUint64(&i.errors)
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
m["index_time"] = atomic.LoadUint64(&i.indexTime)
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
if o, ok := i.i.store.(store.KVStoreStats); ok {
m["kv"] = o.StatsMap()
}
return m
}
func (i *indexStat) MarshalJSON() ([]byte, error) {
m := i.statsMap()
return json.Marshal(m)
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,684 @@
// Code generated by protoc-gen-gogo.
// source: upsidedown.proto
// DO NOT EDIT!
/*
Package upsidedown is a generated protocol buffer package.
It is generated from these files:
upsidedown.proto
It has these top-level messages:
BackIndexTermEntry
BackIndexStoreEntry
BackIndexRowValue
*/
package upsidedown
import proto "github.com/golang/protobuf/proto"
import math "math"
import io "io"
import fmt "fmt"
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
type BackIndexTermEntry struct {
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"`
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} }
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexTermEntry) ProtoMessage() {}
func (m *BackIndexTermEntry) GetTerm() string {
if m != nil && m.Term != nil {
return *m.Term
}
return ""
}
func (m *BackIndexTermEntry) GetField() uint32 {
if m != nil && m.Field != nil {
return *m.Field
}
return 0
}
type BackIndexStoreEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexStoreEntry) ProtoMessage() {}
func (m *BackIndexStoreEntry) GetField() uint32 {
if m != nil && m.Field != nil {
return *m.Field
}
return 0
}
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
if m != nil {
return m.ArrayPositions
}
return nil
}
type BackIndexRowValue struct {
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"`
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
func (*BackIndexRowValue) ProtoMessage() {}
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry {
if m != nil {
return m.TermEntries
}
return nil
}
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
if m != nil {
return m.StoredEntries
}
return nil
}
func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + int(stringLen)
if postIndex > l {
return io.ErrUnexpectedEOF
}
s := string(data[iNdEx:postIndex])
m.Term = &s
iNdEx = postIndex
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000002)
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
if hasFields[0]&uint64(0x00000002) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil
}
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
}
var v uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.ArrayPositions = append(m.ArrayPositions, v)
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil
}
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{})
if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
return nil
}
func skipUpsidedown(data []byte) (n int, err error) {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return 0, io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
wireType := int(wire & 0x7)
switch wireType {
case 0:
for {
if iNdEx >= l {
return 0, io.ErrUnexpectedEOF
}
iNdEx++
if data[iNdEx-1] < 0x80 {
break
}
}
return iNdEx, nil
case 1:
iNdEx += 8
return iNdEx, nil
case 2:
var length int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return 0, io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
length |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
iNdEx += length
if length < 0 {
return 0, ErrInvalidLengthUpsidedown
}
return iNdEx, nil
case 3:
for {
var innerWire uint64
var start int = iNdEx
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return 0, io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
innerWire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
innerWireType := int(innerWire & 0x7)
if innerWireType == 4 {
break
}
next, err := skipUpsidedown(data[start:])
if err != nil {
return 0, err
}
iNdEx = start + next
}
return iNdEx, nil
case 4:
return iNdEx, nil
case 5:
iNdEx += 4
return iNdEx, nil
default:
return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
}
}
panic("unreachable")
}
var (
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
func (m *BackIndexTermEntry) Size() (n int) {
var l int
_ = l
if m.Term != nil {
l = len(*m.Term)
n += 1 + l + sovUpsidedown(uint64(l))
}
if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field))
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *BackIndexStoreEntry) Size() (n int) {
var l int
_ = l
if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field))
}
if len(m.ArrayPositions) > 0 {
for _, e := range m.ArrayPositions {
n += 1 + sovUpsidedown(uint64(e))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *BackIndexRowValue) Size() (n int) {
var l int
_ = l
if len(m.TermEntries) > 0 {
for _, e := range m.TermEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if len(m.StoredEntries) > 0 {
for _, e := range m.StoredEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func sovUpsidedown(x uint64) (n int) {
for {
n++
x >>= 7
if x == 0 {
break
}
}
return n
}
func sozUpsidedown(x uint64) (n int) {
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
}
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if m.Term == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term)))
i += copy(data[i:], *m.Term)
}
if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0x10
i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
}
if len(m.ArrayPositions) > 0 {
for _, num := range m.ArrayPositions {
data[i] = 0x10
i++
i = encodeVarintUpsidedown(data, i, uint64(num))
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
if err != nil {
return nil, err
}
return data[:n], nil
}
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if len(m.TermEntries) > 0 {
for _, msg := range m.TermEntries {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
if len(m.StoredEntries) > 0 {
for _, msg := range m.StoredEntries {
data[i] = 0x12
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
n, err := msg.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
return i, nil
}
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
data[offset] = uint8(v)
data[offset+1] = uint8(v >> 8)
data[offset+2] = uint8(v >> 16)
data[offset+3] = uint8(v >> 24)
data[offset+4] = uint8(v >> 32)
data[offset+5] = uint8(v >> 40)
data[offset+6] = uint8(v >> 48)
data[offset+7] = uint8(v >> 56)
return offset + 8
}
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
data[offset] = uint8(v)
data[offset+1] = uint8(v >> 8)
data[offset+2] = uint8(v >> 16)
data[offset+3] = uint8(v >> 24)
return offset + 4
}
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
for v >= 1<<7 {
data[offset] = uint8(v&0x7f | 0x80)
v >>= 7
offset++
}
data[offset] = uint8(v)
return offset + 1
}

View File

@@ -0,0 +1,14 @@
message BackIndexTermEntry {
required string term = 1;
required uint32 field = 2;
}
message BackIndexStoreEntry {
required uint32 field = 1;
repeated uint64 arrayPositions = 2;
}
message BackIndexRowValue {
repeated BackIndexTermEntry termEntries = 1;
repeated BackIndexStoreEntry storedEntries = 2;
}