
Improve issue search (#2387)

* Improve issue indexer

* Fix new issue sqlite bug

* Different test indexer paths for each db

* Add integration indexer paths to make clean
Ethan Koenig authored 2017-09-16 13:16:21 -07:00, committed by Lauris BH
parent 52e11b24bf · commit b0f7457d9e
122 changed files with 15280 additions and 1458 deletions
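The change repeated across the vendored bleve searchers below is the replacement of the bare explain bool argument with a search.SearcherOptions struct, which the scorers now receive and which also carries IncludeTermVectors (set explicitly by the phrase searcher). A minimal sketch of a caller on the new signatures; the Explain field name and the search/searcher import path are assumptions, not shown in this diff:

package example

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
	"github.com/blevesearch/bleve/search/searcher" // assumed import path for "package searcher"
)

// newExplainedBooleanSearcher builds a boolean searcher using the new
// options-based constructor; Explain is assumed to replace the old flag.
func newExplainedBooleanSearcher(ir index.IndexReader,
	must, should, mustNot search.Searcher) (search.Searcher, error) {
	opts := search.SearcherOptions{Explain: true}
	return searcher.NewBooleanSearcher(ir, must, should, mustNot, opts)
}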

View File

@@ -38,14 +38,14 @@ type BooleanSearcher struct {
initialized bool
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) {
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
// build our searcher
rv := BooleanSearcher{
indexReader: indexReader,
mustSearcher: mustSearcher,
shouldSearcher: shouldSearcher,
mustNotSearcher: mustNotSearcher,
scorer: scorer.NewConjunctionQueryScorer(explain),
scorer: scorer.NewConjunctionQueryScorer(options),
matches: make([]*search.DocumentMatch, 2),
}
rv.computeQueryNorm()

View File

@@ -31,10 +31,10 @@ type ConjunctionSearcher struct {
maxIDIdx int
scorer *scorer.ConjunctionQueryScorer
initialized bool
explain bool
options search.SearcherOptions
}
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) {
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
@@ -45,10 +45,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
// build our searcher
rv := ConjunctionSearcher{
indexReader: indexReader,
explain: explain,
options: options,
searchers: searchers,
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewConjunctionQueryScorer(explain),
scorer: scorer.NewConjunctionQueryScorer(options),
}
rv.computeQueryNorm()
return &rv, nil

View File

@@ -50,11 +50,22 @@ func tooManyClauses(count int) bool {
}
func tooManyClausesErr() error {
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount)
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
DisjunctionMaxClauseCount)
}
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
if tooManyClauses(len(qsearchers)) {
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
*DisjunctionSearcher, error) {
return newDisjunctionSearcher(indexReader, qsearchers, min, options,
true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr()
}
// build the downstream searchers
@@ -70,7 +81,7 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
searchers: searchers,
numSearchers: len(searchers),
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewDisjunctionQueryScorer(explain),
scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)),
matchingIdxs: make([]int, len(searchers)),
@@ -161,7 +172,8 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
}
}
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@@ -199,7 +211,8 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
return rv, nil
}
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {

View File

@@ -28,13 +28,13 @@ type DocIDSearcher struct {
}
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
explain bool) (searcher *DocIDSearcher, err error) {
options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
reader, err := indexReader.DocIDReaderOnly(ids)
if err != nil {
return nil, err
}
scorer := scorer.NewConstantScorer(1.0, boost, explain)
scorer := scorer.NewConstantScorer(1.0, boost, options)
return &DocIDSearcher{
scorer: scorer,
reader: reader,

View File

@@ -0,0 +1,88 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
type FilterFunc func(d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc
type FilteringSearcher struct {
child search.Searcher
accept FilterFunc
}
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
return &FilteringSearcher{
child: s,
accept: filter,
}
}
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {
if f.accept(next) {
return next, nil
}
next, err = f.child.Next(ctx)
}
return nil, err
}
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
adv, err := f.child.Advance(ctx, ID)
if err != nil {
return nil, err
}
if adv == nil {
return nil, nil
}
if f.accept(adv) {
return adv, nil
}
return f.Next(ctx)
}
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}
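FilteringSearcher is one of the new building blocks: it wraps any searcher and drops hits its FilterFunc rejects, which the geo searchers below use for exact boundary and distance checks. A hedged usage sketch, with the same assumed imports as the sketch above; the predicate is purely illustrative:

// keepPositiveScores wraps an existing searcher so Next/Advance only
// return matches the predicate accepts.
func keepPositiveScores(inner search.Searcher) search.Searcher {
	accept := func(d *search.DocumentMatch) bool {
		return d.Score > 0 // illustrative predicate only
	}
	return searcher.NewFilteringSearcher(inner, accept)
}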

View File

@@ -19,17 +19,9 @@ import (
"github.com/blevesearch/bleve/search"
)
type FuzzySearcher struct {
indexReader index.IndexReader
term string
prefix int
fuzziness int
field string
explain bool
searcher *DisjunctionSearcher
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
func NewFuzzySearcher(indexReader index.IndexReader, term string,
prefix, fuzziness int, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// Note: we don't byte slice the term for a prefix because of runes.
prefixTerm := ""
for i, r := range term {
@@ -40,46 +32,18 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
}
}
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm)
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
field, prefixTerm)
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return &FuzzySearcher{
indexReader: indexReader,
term: term,
prefix: prefix,
fuzziness: fuzziness,
field: field,
explain: explain,
searcher: searcher,
}, nil
return NewMultiTermSearcher(indexReader, candidateTerms, field,
boost, options, true)
}
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -108,36 +72,3 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzzine
return rv, err
}
func (s *FuzzySearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *FuzzySearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *FuzzySearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *FuzzySearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *FuzzySearcher) Close() error {
return s.searcher.Close()
}
func (s *FuzzySearcher) Min() int {
return 0
}
func (s *FuzzySearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}
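With this refactor NewFuzzySearcher just collects candidate terms and delegates to NewMultiTermSearcher, returning a plain search.Searcher rather than the dedicated FuzzySearcher wrapper above. A minimal sketch of a call under the new signature, with the same assumed imports; term, field and fuzziness values are illustrative:

// fuzzyTitle finds documents whose "title" field contains a term within
// edit distance 2 of "gitea", with no required literal prefix.
func fuzzyTitle(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewFuzzySearcher(ir, "gitea", 0, 2, "title", 1.0, opts)
}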

View File

@@ -0,0 +1,173 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) {
// track list of opened searchers, for cleanup on early exit
var openedSearchers []search.Searcher
cleanupOpenedSearchers := func() {
for _, s := range openedSearchers {
_ = s.Close()
}
}
// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
var onBoundarySearcher search.Searcher
if len(onBoundaryTerms) > 0 {
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
onBoundaryTerms, field, boost, options, false)
if err != nil {
return nil, err
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
var notOnBoundarySearcher search.Searcher
if len(notOnBoundaryTerms) > 0 {
var err error
notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader,
notOnBoundaryTerms, field, boost, options, false)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
openedSearchers = append(openedSearchers, notOnBoundarySearcher)
}
if onBoundarySearcher != nil && notOnBoundarySearcher != nil {
rv, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{
onBoundarySearcher,
notOnBoundarySearcher,
},
0, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
return rv, nil
} else if onBoundarySearcher != nil {
return onBoundarySearcher, nil
} else if notOnBoundarySearcher != nil {
return notOnBoundarySearcher, nil
}
return NewMatchNoneSearcher(indexReader)
}
var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
}
return nil, nil
}
func buildRectFilter(indexReader index.IndexReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
}
return false
}
}
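NewGeoBoundingBoxSearcher turns the box into morton-coded term ranges, filters the cells that touch the boundary through a FilteringSearcher, and joins boundary and interior cells with a disjunction. A sketch of a call, with the same assumed imports; coordinates and field name are illustrative, and the final argument enables the boundary check:

// berlinBox matches documents whose "location" geopoint falls inside a
// rough bounding box around Berlin.
func berlinBox(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewGeoBoundingBoxSearcher(ir,
		13.0, 52.3, // minLon, minLat
		13.8, 52.7, // maxLon, maxLat
		"location", 1.0, opts, true)
}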

View File

@@ -0,0 +1,117 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
centerLat, dist float64, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// compute bounding box containing the circle
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.RectFromPointDistance(centerLon, centerLat, dist)
if err != nil {
return nil, err
}
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options)
if err != nil {
return nil, err
}
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(boxSearcher,
buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
// if the desired box crosses the dateline, it is automatically split into
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false)
if err != nil {
return nil, err
}
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
if err != nil {
_ = leftSearcher.Close()
return nil, err
}
boxSearcher, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{leftSearcher, rightSearcher}, 0, options)
if err != nil {
_ = leftSearcher.Close()
_ = rightSearcher.Close()
return nil, err
}
return boxSearcher, nil
}
// build geo bounding box searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
if err != nil {
return nil, err
}
return boxSearcher, nil
}
func buildDistFilter(indexReader index.IndexReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
dist := geo.Haversin(lon, lat, centerLon, centerLat)
if dist <= maxDist/1000 {
return true
}
}
return false
}
}
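NewGeoPointDistanceSearcher first searches the bounding box around the circle (splitting it at the dateline when needed) and then filters hits by true distance; the dist <= maxDist/1000 comparison suggests the dist argument is given in metres while geo.Haversin works in kilometres. A sketch with the same assumed imports and illustrative values:

// nearbyBerlin matches documents whose "location" geopoint lies within
// 5000 m (reading dist as metres, per the /1000 conversion above) of the centre.
func nearbyBerlin(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	return searcher.NewGeoPointDistanceSearcher(ir, 13.4, 52.5, 5000, "location", 1.0, opts)
}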

View File

@@ -27,7 +27,7 @@ type MatchAllSearcher struct {
count uint64
}
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) {
reader, err := indexReader.DocIDReaderAll()
if err != nil {
return nil, err
@@ -37,7 +37,7 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain b
_ = reader.Close()
return nil, err
}
scorer := scorer.NewConstantScorer(1.0, boost, explain)
scorer := scorer.NewConstantScorer(1.0, boost, options)
return &MatchAllSearcher{
indexReader: indexReader,
reader: reader,

View File

@@ -0,0 +1,85 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
}
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcherBytes(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
}
func newMultiTermSearcherBytes(indexReader index.IndexReader,
searchers []search.Searcher, field string, boost float64,
options search.SearcherOptions, limit bool) (
search.Searcher, error) {
// build disjunction searcher of these ranges
searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options,
limit)
if err != nil {
for _, s := range searchers {
_ = s.Close()
}
return nil, err
}
return searcher, nil
}
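NewMultiTermSearcher is the shared helper the fuzzy, regexp, numeric-range, prefix and term-range searchers now delegate to: one term searcher per term, joined by a disjunction, with the clause-count limit enforced when the final argument is true. A sketch with the same assumed imports; terms and field are illustrative:

// anyLabel matches documents carrying any of a fixed set of label terms;
// the trailing true keeps the DisjunctionMaxClauseCount limit in force.
func anyLabel(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	labels := []string{"bug", "enhancement", "question"}
	return searcher.NewMultiTermSearcher(ir, labels, "labels", 1.0, opts, true)
}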

View File

@@ -17,22 +17,16 @@ package searcher
import (
"bytes"
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
type NumericRangeSearcher struct {
indexReader index.IndexReader
min *float64
max *float64
field string
explain bool
searcher *DisjunctionSearcher
}
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
func NewNumericRangeSearcher(indexReader index.IndexReader,
min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
// account for unbounded edges
if min == nil {
negInf := math.Inf(-1)
@@ -62,64 +56,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
var err error
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr()
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
func filterCandidateTerms(indexReader index.IndexReader,
terms [][]byte, field string) (rv [][]byte, err error) {
fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
if err != nil {
qsearchersClose()
return nil, err
}
return &NumericRangeSearcher{
indexReader: indexReader,
min: min,
max: max,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
// enumerate the terms and check against list of terms
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
termBytes := []byte(tfd.Term)
i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 {
rv = append(rv, terms[i])
}
terms = terms[i:]
tfd, err = fieldDict.Next()
}
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *NumericRangeSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *NumericRangeSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *NumericRangeSearcher) Close() error {
return s.searcher.Close()
return rv, err
}
type termRange struct {
@@ -190,7 +169,8 @@ func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
@@ -225,11 +205,3 @@ func newRangeBytes(minBytes, maxBytes []byte) *termRange {
endTerm: maxBytes,
}
}
func (s *NumericRangeSearcher) Min() int {
return 0
}
func (s *NumericRangeSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}
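The numeric range searcher now pre-filters its candidate terms against the field dictionary (filterCandidateTerms) before handing them to NewMultiTermSearcherBytes. Bounds are passed as pointers so either edge can be left unbounded; a sketch with the same assumed imports and illustrative values:

// commentCountRange matches documents whose numeric "comments" field is
// >= 10 and < 100; nil bounds would leave an edge unbounded.
func commentCountRange(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	min, max := 10.0, 100.0
	incMin, incMax := true, false
	return searcher.NewNumericRangeSearcher(ir, &min, &max, &incMin, &incMax, "comments", 1.0, opts)
}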

View File

@@ -15,6 +15,7 @@
package searcher
import (
"fmt"
"math"
"github.com/blevesearch/bleve/index"
@@ -27,11 +28,72 @@ type PhraseSearcher struct {
queryNorm float64
currMust *search.DocumentMatch
slop int
terms []string
terms [][]string
initialized bool
}
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string
mterms := make([][]string, len(terms))
for i, term := range terms {
mterms[i] = []string{term}
}
return NewMultiPhraseSearcher(indexReader, mterms, field, options)
}
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
options.IncludeTermVectors = true
var termPositionSearchers []search.Searcher
for _, termPos := range terms {
if len(termPos) == 1 && termPos[0] != "" {
// single term
ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termPositionSearchers = append(termPositionSearchers, ts)
} else if len(termPos) > 1 {
// multiple terms
var termSearchers []search.Searcher
for _, term := range termPos {
if term == "" {
continue
}
ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termSearchers = append(termSearchers, ts)
}
disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
}
termPositionSearchers = append(termPositionSearchers, disjunction)
}
}
mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
}
// build our searcher
rv := PhraseSearcher{
indexReader: indexReader,
@@ -96,68 +158,150 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
}
}
var rv *search.DocumentMatch
for s.currMust != nil {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
firstTerm := s.terms[0]
for field, termLocMap := range s.currMust.Locations {
rvtlm := make(search.TermLocationMap, 0)
locations, ok := termLocMap[firstTerm]
if ok {
OUTER:
for _, location := range locations {
crvtlm := make(search.TermLocationMap, 0)
INNER:
for i := 0; i < len(s.terms); i++ {
nextTerm := s.terms[i]
if nextTerm != "" {
// look through all these term locations
// to try and find the correct offsets
nextLocations, ok := termLocMap[nextTerm]
if ok {
for _, nextLocation := range nextLocations {
if nextLocation.Pos == location.Pos+float64(i) && nextLocation.SameArrayElement(location) {
// found a location match for this term
crvtlm.AddLocation(nextTerm, nextLocation)
continue INNER
}
}
// if we got here we didn't find a location match for this term
continue OUTER
} else {
continue OUTER
}
}
}
// if we got here all the terms matched
freq++
search.MergeTermLocationMaps(rvtlm, crvtlm)
rvftlm[field] = rvtlm
}
}
}
if freq > 0 {
// return match
rv = s.currMust
rv.Locations = rvftlm
err := s.advanceNextMust(ctx)
if err != nil {
return nil, err
}
return rv, nil
}
// check this match against phrase constraints
rv := s.checkCurrMustMatch(ctx)
// prepare for next iteration (either loop or subsequent call to Next())
err := s.advanceNextMust(ctx)
if err != nil {
return nil, err
}
// if match satisfied phrase constraints return it as a hit
if rv != nil {
return rv, nil
}
}
return nil, nil
}
// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisfies the pre-condition searcher)
// also satisfies the phrase constraints. If so, it returns a DocumentMatch
// for this document, otherwise nil
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
// typically we would expect there to only actually be results in
// one field, but we allow for this to not be the case
// but, we note that phrase constraints can only be satisfied within
// a single field, so we can check them each independently
for field, tlm := range s.currMust.Locations {
f, rvtlm := s.checkCurrMustMatchField(ctx, tlm)
if f > 0 {
freq += f
rvftlm[field] = rvtlm
}
}
if freq > 0 {
// return match
rv := s.currMust
rv.Locations = rvftlm
return rv
}
return nil
}
// checkCurrMustMatchField is solely concerned with determining if one particular
// field within the currMust DocumentMatch Locations satisfies the phrase
// constraints (possibly more than once). If so, the number of times it was
// satisfied and these locations are returned; otherwise 0 and either
// a nil or empty TermLocationMap
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
rv := make(search.TermLocationMap, len(s.terms))
for _, p := range paths {
p.MergeInto(rv)
}
return len(paths), rv
}
type phrasePart struct {
term string
loc *search.Location
}
type phrasePath []*phrasePart
func (p phrasePath) MergeInto(in search.TermLocationMap) {
for _, pp := range p {
in[pp.term] = append(in[pp.term], pp.loc)
}
}
// findPhrasePaths is a function to identify phrase matches from a set of known
// term locations. The implementation is recursive, so care must be taken
// with arguments and return values.
//
// prevPos - the position of the previously matched term, 0 on first invocation
// ap - the array positions of the previously matched location
// phraseTerms - slice of alternative terms for each phrase position;
//               may contain empty strings as placeholders (don't care)
// tlm - the Term Location Map containing all relevant term locations
// p - the current path being explored (appended to in recursive calls);
//     this is the primary state being built during the traversal
// remainingSlop - the positional slop still available for gaps between terms
//
// returns slice of paths, or nil if the invocation did not find any successful paths
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
// no more terms
if len(phraseTerms) < 1 {
return []phrasePath{p}
}
car := phraseTerms[0]
cdr := phraseTerms[1:]
// empty term is treated as match (continue)
if len(car) == 0 || (len(car) == 1 && car[0] == "") {
nextPos := prevPos + 1
if prevPos == 0 {
// if prevPos was 0, don't set it to 1 (as that's not a real absolute position)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
}
var rv []phrasePath
// locations for this term
for _, carTerm := range car {
locations := tlm[carTerm]
for _, loc := range locations {
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
// if the array positions are wrong, can't match, try next location
continue
}
// compute distance from previous phrase term
dist := 0
if prevPos != 0 {
dist = editDistance(prevPos+1, loc.Pos)
}
// if enough slop remaining, continue recursively
if prevPos == 0 || (remainingSlop-dist) >= 0 {
// this location works, add it to the path (but not for empty term)
px := append(p, &phrasePart{term: carTerm, loc: loc})
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
}
}
}
return rv
}
func editDistance(p1, p2 uint64) int {
dist := int(p1 - p2)
if dist < 0 {
return -dist
}
return dist
}
func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
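The phrase searcher now builds its own term and disjunction searchers internally, and NewMultiPhraseSearcher allows several alternative terms per phrase position, with findPhrasePaths spending remainingSlop on positional gaps. A sketch of the new entry point, with the same assumed imports and illustrative terms:

// quickFox matches the phrase "quick (brown|grey) fox" in the "body" field;
// NewPhraseSearcher covers the single-alternative case.
func quickFox(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	terms := [][]string{{"quick"}, {"brown", "grey"}, {"fox"}}
	return searcher.NewMultiPhraseSearcher(ir, terms, "body", opts)
}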

View File

@@ -21,7 +21,14 @@ import (
"github.com/blevesearch/bleve/search"
)
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (search.Searcher, error) {
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
prefixTerm, complete := pattern.LiteralPrefix()
var candidateTerms []string
@@ -30,39 +37,19 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi
candidateTerms = []string{prefixTerm}
} else {
var err error
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm)
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
prefixTerm)
if err != nil {
return nil, err
}
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return searcher, err
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
options, true)
}
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
func findRegexpCandidateTerms(indexReader index.IndexReader,
pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -79,7 +66,8 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Reg
// enumerate the terms and check against regexp
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
if pattern.MatchString(tfd.Term) {
matchPos := pattern.FindStringIndex(tfd.Term)
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return rv, tooManyClausesErr()
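The candidate-term check switches from pattern.MatchString, which accepts a match anywhere inside the term, to FindStringIndex plus a span check, so only terms the pattern matches in full survive. A standalone illustration of the difference, using only the standard regexp package rather than bleve code:

// matchesWholeTerm reports whether pattern matches term in its entirety:
// for pattern "cat", MatchString accepts "category" but this does not.
func matchesWholeTerm(pattern *regexp.Regexp, term string) bool {
	pos := pattern.FindStringIndex(term)
	return pos != nil && pos[0] == 0 && pos[1] == len(term)
}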

View File

@@ -22,16 +22,13 @@ import (
type TermSearcher struct {
indexReader index.IndexReader
term string
field string
reader index.TermFieldReader
scorer *scorer.TermQueryScorer
tfd index.TermFieldDoc
explain bool
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, true)
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors)
if err != nil {
return nil, err
}
@@ -40,12 +37,27 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), explain)
scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
reader: reader,
scorer: scorer,
}, nil
}
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors)
if err != nil {
return nil, err
}
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
term: term,
field: field,
explain: explain,
reader: reader,
scorer: scorer,
}, nil

View File

@@ -19,93 +19,21 @@ import (
"github.com/blevesearch/bleve/search"
)
type TermPrefixSearcher struct {
indexReader index.IndexReader
prefix string
field string
explain bool
searcher *DisjunctionSearcher
}
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
var qsearcher *TermSearcher
qsearcher, err = NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
if err != nil {
qsearchersClose()
_ = fieldDict.Close()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
terms = append(terms, tfd.Term)
tfd, err = fieldDict.Next()
}
err = fieldDict.Close()
if err != nil {
qsearchersClose()
return nil, err
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return &TermPrefixSearcher{
indexReader: indexReader,
prefix: prefix,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *TermPrefixSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *TermPrefixSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *TermPrefixSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *TermPrefixSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *TermPrefixSearcher) Close() error {
return s.searcher.Close()
}
func (s *TermPrefixSearcher) Min() int {
return 0
}
func (s *TermPrefixSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View File

@@ -0,0 +1,79 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func NewTermRangeSearcher(indexReader index.IndexReader,
min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
if inclusiveMin == nil {
defaultInclusiveMin := true
inclusiveMin = &defaultInclusiveMin
}
if inclusiveMax == nil {
defaultInclusiveMax := false
inclusiveMax = &defaultInclusiveMax
}
if min == nil {
min = []byte{}
}
rangeMax := max
if rangeMax != nil {
// the term dictionary range end has an unfortunate implementation
rangeMax = append(rangeMax, 0)
}
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
if err != nil {
return nil, err
}
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
terms = append(terms, tfd.Term)
tfd, err = fieldDict.Next()
}
if err != nil {
return nil, err
}
if len(terms) < 1 {
return NewMatchNoneSearcher(indexReader)
}
if !*inclusiveMin && min != nil && string(min) == terms[0] {
terms = terms[1:]
// check again, as we might have removed the only entry
if len(terms) < 1 {
return NewMatchNoneSearcher(indexReader)
}
}
// if our term list included the max, it would be the last item
if !*inclusiveMax && max != nil && string(max) == terms[len(terms)-1] {
terms = terms[:len(terms)-1]
}
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}
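NewTermRangeSearcher is new in this commit: it enumerates the field dictionary between min and max, trims the endpoints according to the inclusive flags, and hands the surviving terms to NewMultiTermSearcher. A closing sketch with the same assumed imports; bounds and field are illustrative:

// milestoneRange matches "milestone" terms from "v1.0" (inclusive) up to
// but not including "v2.0"; nil flags fall back to the defaults above.
func milestoneRange(ir index.IndexReader, opts search.SearcherOptions) (search.Searcher, error) {
	incMin, incMax := true, false
	return searcher.NewTermRangeSearcher(ir, []byte("v1.0"), []byte("v2.0"),
		&incMin, &incMax, "milestone", 1.0, opts)
}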