
Improve issue search (#2387)

* Improve issue indexer

* Fix new issue sqlite bug

* Different test indexer paths for each db

* Add integration indexer paths to make clean
Authored by Ethan Koenig on 2017-09-16 13:16:21 -07:00; committed by Lauris BH
parent 52e11b24bf
commit b0f7457d9e
122 changed files with 15280 additions and 1458 deletions


@@ -4,6 +4,7 @@
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
@@ -33,29 +34,33 @@ Discuss usage and development of bleve in the [google group](https://groups.goog
## Indexing
```go
message := struct{
Id string
From string
Body string
}{
Id: "example",
From: "marty.schoch@gmail.com",
Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
panic(err)
}
index.Index(message.Id, message)
```
## Querying
```go
index, _ := bleve.Open("example.bleve")
query := bleve.NewQueryStringQuery("bleve")
searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```
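For illustration (not part of this diff), a minimal sketch of inspecting the results, assuming bleve's `SearchResult.Hits` collection with `ID` and `Score` fields:
```go
// Sketch: iterate the hits returned by index.Search.
// Assumes searchResult from the snippet above and bleve's DocumentMatch fields.
for _, hit := range searchResult.Hits {
	fmt.Printf("%s (score %.3f)\n", hit.ID, hit.Score)
}
```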
## License


@@ -0,0 +1,145 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package custom
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
const Name = "custom"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
var err error
var charFilters []analysis.CharFilter
charFiltersValue, ok := config["char_filters"]
if ok {
switch charFiltersValue := charFiltersValue.(type) {
case []string:
charFilters, err = getCharFilters(charFiltersValue, cache)
if err != nil {
return nil, err
}
case []interface{}:
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
if err != nil {
return nil, err
}
charFilters, err = getCharFilters(charFiltersNames, cache)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
}
}
var tokenizerName string
tokenizerValue, ok := config["tokenizer"]
if ok {
tokenizerName, ok = tokenizerValue.(string)
if !ok {
return nil, fmt.Errorf("must specify tokenizer as string")
}
} else {
return nil, fmt.Errorf("must specify tokenizer")
}
tokenizer, err := cache.TokenizerNamed(tokenizerName)
if err != nil {
return nil, err
}
var tokenFilters []analysis.TokenFilter
tokenFiltersValue, ok := config["token_filters"]
if ok {
switch tokenFiltersValue := tokenFiltersValue.(type) {
case []string:
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
if err != nil {
return nil, err
}
case []interface{}:
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
if err != nil {
return nil, err
}
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
}
}
rv := analysis.Analyzer{
Tokenizer: tokenizer,
}
if charFilters != nil {
rv.CharFilters = charFilters
}
if tokenFilters != nil {
rv.TokenFilters = tokenFilters
}
return &rv, nil
}
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
charFilters := make([]analysis.CharFilter, len(charFilterNames))
for i, charFilterName := range charFilterNames {
charFilter, err := cache.CharFilterNamed(charFilterName)
if err != nil {
return nil, err
}
charFilters[i] = charFilter
}
return charFilters, nil
}
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
for i, tokenFilterName := range tokenFilterNames {
tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
if err != nil {
return nil, err
}
tokenFilters[i] = tokenFilter
}
return tokenFilters, nil
}
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
stringSlice := make([]string, len(interfaceSlice))
for i, interfaceObj := range interfaceSlice {
stringObj, ok := interfaceObj.(string)
if ok {
stringSlice[i] = stringObj
} else {
return nil, fmt.Errorf(objType + " name must be a string")
}
}
return stringSlice, nil
}
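As a usage sketch (not in this diff), a config map of the shape `AnalyzerConstructor` expects is typically supplied through bleve's index mapping; this assumes `IndexMapping.AddCustomAnalyzer` and the registered names `"letter"` and `"to_lower"`:
```go
// Sketch: registering a custom analyzer; "custom" resolves to this package's
// AnalyzerConstructor, which parses the tokenizer/filter names below.
mapping := bleve.NewIndexMapping()
err := mapping.AddCustomAnalyzer("my_analyzer", map[string]interface{}{
	"type":          "custom",
	"tokenizer":     "letter",             // required, must be a registered tokenizer
	"token_filters": []string{"to_lower"}, // optional, []string or []interface{}
})
if err != nil {
	panic(err)
}
```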


@@ -1,46 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/registry"
)
const Name = "simple"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(letter.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
rv := analysis.Analyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
},
}
return &rv, nil
}
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}


@@ -0,0 +1,79 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicodenorm
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"golang.org/x/text/unicode/norm"
)
const Name = "normalize_unicode"
const NFC = "nfc"
const NFD = "nfd"
const NFKC = "nfkc"
const NFKD = "nfkd"
var forms = map[string]norm.Form{
NFC: norm.NFC,
NFD: norm.NFD,
NFKC: norm.NFKC,
NFKD: norm.NFKD,
}
type UnicodeNormalizeFilter struct {
form norm.Form
}
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
form, ok := forms[formName]
if !ok {
return nil, fmt.Errorf("no form named %s", formName)
}
return &UnicodeNormalizeFilter{
form: form,
}, nil
}
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
filter, err := NewUnicodeNormalizeFilter(formName)
if err != nil {
panic(err)
}
return filter
}
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = s.form.Bytes(token.Term)
}
return input
}
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
formVal, ok := config["form"].(string)
if !ok {
return nil, fmt.Errorf("must specify form")
}
form := formVal
return NewUnicodeNormalizeFilter(form)
}
func init() {
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
}
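A minimal usage sketch (not in this diff), assuming the `analysis.Token` fields used elsewhere in this commit:
```go
// Sketch: NFC-normalize a token whose term uses a combining accent.
filter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFC)
stream := analysis.TokenStream{
	&analysis.Token{Term: []byte("cafe\u0301")}, // "café", decomposed
}
stream = filter.Filter(stream)
fmt.Printf("%s\n", stream[0].Term) // composed form: café
```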


@@ -1,76 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package character
import (
"unicode/utf8"
"github.com/blevesearch/bleve/analysis"
)
type IsTokenRune func(r rune) bool
type CharacterTokenizer struct {
isTokenRun IsTokenRune
}
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
return &CharacterTokenizer{
isTokenRun: f,
}
}
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
rv := make(analysis.TokenStream, 0, 1024)
offset := 0
start := 0
end := 0
count := 0
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) {
isToken := c.isTokenRun(currRune)
if isToken {
end = offset + size
} else {
if end-start > 0 {
// build token
rv = append(rv, &analysis.Token{
Term: input[start:end],
Start: start,
End: end,
Position: count + 1,
Type: analysis.AlphaNumeric,
})
count++
}
start = offset + size
end = start
}
offset += size
}
// if we ended in the middle of a token, finish it
if end-start > 0 {
// build token
rv = append(rv, &analysis.Token{
Term: input[start:end],
Start: start,
End: end,
Position: count + 1,
Type: analysis.AlphaNumeric,
})
}
return rv
}


@@ -1,33 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package letter
import (
"unicode"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizer/character"
"github.com/blevesearch/bleve/registry"
)
const Name = "letter"
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
return character.NewCharacterTokenizer(unicode.IsLetter), nil
}
func init() {
registry.RegisterTokenizer(Name, TokenizerConstructor)
}
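For illustration (not in this diff), the `letter` tokenizer is just the character tokenizer over `unicode.IsLetter`; a minimal sketch:
```go
// Sketch: tokenize a string and print term, position, and byte offsets.
tokenizer := character.NewCharacterTokenizer(unicode.IsLetter)
for _, tok := range tokenizer.Tokenize([]byte("bleve indexing, made easy")) {
	fmt.Printf("%d: %q [%d,%d)\n", tok.Position, tok.Term, tok.Start, tok.End)
}
// Output: 1: "bleve" [0,5)  2: "indexing" [6,14)  3: "made" [16,20)  4: "easy" [21,25)
```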


@@ -1,23 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build appengine appenginevm
package bleve
// in the appengine environment we cannot support disk based indexes
// so we do no extra configuration in this method
func initDisk() {
}


@@ -0,0 +1,137 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
)
var GeoPrecisionStep uint = 9
type GeoPointField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric.PrefixCoded
numPlainTextBytes uint64
}
func (n *GeoPointField) Name() string {
return n.name
}
func (n *GeoPointField) ArrayPositions() []uint64 {
return n.arrayPositions
}
func (n *GeoPointField) Options() IndexingOptions {
return n.options
}
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) {
tokens := make(analysis.TokenStream, 0)
tokens = append(tokens, &analysis.Token{
Start: 0,
End: len(n.value),
Term: n.value,
Position: 1,
Type: analysis.Numeric,
})
original, err := n.value.Int64()
if err == nil {
shift := GeoPrecisionStep
for shift < 64 {
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
if err != nil {
break
}
token := analysis.Token{
Start: 0,
End: len(shiftEncoded),
Term: shiftEncoded,
Position: 1,
Type: analysis.Numeric,
}
tokens = append(tokens, &token)
shift += GeoPrecisionStep
}
}
fieldLength := len(tokens)
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
return fieldLength, tokenFreqs
}
func (n *GeoPointField) Value() []byte {
return n.value
}
func (n *GeoPointField) Lon() (float64, error) {
i64, err := n.value.Int64()
if err != nil {
return 0.0, err
}
return geo.MortonUnhashLon(uint64(i64)), nil
}
func (n *GeoPointField) Lat() (float64, error) {
i64, err := n.value.Int64()
if err != nil {
return 0.0, err
}
return geo.MortonUnhashLat(uint64(i64)), nil
}
func (n *GeoPointField) GoString() string {
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func (n *GeoPointField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
return &GeoPointField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField {
mhash := geo.MortonHash(lon, lat)
prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0)
return &GeoPointField{
name: name,
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}
}
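A round-trip sketch (not in this diff) showing the Morton encoding at work, using only the constructors above; a small quantization error is expected:
```go
// Sketch: store a lon/lat pair and read it back through the prefix-coded value.
field := document.NewGeoPointField("location", nil, -122.4194, 37.7749)
lon, _ := field.Lon()
lat, _ := field.Lat()
fmt.Printf("lon≈%.4f lat≈%.4f\n", lon, lat) // approximately the inputs
```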

vendor/github.com/blevesearch/bleve/geo/README.md (generated, vendored, new file)

@@ -0,0 +1,9 @@
# geo support in bleve
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
## Notes
- All of the APIs will use float64 for lon/lat values.
- When describing a point in function arguments or return values, we always use the order lon, lat (a sketch illustrating this follows these notes).
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.
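A minimal sketch of the lon-before-lat convention, using `BoundingBoxContains` from `geo.go` below (illustrative values):
```go
// Sketch: every argument pair is ordered lon, lat.
inside := geo.BoundingBoxContains(
	-122.4, 37.77, // point: lon, lat
	-123.0, 37.0,  // minLon, minLat
	-122.0, 38.0,  // maxLon, maxLat
)
fmt.Println(inside) // true
```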

vendor/github.com/blevesearch/bleve/geo/geo.go (generated, vendored, new file)

@@ -0,0 +1,170 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"github.com/blevesearch/bleve/numeric"
)
// GeoBits is the number of bits used for a single geo point
// Currently this is 32bits for lon and 32bits for lat
var GeoBits uint = 32
var minLon = -180.0
var minLat = -90.0
var maxLon = 180.0
var maxLat = 90.0
var minLonRad = minLon * degreesToRadian
var minLatRad = minLat * degreesToRadian
var maxLonRad = maxLon * degreesToRadian
var maxLatRad = maxLat * degreesToRadian
var geoTolerance = 1E-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 {
return numeric.Interleave(scaleLon(lon), scaleLat(lat))
}
func scaleLon(lon float64) uint64 {
rv := uint64((lon - minLon) * lonScale)
return rv
}
func scaleLat(lat float64) uint64 {
rv := uint64((lat - minLat) * latScale)
return rv
}
// MortonUnhashLon extracts the longitude value from the provided morton hash.
func MortonUnhashLon(hash uint64) float64 {
return unscaleLon(numeric.Deinterleave(hash))
}
// MortonUnhashLat extracts the latitude value from the provided morton hash.
func MortonUnhashLat(hash uint64) float64 {
return unscaleLat(numeric.Deinterleave(hash >> 1))
}
func unscaleLon(lon uint64) float64 {
return (float64(lon) / lonScale) + minLon
}
func unscaleLat(lat uint64) float64 {
return (float64(lat) / latScale) + minLat
}
// compareGeo will compare two float values and see if they are the same
// taking into consideration a known geo tolerance.
func compareGeo(a, b float64) float64 {
compare := a - b
if math.Abs(compare) <= geoTolerance {
return 0
}
return compare
}
// RectIntersects checks whether rectangles a and b intersect
func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
return !(aMaxX < bMinX || aMinX > bMaxX || aMaxY < bMinY || aMinY > bMaxY)
}
// RectWithin checks whether box a is within box b
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
rv := !(aMinX < bMinX || aMinY < bMinY || aMaxX > bMaxX || aMaxY > bMaxY)
return rv
}
// BoundingBoxContains checks whether the lon/lat point is within the box
func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool {
return compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0 &&
compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
}
const degreesToRadian = math.Pi / 180
const radiansToDegrees = 180 / math.Pi
// DegreesToRadians converts an angle in degrees to radians
func DegreesToRadians(d float64) float64 {
return d * degreesToRadian
}
// RadiansToDegrees converts an angle in radians to degrees
func RadiansToDegrees(r float64) float64 {
return r * radiansToDegrees
}
var earthMeanRadiusMeters = 6371008.7714
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
err := checkLongitude(lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(lat)
if err != nil {
return 0, 0, 0, 0, err
}
radLon := DegreesToRadians(lon)
radLat := DegreesToRadians(lat)
radDistance := (dist + 7e-2) / earthMeanRadiusMeters
minLatL := radLat - radDistance
maxLatL := radLat + radDistance
var minLonL, maxLonL float64
if minLatL > minLatRad && maxLatL < maxLatRad {
deltaLon := asin(sin(radDistance) / cos(radLat))
minLonL = radLon - deltaLon
if minLonL < minLonRad {
minLonL += 2 * math.Pi
}
maxLonL = radLon + deltaLon
if maxLonL > maxLonRad {
maxLonL -= 2 * math.Pi
}
} else {
// pole is inside distance
minLatL = math.Max(minLatL, minLatRad)
maxLatL = math.Min(maxLatL, maxLatRad)
minLonL = minLonRad
maxLonL = maxLonRad
}
return RadiansToDegrees(minLonL),
RadiansToDegrees(maxLatL),
RadiansToDegrees(maxLonL),
RadiansToDegrees(minLatL),
nil
}
func checkLatitude(latitude float64) error {
if math.IsNaN(latitude) || latitude < minLat || latitude > maxLat {
return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat)
}
return nil
}
func checkLongitude(longitude float64) error {
if math.IsNaN(longitude) || longitude < minLon || longitude > maxLon {
return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon)
}
return nil
}
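A usage sketch (not in this diff); note that the return order is minLon, maxLat, maxLon, minLat, matching the TopLeft/BottomRight convention from the README above:
```go
// Sketch: compute a bounding rectangle roughly 1km around a point.
minLon, maxLat, maxLon, minLat, err := geo.RectFromPointDistance(-122.4194, 37.7749, 1000)
if err != nil {
	panic(err)
}
fmt.Printf("TL(%f, %f) BR(%f, %f)\n", minLon, maxLat, maxLon, minLat)
```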

vendor/github.com/blevesearch/bleve/geo/geo_dist.go (generated, vendored, new file)

@@ -0,0 +1,98 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"strconv"
"strings"
)
type distanceUnit struct {
conv float64
suffixes []string
}
var inch = distanceUnit{0.0254, []string{"in", "inch"}}
var yard = distanceUnit{0.9144, []string{"yd", "yards"}}
var feet = distanceUnit{0.3048, []string{"ft", "feet"}}
var kilom = distanceUnit{1000, []string{"km", "kilometers"}}
var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
var millim = distanceUnit{0.001, []string{"mm", "millimeters"}}
var centim = distanceUnit{0.01, []string{"cm", "centimeters"}}
var miles = distanceUnit{1609.344, []string{"mi", "miles"}}
var meters = distanceUnit{1, []string{"m", "meters"}}
var distanceUnits = []*distanceUnit{
&inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters,
}
// ParseDistance attempts to parse a distance string and return distance in
// meters. Example formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters"
// If the unit cannot be determined, the entire string is parsed and the
// unit of meters is assumed.
// If the number portion cannot be parsed, 0 and the parse error are returned.
func ParseDistance(d string) (float64, error) {
for _, unit := range distanceUnits {
for _, unitSuffix := range unit.suffixes {
if strings.HasSuffix(d, unitSuffix) {
parsedNum, err := strconv.ParseFloat(d[0:len(d)-len(unitSuffix)], 64)
if err != nil {
return 0, err
}
return parsedNum * unit.conv, nil
}
}
}
// no unit matched, try assuming meters?
parsedNum, err := strconv.ParseFloat(d, 64)
if err != nil {
return 0, err
}
return parsedNum, nil
}
// ParseDistanceUnit attempts to parse a distance unit and return the
// multiplier for converting this to meters. If the unit cannot be parsed
// then 0 and the error message is returned.
func ParseDistanceUnit(u string) (float64, error) {
for _, unit := range distanceUnits {
for _, unitSuffix := range unit.suffixes {
if u == unitSuffix {
return unit.conv, nil
}
}
}
return 0, fmt.Errorf("unknown distance unit: %s", u)
}
// Haversin computes the distance between two points.
// This implementation uses the sloppy math implementations which trade off
// accuracy for performance. The distance returned is in kilometers.
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
x1 := lat1 * degreesToRadian
x2 := lat2 * degreesToRadian
h1 := 1 - cos(x1-x2)
h2 := 1 - cos((lon1-lon2)*degreesToRadian)
h := (h1 + cos(x1)*cos(x2)*h2) / 2
avgLat := (x1 + x2) / 2
diameter := earthDiameter(avgLat)
return diameter * asin(math.Min(1, math.Sqrt(h)))
}
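A usage sketch (not in this diff) of the formats `ParseDistance` accepts:
```go
// Sketch: all of these parse to meters; a bare number is assumed to be meters.
for _, s := range []string{"5in", "11km", "3nm", "19m", "42"} {
	m, err := geo.ParseDistance(s)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s = %f meters\n", s, m)
}
```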

vendor/github.com/blevesearch/bleve/geo/parse.go (generated, vendored, new file)

@@ -0,0 +1,140 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"reflect"
"strings"
)
// ExtractGeoPoint takes an arbitrary interface{} and tries its best to
// interpret it as a geo point. Supported formats:
// Container:
//  slice length 2 (GeoJSON)
//   first element lon, second element lat
//  map[string]interface{}
//   exact keys lat and lon or lng
//  struct
//   w/exported fields case-insensitive match on lat and lon or lng
//  struct
//   satisfying Later and Loner or Lnger interfaces
//
// in all cases values must be some sort of numeric-like thing: int/uint/float
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
var foundLon, foundLat bool
thingVal := reflect.ValueOf(thing)
thingTyp := thingVal.Type()
// is it a slice
if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
// must be length 2
if thingVal.Len() == 2 {
first := thingVal.Index(0)
if first.CanInterface() {
firstVal := first.Interface()
lon, foundLon = extractNumericVal(firstVal)
}
second := thingVal.Index(1)
if second.CanInterface() {
secondVal := second.Interface()
lat, foundLat = extractNumericVal(secondVal)
}
}
}
// is it a map
if l, ok := thing.(map[string]interface{}); ok {
if lval, ok := l["lon"]; ok {
lon, foundLon = extractNumericVal(lval)
} else if lval, ok := l["lng"]; ok {
lon, foundLon = extractNumericVal(lval)
}
if lval, ok := l["lat"]; ok {
lat, foundLat = extractNumericVal(lval)
}
}
// now try reflection on struct fields
if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
for i := 0; i < thingVal.NumField(); i++ {
fieldName := thingTyp.Field(i).Name
if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
if thingVal.Field(i).CanInterface() {
fieldVal := thingVal.Field(i).Interface()
lon, foundLon = extractNumericVal(fieldVal)
}
}
if strings.HasPrefix(strings.ToLower(fieldName), "lng") {
if thingVal.Field(i).CanInterface() {
fieldVal := thingVal.Field(i).Interface()
lon, foundLon = extractNumericVal(fieldVal)
}
}
if strings.HasPrefix(strings.ToLower(fieldName), "lat") {
if thingVal.Field(i).CanInterface() {
fieldVal := thingVal.Field(i).Interface()
lat, foundLat = extractNumericVal(fieldVal)
}
}
}
}
// last hope, some interfaces
// lon
if l, ok := thing.(loner); ok {
lon = l.Lon()
foundLon = true
} else if l, ok := thing.(lnger); ok {
lon = l.Lng()
foundLon = true
}
// lat
if l, ok := thing.(later); ok {
lat = l.Lat()
foundLat = true
}
return lon, lat, foundLon && foundLat
}
// extractNumericVal extracts a numeric value (if possible) and returns it as a float64
func extractNumericVal(v interface{}) (float64, bool) {
val := reflect.ValueOf(v)
typ := val.Type()
switch typ.Kind() {
case reflect.Float32, reflect.Float64:
return val.Float(), true
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return float64(val.Int()), true
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return float64(val.Uint()), true
}
return 0, false
}
// various support interfaces which can be used to find lat/lon
type loner interface {
Lon() float64
}
type later interface {
Lat() float64
}
type lnger interface {
Lng() float64
}
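A sketch (not in this diff) of the three supported input shapes:
```go
// Sketch: slice (GeoJSON order), map, and struct inputs all work.
lon, lat, ok := geo.ExtractGeoPoint([]interface{}{-122.4, 37.77})
fmt.Println(lon, lat, ok) // -122.4 37.77 true

lon, lat, ok = geo.ExtractGeoPoint(map[string]interface{}{"lng": -122.4, "lat": 37.77})
fmt.Println(lon, lat, ok) // -122.4 37.77 true

type place struct{ Lon, Lat float64 }
lon, lat, ok = geo.ExtractGeoPoint(place{Lon: -122.4, Lat: 37.77})
fmt.Println(lon, lat, ok) // -122.4 37.77 true
```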

vendor/github.com/blevesearch/bleve/geo/sloppy.go (generated, vendored, new file)

@@ -0,0 +1,212 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"math"
)
var earthDiameterPerLatitude []float64
var sinTab []float64
var cosTab []float64
var asinTab []float64
var asinDer1DivF1Tab []float64
var asinDer2DivF2Tab []float64
var asinDer3DivF3Tab []float64
var asinDer4DivF4Tab []float64
const radiusTabsSize = (1 << 10) + 1
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
const radiusIndexer = 1 / radiusDelta
const sinCosTabsSize = (1 << 11) + 1
const asinTabsSize = (1 << 13) + 1
const oneDivF2 = 1 / 2.0
const oneDivF3 = 1 / 6.0
const oneDivF4 = 1 / 24.0
// 1.57079632673412561417e+00 first 33 bits of pi/2
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)
// 6.07710050650619224932e-11 pi/2 - PIO2_HI
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02
var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02
var twoPiHi = 4 * pio2Hi
var twoPiLo = 4 * pio2Lo
var sinCosDeltaHi = twoPiHi / (sinCosTabsSize - 1)
var sinCosDeltaLo = twoPiLo / (sinCosTabsSize - 1)
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo)
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian)
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1)
var asinIndexer = 1 / asinDelta
func init() {
// initializes the tables used for the sloppy math functions
// sin and cos
sinTab = make([]float64, sinCosTabsSize)
cosTab = make([]float64, sinCosTabsSize)
sinCosPiIndex := (sinCosTabsSize - 1) / 2
sinCosPiMul2Index := 2 * sinCosPiIndex
sinCosPiMul05Index := sinCosPiIndex / 2
sinCosPiMul15Index := 3 * sinCosPiIndex / 2
for i := 0; i < sinCosTabsSize; i++ {
// angle: in [0,2*PI].
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo
sinAngle := math.Sin(angle)
cosAngle := math.Cos(angle)
// For indexes corresponding to null cosine or sine, we make sure the value is zero
// and not an epsilon. This allows for a much better accuracy for results close to zero.
if i == sinCosPiIndex {
sinAngle = 0.0
} else if i == sinCosPiMul2Index {
sinAngle = 0.0
} else if i == sinCosPiMul05Index {
sinAngle = 0.0
} else if i == sinCosPiMul15Index {
sinAngle = 0.0
}
sinTab[i] = sinAngle
cosTab[i] = cosAngle
}
// asin
asinTab = make([]float64, asinTabsSize)
asinDer1DivF1Tab = make([]float64, asinTabsSize)
asinDer2DivF2Tab = make([]float64, asinTabsSize)
asinDer3DivF3Tab = make([]float64, asinTabsSize)
asinDer4DivF4Tab = make([]float64, asinTabsSize)
for i := 0; i < asinTabsSize; i++ {
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
x := float64(i) * asinDelta
asinTab[i] = math.Asin(x)
oneMinusXSqInv := 1.0 / (1 - x*x)
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv)
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv
asinDer1DivF1Tab[i] = oneMinusXSqInv05
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4
}
// earth radius
a := 6378137.0
b := 6356752.31420
a2 := a * a
b2 := b * b
earthDiameterPerLatitude = make([]float64, radiusTabsSize)
earthDiameterPerLatitude[0] = 2.0 * a / 1000
earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000
for i := 1; i < radiusTabsSize-1; i++ {
lat := math.Pi * float64(i) / (2*radiusTabsSize - 1)
one := math.Pow(a2*math.Cos(lat), 2)
two := math.Pow(b2*math.Sin(lat), 2)
three := math.Pow(float64(a)*math.Cos(lat), 2)
four := math.Pow(b*math.Sin(lat), 2)
radius := math.Sqrt((one + two) / (three + four))
earthDiameterPerLatitude[i] = 2 * radius / 1000
}
}
// earthDiameter returns an estimation of the earth's diameter at the specified
// latitude in kilometers
func earthDiameter(lat float64) float64 {
index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude)))
if math.IsNaN(index) {
return 0
}
return earthDiameterPerLatitude[int(index)]
}
var pio2 = math.Pi / 2
func sin(a float64) float64 {
return cos(a - pio2)
}
// cos is a sloppy math (faster) implementation of math.Cos
func cos(a float64) float64 {
if a < 0.0 {
a = -a
}
if a > sinCosMaxValueForIntModulo {
return math.Cos(a)
}
// index: possibly outside tables range.
index := int(a*sinCosIndexer + 0.5)
delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo
// Making sure index is within tables range.
// Last value of each table is the same as the first, so we ignore it (tabs size minus one) for modulo.
index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1)
indexCos := cosTab[index]
indexSin := sinTab[index]
return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4)))
}
// asin is a sloppy math (faster) implementation of math.Asin
func asin(a float64) float64 {
var negateResult bool
if a < 0 {
a = -a
negateResult = true
}
if a <= asinMaxValueForTabs {
index := int(a*asinIndexer + 0.5)
delta := a - float64(index)*asinDelta
result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index])))
if negateResult {
return -result
}
return result
}
// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
// This part is derived from fdlibm.
if a < 1 {
t := (1.0 - a) * 0.5
p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t*asinPs5)))))
q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4)))
s := math.Sqrt(t)
z := s + s*(p/q)
result := asinPio2Hi - ((z + z) - asinPio2Lo)
if negateResult {
return -result
}
return result
}
// value >= 1.0, or value is NaN
if a == 1.0 {
if negateResult {
return -math.Pi / 2
}
return math.Pi / 2
}
return math.NaN()
}
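`sin`, `cos`, and `asin` above are package-private; purely as an illustration of the same table-plus-derivative-correction technique (toy code under my own assumptions, not bleve's), consider:
```go
// Toy sketch: table-driven cosine with a first-order correction,
// structurally similar to cos above but with a much coarser table.
const tabSize = 1 << 8

var delta = 2 * math.Pi / float64(tabSize-1)
var cTab, sTab [tabSize]float64

func init() {
	for i := range cTab {
		cTab[i] = math.Cos(float64(i) * delta)
		sTab[i] = math.Sin(float64(i) * delta)
	}
}

func toyCos(a float64) float64 {
	a = math.Abs(a)
	i := int(a/delta + 0.5)
	d := a - float64(i)*delta  // residual within one table step
	i %= tabSize - 1           // wrap: last entry equals the first
	return cTab[i] - d*sTab[i] // cos(x+d) ≈ cos(x) - d·sin(x)
}
```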


@@ -49,6 +49,17 @@ func (b *Batch) Index(id string, data interface{}) error {
return nil
}
// IndexAdvanced adds the specified index operation to the
// batch which skips the mapping. NOTE: the bleve Index is not updated
// until the batch is executed.
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
if doc.ID == "" {
return ErrorEmptyID
}
b.internal.Update(doc)
return nil
}
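A usage sketch (not part of this hunk), assuming an open bleve index `idx` and the bleve `document` package constructors:
```go
// Sketch: queue a pre-built document, bypassing the mapping entirely.
doc := document.NewDocument("doc1")
doc.AddField(document.NewTextField("body", nil, []byte("some text")))
batch := idx.NewBatch()
if err := batch.IndexAdvanced(doc); err != nil {
	panic(err) // e.g. ErrorEmptyID
}
if err := idx.Batch(batch); err != nil { // the index updates only here
	panic(err)
}
```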
// Delete adds the specified delete operation to the
// batch. NOTE: the bleve Index is not updated until
// the batch is executed.
@@ -99,12 +110,15 @@ func (b *Batch) Reset() {
// them.
//
// The DocumentMapping used to index a value is deduced by the following rules:
// 1) If value implements Classifier interface, resolve the mapping from Type().
// 2) If value has a string field or value at IndexMapping.TypeField.
// 1) If value implements mapping.bleveClassifier interface, resolve the mapping
// from BleveType().
// 2) If value implements mapping.Classifier interface, resolve the mapping
// from Type().
// 3) If value has a string field or value at IndexMapping.TypeField.
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
// is described below.
// 3) If IndexMapping.DefaultType is registered, return it.
// 4) Return IndexMapping.DefaultMapping.
// 4) If IndexMapping.DefaultType is registered, return it.
// 5) Return IndexMapping.DefaultMapping.
//
// Each field or nested field of the value is identified by a string path, then
// mapped to one or several FieldMappings which extract the result for analysis.
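For illustration (not in this hunk), a type that takes rule 1 by implementing `BleveType()` as described above:
```go
// Sketch: BleveType() wins over Type() and the TypeField lookup.
type Message struct {
	Body string
}

func (m Message) BleveType() string {
	return "message" // resolved against the mapping's type names
}
```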


@@ -48,6 +48,8 @@ type Index interface {
Advanced() (store.KVStore, error)
}
type DocumentFieldTermVisitor func(field string, term []byte)
type IndexReader interface {
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
@@ -64,7 +66,7 @@ type IndexReader interface {
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
Document(id string) (*document.Document, error)
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
Fields() ([]string, error)


@@ -90,7 +90,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)
backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
// walk through the collated information and process
// once for each indexed field (unique name)
@@ -99,11 +99,11 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
// encode this field
rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
}
// build the back index row
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
rv.Rows = append(rv.Rows, backIndexRow)
return rv


@@ -127,10 +127,12 @@ func (i *IndexReader) DumpDoc(id string) chan interface{} {
}
// build sorted list of term keys
keys := make(keyset, 0)
for _, entry := range back.termEntries {
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0)
key := tfr.Key()
keys = append(keys, key)
for _, entry := range back.termsEntries {
for i := range entry.Terms {
tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0)
key := tfr.Key()
keys = append(keys, key)
}
}
sort.Sort(keys)


@@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
return
}
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
if back == nil {
return nil, nil
}
rv := make(index.FieldTerms, len(fields))
func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false)
@@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
fieldsMap[id] = f
}
}
for _, entry := range back.termEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
rv[field] = append(rv[field], *entry.Term)
}
tempRow := BackIndexRow{
doc: id,
}
return rv, nil
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
if err != nil {
return err
}
if value == nil {
return nil
}
return visitBackIndexRow(value, func(field uint32, term []byte) {
if field, ok := fieldsMap[uint16(field)]; ok {
visitor(field, term)
}
})
}
func (i *IndexReader) Fields() (fields []string, err error) {


@@ -24,46 +24,57 @@ import (
)
type UpsideDownCouchTermFieldReader struct {
count uint64
indexReader *IndexReader
iterator store.KVIterator
term []byte
tfrNext *TermFrequencyRow
keyBuf []byte
field uint16
count uint64
indexReader *IndexReader
iterator store.KVIterator
term []byte
tfrNext *TermFrequencyRow
tfrPrealloc TermFrequencyRow
keyBuf []byte
field uint16
includeTermVectors bool
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0)
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
bufNeeded := termFrequencyRowKeySize(term, nil)
if bufNeeded < dictionaryRowKeySize(term) {
bufNeeded = dictionaryRowKeySize(term)
}
buf := make([]byte, bufNeeded)
bufUsed := dictionaryRowKeyTo(buf, field, term)
val, err := indexReader.kvreader.Get(buf[:bufUsed])
if err != nil {
return nil, err
}
if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
tfrNext: &TermFrequencyRow{},
field: field,
}, nil
rv := &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
field: field,
includeTermVectors: includeTermVectors,
}
rv.tfrNext = &rv.tfrPrealloc
return rv, nil
}
err = dictionaryRow.parseDictionaryV(val)
count, err := dictionaryRowParseV(val)
if err != nil {
return nil, err
}
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
it := indexReader.kvreader.PrefixIterator(tfr.Key())
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
indexReader: indexReader,
iterator: it,
count: dictionaryRow.count,
term: term,
field: field,
indexReader: indexReader,
iterator: it,
count: count,
term: term,
field: field,
includeTermVectors: includeTermVectors,
}, nil
}
@@ -79,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if r.tfrNext != nil {
r.iterator.Next()
} else {
r.tfrNext = &TermFrequencyRow{}
r.tfrNext = &r.tfrPrealloc
}
key, val, valid := r.iterator.Current()
if valid {
@@ -88,7 +99,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if err != nil {
return nil, err
}
err = tfr.parseV(val)
err = tfr.parseV(val, r.includeTermVectors)
if err != nil {
return nil, err
}
@@ -125,7 +136,7 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, pr
if err != nil {
return nil, err
}
err = tfr.parseV(val)
err = tfr.parseV(val, r.includeTermVectors)
if err != nil {
return nil, err
}


@@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
}
func (dr *DictionaryRow) KeySize() int {
return len(dr.term) + 3
return dictionaryRowKeySize(dr.term)
}
func dictionaryRowKeySize(term []byte) int {
return len(term) + 3
}
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
size := copy(buf[3:], dr.term)
return size + 3, nil
binary.LittleEndian.PutUint16(buf[1:3], field)
size := copy(buf[3:], term)
return size + 3
}
func (dr *DictionaryRow) Value() []byte {
@@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
}
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
count, err := dictionaryRowParseV(value)
if err != nil {
return err
}
dr.count = count
return nil
}
func dictionaryRowParseV(value []byte) (uint64, error) {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
}
return count, nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
@@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
}
func (tfr *TermFrequencyRow) KeySize() int {
return 3 + len(tfr.term) + 1 + len(tfr.doc)
return termFrequencyRowKeySize(tfr.term, tfr.doc)
}
func termFrequencyRowKeySize(term, doc []byte) int {
return 3 + len(term) + 1 + len(doc)
}
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
binary.LittleEndian.PutUint16(buf[1:3], field)
termLen := copy(buf[3:], term)
buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], tfr.doc)
return 3 + termLen + 1 + docLen, nil
docLen := copy(buf[3+termLen+1:], doc)
return 3 + termLen + 1 + docLen
}
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
@@ -538,7 +562,7 @@ func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
return nil
}
func (tfr *TermFrequencyRow) parseV(value []byte) error {
func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
var bytesRead int
tfr.freq, bytesRead = binary.Uvarint(value)
if bytesRead <= 0 {
@@ -556,6 +580,10 @@ func (tfr *TermFrequencyRow) parseV(value []byte) error {
tfr.norm = math.Float32frombits(uint32(norm))
tfr.vectors = nil
if !includeTermVectors {
return nil
}
var field uint64
field, bytesRead = binary.Uvarint(value[currOffset:])
for bytesRead > 0 {
@@ -620,7 +648,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
return nil, err
}
err = rv.parseV(value)
err = rv.parseV(value, true)
if err != nil {
return nil, err
}
@@ -630,7 +658,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
type BackIndexRow struct {
doc []byte
termEntries []*BackIndexTermEntry
termsEntries []*BackIndexTermsEntry
storedEntries []*BackIndexStoreEntry
}
@@ -638,10 +666,12 @@ func (br *BackIndexRow) AllTermKeys() [][]byte {
if br == nil {
return nil
}
rv := make([][]byte, len(br.termEntries))
for i, termEntry := range br.termEntries {
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
rv[i] = termRow.Key()
rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely
for _, termsEntry := range br.termsEntries {
for i := range termsEntry.Terms {
termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
rv = append(rv, termRow.Key())
}
}
return rv
}
@@ -682,7 +712,7 @@ func (br *BackIndexRow) Value() []byte {
func (br *BackIndexRow) ValueSize() int {
birv := &BackIndexRowValue{
TermEntries: br.termEntries,
TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries,
}
return birv.Size()
@@ -690,20 +720,20 @@ func (br *BackIndexRow) ValueSize() int {
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
birv := &BackIndexRowValue{
TermEntries: br.termEntries,
TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries,
}
return birv.MarshalTo(buf)
}
func (br *BackIndexRow) String() string {
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries)
}
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
return &BackIndexRow{
doc: docID,
termEntries: entries,
termsEntries: entries,
storedEntries: storedFields,
}
}
@@ -732,7 +762,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
if err != nil {
return nil, err
}
rv.termEntries = birv.TermEntries
rv.termsEntries = birv.TermsEntries
rv.storedEntries = birv.StoredEntries
return &rv, nil
@@ -851,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
rv.value = value[1:]
return rv, nil
}
type backIndexFieldTermVisitor func(field uint32, term []byte)
// visitBackIndexRow is designed to process a protobuf encoded
// value, without creating unnecessary garbage. Instead values are passed
// to a callback, inspected first, and only copied if necessary.
// Due to the fact that this borrows from generated code, it must be manually
// updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// don't parse term entries
// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
// instead, inspect them
if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
return err
}
iNdEx = postIndex
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + msglen
if msglen < 0 {
return ErrInvalidLengthUpsidedown
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
// don't parse stored entries
// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
// return err
// }
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
// don't track unrecognized data
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
return nil
}
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
// Instead values are passed to a callback, inspected first, and only copied if
// necessary. Due to the fact that this borrows from generated code, it must
// be manually updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
var theField uint32
var hasFields [1]uint64
l := len(data)
iNdEx := 0
for iNdEx < l {
var wire uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
// m.Field = &v
theField = v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
postIndex := iNdEx + int(stringLen)
if postIndex > l {
return io.ErrUnexpectedEOF
}
//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
callback(theField, data[iNdEx:postIndex])
iNdEx = postIndex
default:
var sizeOfWire int
for {
sizeOfWire++
wire >>= 7
if wire == 0 {
break
}
}
iNdEx -= sizeOfWire
skippy, err := skipUpsidedown(data[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthUpsidedown
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
// if hasFields[0]&uint64(0x00000001) == 0 {
// return new(github_com_golang_protobuf_proto.RequiredNotSetError)
// }
return nil
}


@@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024
var VersionKey = []byte{'v'}
const Version uint8 = 5
const Version uint8 = 7
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
@@ -499,44 +499,65 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
addRows = make([]UpsideDownCouchRow, 0, len(rows))
if backIndexRow == nil {
addRows = addRows[0:len(rows)]
for i, row := range rows {
addRows[i] = row
}
return addRows, nil, nil
}
updateRows = make([]UpsideDownCouchRow, 0, len(rows))
deleteRows = make([]UpsideDownCouchRow, 0, len(rows))
existingTermKeys := make(map[string]bool)
for _, key := range backIndexRow.AllTermKeys() {
existingTermKeys[string(key)] = true
var existingTermKeys map[string]struct{}
backIndexTermKeys := backIndexRow.AllTermKeys()
if len(backIndexTermKeys) > 0 {
existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys))
for _, key := range backIndexTermKeys {
existingTermKeys[string(key)] = struct{}{}
}
}
existingStoredKeys := make(map[string]bool)
for _, key := range backIndexRow.AllStoredKeys() {
existingStoredKeys[string(key)] = true
var existingStoredKeys map[string]struct{}
backIndexStoredKeys := backIndexRow.AllStoredKeys()
if len(backIndexStoredKeys) > 0 {
existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys))
for _, key := range backIndexStoredKeys {
existingStoredKeys[string(key)] = struct{}{}
}
}
keyBuf := GetRowBuffer()
for _, row := range rows {
switch row := row.(type) {
case *TermFrequencyRow:
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
} else {
addRows = append(addRows, row)
if existingTermKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
continue
}
}
addRows = append(addRows, row)
case *StoredRow:
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
} else {
addRows = append(addRows, row)
if existingStoredKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
continue
}
}
addRows = append(addRows, row)
default:
updateRows = append(updateRows, row)
}
@@ -583,33 +604,41 @@ func encodeFieldType(f document.Field) byte {
fieldType = 'd'
case *document.BooleanField:
fieldType = 'b'
case *document.GeoPointField:
fieldType = 'g'
case *document.CompositeField:
fieldType = 'c'
}
return fieldType
}
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) {
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) {
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
termFreqRowsUsed := 0
terms := make([]string, 0, len(tokenFreqs))
for k, tf := range tokenFreqs {
var termFreqRow *TermFrequencyRow
termFreqRow := &termFreqRows[termFreqRowsUsed]
termFreqRowsUsed++
InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID,
uint64(frequencyFromTokenFreq(tf)), fieldNorm)
if includeTermVectors {
var tv []*TermVector
tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
} else {
termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
}
// record the back index entry
backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))}
backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry)
terms = append(terms, k)
rows = append(rows, termFreqRow)
}
backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms}
backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry)
return rows, backIndexTermEntries
return rows, backIndexTermsEntries
}
func (udc *UpsideDownCouch) Delete(id string) (err error) {
@@ -682,9 +711,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
idBytes := []byte(id)
for _, backIndexEntry := range backIndexRow.termEntries {
tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0)
deleteRows = append(deleteRows, tfr)
for _, backIndexEntry := range backIndexRow.termsEntries {
for i := range backIndexEntry.Terms {
tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0)
deleteRows = append(deleteRows, tfr)
}
}
for _, se := range backIndexRow.storedEntries {
sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
@@ -706,6 +737,8 @@ func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document
return document.NewDateTimeFieldFromBytes(name, pos, value)
case 'b':
return document.NewBooleanFieldFromBytes(name, pos, value)
case 'g':
return document.NewGeoPointFieldFromBytes(name, pos, value)
}
return nil
}
@@ -715,6 +748,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
}
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
a := make([]TermVector, len(tf.Locations))
rv := make([]*TermVector, len(tf.Locations))
for i, l := range tf.Locations {
@@ -727,14 +761,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
rows = append(rows, newFieldRow)
}
}
tv := TermVector{
a[i] = TermVector{
field: fieldIndex,
arrayPositions: l.ArrayPositions,
pos: uint64(l.Position),
start: uint64(l.Start),
end: uint64(l.End),
}
rv[i] = &tv
rv[i] = &a[i]
}
return rv, rows
@@ -745,18 +779,19 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
return nil
}
a := make([]index.TermFieldVector, len(in))
rv := make([]*index.TermFieldVector, len(in))
for i, tv := range in {
fieldName := udc.fieldCache.FieldIndexed(tv.field)
tfv := index.TermFieldVector{
a[i] = index.TermFieldVector{
Field: fieldName,
ArrayPositions: tv.arrayPositions,
Pos: tv.pos,
Start: tv.start,
End: tv.end,
}
rv[i] = &tfv
rv[i] = &a[i]
}
return rv
}
@@ -1008,7 +1043,7 @@ func init() {
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) {
// use a temporary row structure to build key
tempRow := &BackIndexRow{
tempRow := BackIndexRow{
doc: docID,
}

View File

@@ -3,15 +3,15 @@
// DO NOT EDIT!
/*
Package upsidedown is a generated protocol buffer package.
Package upsidedown is a generated protocol buffer package.
It is generated from these files:
upsidedown.proto
It is generated from these files:
upsidedown.proto
It has these top-level messages:
BackIndexTermEntry
BackIndexStoreEntry
BackIndexRowValue
It has these top-level messages:
BackIndexTermsEntry
BackIndexStoreEntry
BackIndexRowValue
*/
package upsidedown
@@ -26,30 +26,30 @@ import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
var _ = proto.Marshal
var _ = math.Inf
type BackIndexTermEntry struct {
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"`
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"`
XXX_unrecognized []byte `json:"-"`
type BackIndexTermsEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} }
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexTermEntry) ProtoMessage() {}
func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} }
func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexTermsEntry) ProtoMessage() {}
func (m *BackIndexTermEntry) GetTerm() string {
if m != nil && m.Term != nil {
return *m.Term
}
return ""
}
func (m *BackIndexTermEntry) GetField() uint32 {
func (m *BackIndexTermsEntry) GetField() uint32 {
if m != nil && m.Field != nil {
return *m.Field
}
return 0
}
func (m *BackIndexTermsEntry) GetTerms() []string {
if m != nil {
return m.Terms
}
return nil
}
type BackIndexStoreEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
@@ -75,7 +75,7 @@ func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
}
type BackIndexRowValue struct {
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"`
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
@@ -84,9 +84,9 @@ func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
func (*BackIndexRowValue) ProtoMessage() {}
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry {
func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
if m != nil {
return m.TermEntries
return m.TermsEntries
}
return nil
}
@@ -98,7 +98,7 @@ func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
return nil
}
func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64
l := len(data)
iNdEx := 0
@@ -119,8 +119,26 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
wireType := int(wire & 0x7)
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType)
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
}
var stringLen uint64
for shift := uint(0); ; shift += 7 {
@@ -138,28 +156,8 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
if postIndex > l {
return io.ErrUnexpectedEOF
}
s := string(data[iNdEx:postIndex])
m.Term = &s
m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
iNdEx = postIndex
hasFields[0] |= uint64(0x00000001)
case 2:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
}
var v uint32
for shift := uint(0); ; shift += 7 {
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
v |= (uint32(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
m.Field = &v
hasFields[0] |= uint64(0x00000002)
default:
var sizeOfWire int
for {
@@ -187,9 +185,6 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
if hasFields[0]&uint64(0x00000002) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil
}
@@ -299,7 +294,7 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error {
switch fieldNum {
case 1:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType)
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
@@ -320,8 +315,8 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error {
if postIndex > l {
return io.ErrUnexpectedEOF
}
m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{})
if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
@@ -472,16 +467,18 @@ var (
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
func (m *BackIndexTermEntry) Size() (n int) {
func (m *BackIndexTermsEntry) Size() (n int) {
var l int
_ = l
if m.Term != nil {
l = len(*m.Term)
n += 1 + l + sovUpsidedown(uint64(l))
}
if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field))
}
if len(m.Terms) > 0 {
for _, s := range m.Terms {
l = len(s)
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
@@ -508,8 +505,8 @@ func (m *BackIndexStoreEntry) Size() (n int) {
func (m *BackIndexRowValue) Size() (n int) {
var l int
_ = l
if len(m.TermEntries) > 0 {
for _, e := range m.TermEntries {
if len(m.TermsEntries) > 0 {
for _, e := range m.TermsEntries {
l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l))
}
@@ -539,7 +536,7 @@ func sovUpsidedown(x uint64) (n int) {
func sozUpsidedown(x uint64) (n int) {
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
}
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) {
size := m.Size()
data = make([]byte, size)
n, err := m.MarshalTo(data)
@@ -549,26 +546,33 @@ func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
return data[:n], nil
}
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) {
func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
var i int
_ = i
var l int
_ = l
if m.Term == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term)))
i += copy(data[i:], *m.Term)
}
if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0x10
data[i] = 0x8
i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
}
if len(m.Terms) > 0 {
for _, s := range m.Terms {
data[i] = 0x12
i++
l = len(s)
for l >= 1<<7 {
data[i] = uint8(uint64(l)&0x7f | 0x80)
l >>= 7
i++
}
data[i] = uint8(l)
i++
i += copy(data[i:], s)
}
}
if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized)
}
@@ -625,8 +629,8 @@ func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
_ = i
var l int
_ = l
if len(m.TermEntries) > 0 {
for _, msg := range m.TermEntries {
if len(m.TermsEntries) > 0 {
for _, msg := range m.TermsEntries {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))

View File

@@ -1,6 +1,6 @@
message BackIndexTermEntry {
required string term = 1;
required uint32 field = 2;
message BackIndexTermsEntry {
required uint32 field = 1;
repeated string terms = 2;
}
message BackIndexStoreEntry {
@@ -9,6 +9,6 @@ message BackIndexStoreEntry {
}
message BackIndexRowValue {
repeated BackIndexTermEntry termEntries = 1;
repeated BackIndexTermsEntry termsEntries = 1;
repeated BackIndexStoreEntry storedEntries = 2;
}

View File

@@ -425,14 +425,15 @@ func (i *indexAliasImpl) Swap(in, out []Index) {
// could be slower in remote usages.
func createChildSearchRequest(req *SearchRequest) *SearchRequest {
rv := SearchRequest{
Query: req.Query,
Size: req.Size + req.From,
From: 0,
Highlight: req.Highlight,
Fields: req.Fields,
Facets: req.Facets,
Explain: req.Explain,
Sort: req.Sort,
Query: req.Query,
Size: req.Size + req.From,
From: 0,
Highlight: req.Highlight,
Fields: req.Fields,
Facets: req.Facets,
Explain: req.Explain,
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
}
return &rv
}

View File

@@ -253,6 +253,24 @@ func (i *indexImpl) Index(id string, data interface{}) (err error) {
return
}
// IndexAdvanced takes a document.Document object,
// skips the mapping, and indexes it.
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
if doc.ID == "" {
return ErrorEmptyID
}
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err = i.i.Update(doc)
return
}
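
A usage sketch for the new IndexAdvanced entry point, assuming it is surfaced on the public Index handle and using the document package's NewDocument and NewTextField constructors; since the mapping is skipped, every field must be built by hand:

```go
doc := document.NewDocument("doc-1") // ID must be non-empty, or ErrorEmptyID is returned
doc.AddField(document.NewTextField("body", []uint64{}, []byte("bleve indexing is easy")))
if err := idx.IndexAdvanced(doc); err != nil {
	// handle indexing failure
}
```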
// Delete entries for the specified identifier from
// the index.
func (i *indexImpl) Delete(id string) (err error) {
@@ -370,7 +388,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}()
searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain)
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
})
if err != nil {
return nil, err
}
@@ -461,6 +482,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if err == nil {
value = boolean
}
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)

View File

@@ -59,3 +59,7 @@ func NewDateTimeFieldMapping() *mapping.FieldMapping {
func NewBooleanFieldMapping() *mapping.FieldMapping {
return mapping.NewBooleanFieldMapping()
}
func NewGeoPointFieldMapping() *mapping.FieldMapping {
return mapping.NewGeoPointFieldMapping()
}

View File

@@ -15,6 +15,7 @@
package mapping
import (
"encoding"
"encoding/json"
"fmt"
"reflect"
@@ -75,7 +76,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
}
}
switch field.Type {
case "text", "datetime", "number", "boolean":
case "text", "datetime", "number", "boolean", "geopoint":
default:
return fmt.Errorf("unknown field type: '%s'", field.Type)
}
@@ -481,8 +482,56 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
default:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
}
case reflect.Map:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
case reflect.Ptr:
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
} else {
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}
default:
dm.walkDocument(property, path, indexes, context)

View File

@@ -21,6 +21,7 @@ import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
)
// control the default behavior for dynamic fields (those not explicitly mapped)
@@ -124,6 +125,16 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
return rv
}
// NewGeoPointFieldMapping returns a default field mapping for geo points
func NewGeoPointFieldMapping() *FieldMapping {
return &FieldMapping{
Type: "geopoint",
Store: true,
Index: true,
IncludeInAll: true,
}
}
// Options returns the indexing options for this field.
func (fm *FieldMapping) Options() document.IndexingOptions {
var rv document.IndexingOptions
@@ -208,6 +219,20 @@ func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string
}
}
func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) {
lon, lat, found := geo.ExtractGeoPoint(propertyMightBeGeoPoint)
if found {
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
field := document.NewGeoPointFieldWithIndexingOptions(fieldName, indexes, lon, lat, options)
context.doc.AddField(field)
if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer {
analyzerName := fm.Analyzer
if analyzerName == "" {

View File

@@ -289,7 +289,12 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
}
func (im *IndexMappingImpl) determineType(data interface{}) string {
// first see if the object implements Classifier
// first see if the object implements bleveClassifier
bleveClassifier, ok := data.(bleveClassifier)
if ok {
return bleveClassifier.BleveType()
}
// next see if the object implements Classifier
classifier, ok := data.(Classifier)
if ok {
return classifier.Type()

View File

@@ -22,12 +22,21 @@ import (
"github.com/blevesearch/bleve/document"
)
// A Classifier is an interface describing any object
// which knows how to identify its own type.
// A Classifier is an interface describing any object which knows how to
// identify its own type. Alternatively, if a struct already has a Type
// field or method in conflict, one can use BleveType instead.
type Classifier interface {
Type() string
}
// A bleveClassifier is an interface describing any object which knows how
// to identify its own type. This is introduced as an alternative to the
// Classifier interface which often has naming conflicts with existing
// structures.
type bleveClassifier interface {
BleveType() string
}
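
As an illustration of why the alternative exists: the hypothetical Event struct below already uses a Type field for application data, so it steers mapping selection through BleveType instead of implementing Classifier:

```go
type Event struct {
	Type string // application data, not a mapping hint
	Name string
}

// BleveType routes Event values to the "event" document mapping,
// leaving the conflicting Type field untouched.
func (e Event) BleveType() string {
	return "event"
}
```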
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
// SetLog sets the logger used for logging

43
vendor/github.com/blevesearch/bleve/numeric/bin.go generated vendored Normal file
View File

@@ -0,0 +1,43 @@
package numeric
var interleaveMagic = []uint64{
0x5555555555555555,
0x3333333333333333,
0x0F0F0F0F0F0F0F0F,
0x00FF00FF00FF00FF,
0x0000FFFF0000FFFF,
0x00000000FFFFFFFF,
0xAAAAAAAAAAAAAAAA,
}
var interleaveShift = []uint{1, 2, 4, 8, 16}
// Interleave the first 32 bits of each uint64
// adapted from org.apache.lucene.util.BitUtil
// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {
v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4]
v1 = (v1 | (v1 << interleaveShift[3])) & interleaveMagic[3]
v1 = (v1 | (v1 << interleaveShift[2])) & interleaveMagic[2]
v1 = (v1 | (v1 << interleaveShift[1])) & interleaveMagic[1]
v1 = (v1 | (v1 << interleaveShift[0])) & interleaveMagic[0]
v2 = (v2 | (v2 << interleaveShift[4])) & interleaveMagic[4]
v2 = (v2 | (v2 << interleaveShift[3])) & interleaveMagic[3]
v2 = (v2 | (v2 << interleaveShift[2])) & interleaveMagic[2]
v2 = (v2 | (v2 << interleaveShift[1])) & interleaveMagic[1]
v2 = (v2 | (v2 << interleaveShift[0])) & interleaveMagic[0]
return (v2 << 1) | v1
}
// Deinterleave the 32-bit value starting at position 0
// to get the other 32-bit value, shift it by 1 first
func Deinterleave(b uint64) uint64 {
b &= interleaveMagic[0]
b = (b ^ (b >> interleaveShift[0])) & interleaveMagic[1]
b = (b ^ (b >> interleaveShift[1])) & interleaveMagic[2]
b = (b ^ (b >> interleaveShift[2])) & interleaveMagic[3]
b = (b ^ (b >> interleaveShift[3])) & interleaveMagic[4]
b = (b ^ (b >> interleaveShift[4])) & interleaveMagic[5]
return b
}
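
A small round-trip sketch of the two helpers; the inputs are arbitrary 32-bit values, and the shift-by-one for the second component follows the comment on Deinterleave:

```go
v1, v2 := uint64(0x12345678), uint64(0x0ABCDEF0) // only the low 32 bits matter
packed := Interleave(v1, v2)
back1 := Deinterleave(packed)      // even bit positions -> v1
back2 := Deinterleave(packed >> 1) // odd bit positions  -> v2
_ = back1 == v1 && back2 == v2     // true
```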

View File

@@ -139,6 +139,23 @@ func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewTermRangeQuery creates a new Query for ranges
// of text terms.
// Either endpoint, but not both, can be "".
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewTermRangeQuery(min, max string) *query.TermRangeQuery {
return query.NewTermRangeQuery(min, max)
}
// NewTermRangeInclusiveQuery creates a new Query for ranges
// of text terms.
// Either endpoint, but not both, can be "".
// Control endpoint inclusion with minInclusive, maxInclusive.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *query.TermRangeQuery {
return query.NewTermRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
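
A brief usage sketch for the two term range constructors; the field name is illustrative:

```go
// matches terms t with "apple" <= t < "banana"
trq := bleve.NewTermRangeQuery("apple", "banana")
trq.SetField("keyword")

// same range, but with both endpoints inclusive
incl := true
trqi := bleve.NewTermRangeInclusiveQuery("apple", "banana", &incl, &incl)
trqi.SetField("keyword")
```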
// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct
@@ -184,3 +201,18 @@ func NewTermQuery(term string) *query.TermQuery {
func NewWildcardQuery(wildcard string) *query.WildcardQuery {
return query.NewWildcardQuery(wildcard)
}
// NewGeoBoundingBoxQuery creates a new Query for performing geo bounding
// box searches. The arguments describe the position of the box; documents
// which have an indexed geo point inside the box will be returned.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *query.GeoBoundingBoxQuery {
return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat)
}
// NewGeoDistanceQuery creates a new Query for performing geo distance
// searches. The arguments describe a position and a distance. Documents
// which have an indexed geo point which is less than or equal to the provided
// distance from the given position will be returned.
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery {
return query.NewGeoDistanceQuery(lon, lat, distance)
}
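
And a matching sketch for the geo constructors, with made-up coordinates around San Francisco; note that arguments are ordered (lon, lat):

```go
// documents whose indexed geo point falls inside the box
box := bleve.NewGeoBoundingBoxQuery(-122.52, 37.82, -122.35, 37.70)
box.SetField("location")

// documents within 5km of the given position
near := bleve.NewGeoDistanceQuery(-122.42, 37.77, "5km")
near.SetField("location")
```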

View File

@@ -20,10 +20,16 @@ import (
"time"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/datetime/optional"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query"
)
var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name
type numericRange struct {
Name string `json:"name,omitempty"`
Min *float64 `json:"min,omitempty"`
@@ -105,26 +111,41 @@ type FacetRequest struct {
}
func (fr *FacetRequest) Validate() error {
if len(fr.NumericRanges) > 0 && len(fr.DateTimeRanges) > 0 {
nrCount := len(fr.NumericRanges)
drCount := len(fr.DateTimeRanges)
if nrCount > 0 && drCount > 0 {
return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
}
nrNames := map[string]interface{}{}
for _, nr := range fr.NumericRanges {
if _, ok := nrNames[nr.Name]; ok {
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
if nrCount > 0 {
nrNames := map[string]interface{}{}
for _, nr := range fr.NumericRanges {
if _, ok := nrNames[nr.Name]; ok {
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
}
nrNames[nr.Name] = struct{}{}
if nr.Min == nil && nr.Max == nil {
return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
}
}
nrNames[nr.Name] = struct{}{}
}
drNames := map[string]interface{}{}
for _, dr := range fr.DateTimeRanges {
if _, ok := drNames[dr.Name]; ok {
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
} else {
dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
if err != nil {
return err
}
drNames := map[string]interface{}{}
for _, dr := range fr.DateTimeRanges {
if _, ok := drNames[dr.Name]; ok {
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
}
drNames[dr.Name] = struct{}{}
start, end := dr.ParseDates(dateTimeParser)
if start.IsZero() && end.IsZero() {
return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
}
}
drNames[dr.Name] = struct{}{}
}
return nil
}
@@ -149,6 +170,16 @@ func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
}
// AddDateTimeRangeString adds a bucket to a field
// containing date values.
func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
if fr.DateTimeRanges == nil {
fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
}
fr.DateTimeRanges = append(fr.DateTimeRanges,
&dateTimeRange{Name: name, startString: start, endString: end})
}
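
A short sketch of the string-based variant next to its time.Time counterpart; the field and range names are illustrative, and a nil endpoint leaves that side of the range open:

```go
fr := bleve.NewFacetRequest("updated", 10)
start := "2017-01-01T00:00:00Z"
fr.AddDateTimeRangeString("since_2017", &start, nil) // open-ended upper bound
```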
// AddNumericRange adds a bucket to a field
// containing numeric values. Documents with a
// numeric value falling into this range are
@@ -219,14 +250,15 @@ func (h *HighlightRequest) AddField(field string) {
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
Query query.Query `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
Query query.Query `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
}
func (r *SearchRequest) Validate() error {
@@ -267,14 +299,15 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
var temp struct {
Q json.RawMessage `json:"query"`
Size *int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"`
Q json.RawMessage `json:"query"`
Size *int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
}
err := json.Unmarshal(input, &temp)
@@ -300,6 +333,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Highlight = temp.Highlight
r.Fields = temp.Fields
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err

View File

@@ -34,11 +34,20 @@ func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
return rv
}
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) {
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.Len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
heap.Push(c, doc)
}
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch {
func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
return heap.Pop(c).(*search.DocumentMatch)
}
@@ -49,17 +58,12 @@ func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.Documen
return make(search.DocumentMatchCollection, 0), nil
}
rv := make(search.DocumentMatchCollection, size)
for count > 0 {
count--
if count >= skip {
size--
doc := heap.Pop(c).(*search.DocumentMatch)
rv[size] = doc
err := fixup(doc)
if err != nil {
return nil, err
}
for i := size - 1; i >= 0; i-- {
doc := heap.Pop(c).(*search.DocumentMatch)
rv[i] = doc
err := fixup(doc)
if err != nil {
return nil, err
}
}
return rv, nil

View File

@@ -34,7 +34,16 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
return rv
}
func (c *collectStoreList) Add(doc *search.DocumentMatch) {
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreList) add(doc *search.DocumentMatch) {
for e := c.results.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DocumentMatch)
if c.compare(doc, curr) >= 0 {
@@ -46,7 +55,7 @@ func (c *collectStoreList) Add(doc *search.DocumentMatch) {
c.results.PushBack(doc)
}
func (c *collectStoreList) RemoveLast() *search.DocumentMatch {
func (c *collectStoreList) removeLast() *search.DocumentMatch {
return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
}
@@ -73,6 +82,6 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen
return search.DocumentMatchCollection{}, nil
}
func (c *collectStoreList) Len() int {
func (c *collectStoreList) len() int {
return c.results.Len()
}

View File

@@ -29,7 +29,16 @@ func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
return rv
}
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
// find where to insert, starting at end (lowest)
i := len(c.slice)
for ; i > 0; i-- {
@@ -44,7 +53,7 @@ func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
c.slice[i] = doc
}
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch {
func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
var rv *search.DocumentMatch
rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1]
return rv
@@ -63,6 +72,6 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume
return search.DocumentMatchCollection{}, nil
}
func (c *collectStoreSlice) Len() int {
func (c *collectStoreSlice) len() int {
return len(c.slice)
}

View File

@@ -22,6 +22,15 @@ import (
"golang.org/x/net/context"
)
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
// exceeded, nil is returned.
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)
}
// PreAllocSizeSkipCap will cap preallocation to this amount when
// size+skip exceeds this value
var PreAllocSizeSkipCap = 1000
@@ -41,7 +50,7 @@ type TopNCollector struct {
results search.DocumentMatchCollection
facetsBuilder *search.FacetsBuilder
store *collectStoreSlice
store collectorStore
needDocIds bool
neededFields []string
@@ -68,9 +77,15 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
backingSize = PreAllocSizeSkipCap + 1
}
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
if size+skip > 10 {
hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
} else {
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
}
// these lookups traverse an interface, so do once up-front
if sort.RequiresDocID() {
@@ -114,12 +129,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
default:
}
}
if hc.facetsBuilder != nil {
err = hc.facetsBuilder.Update(next)
if err != nil {
break
}
}
err = hc.collectSingle(searchContext, reader, next)
if err != nil {
@@ -144,6 +153,16 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var sortByScoreOpt = []string{"_score"}
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
var err error
// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
err = hc.visitFieldTerms(reader, d)
if err != nil {
return err
}
}
// increment total hits
hc.total++
d.HitNumber = hc.total
@@ -153,7 +172,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
hc.maxScore = d.Score
}
var err error
// see if we need to load ID (at this early stage, for example to sort on it)
if hc.needDocIds {
d.ID, err = reader.ExternalID(d.IndexInternalID)
@@ -162,22 +180,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
}
// see if we need to load the stored fields
if len(hc.neededFields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
// look them up
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if d.CachedFieldTerms == nil {
d.CachedFieldTerms = make(map[string][]string)
}
d.CachedFieldTerms.Merge(fieldTerms)
}
// compute this hits sort value
if len(hc.sort) == 1 && hc.cachedScoring[0] {
d.Sort = sortByScoreOpt
@@ -197,9 +199,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
}
hc.store.Add(d)
if hc.store.Len() > hc.size+hc.skip {
removed := hc.store.RemoveLast()
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
if removed != nil {
if hc.lowestMatchOutsideResults == nil {
hc.lowestMatchOutsideResults = removed
} else {
@@ -215,9 +216,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
return nil
}
// visitFieldTerms is responsible for visiting the field terms of the
// search hit, and passing visited terms to the sort and facet builder
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
if hc.facetsBuilder != nil {
hc.facetsBuilder.StartDoc()
}
err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
if hc.facetsBuilder != nil {
hc.facetsBuilder.UpdateVisitor(field, term)
}
hc.sort.UpdateVisitor(field, term)
})
if hc.facetsBuilder != nil {
hc.facetsBuilder.EndDoc()
}
return err
}
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
}
// finalizeResults starts with the heap containing the final top size+skip

View File

@@ -18,7 +18,6 @@ import (
"sort"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
@@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct {
total int
missing int
ranges map[string]*dateTimeRange
sawValue bool
}
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
@@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string {
return fb.field
}
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}
}
} else {
}
}
func (fb *DateTimeFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *DateTimeFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@@ -17,7 +17,6 @@ package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
@@ -34,6 +33,7 @@ type NumericFacetBuilder struct {
total int
missing int
ranges map[string]*numericRange
sawValue bool
}
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
@@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string {
return fb.field
}
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}
}
} else {
}
}
func (fb *NumericFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *NumericFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@@ -17,7 +17,6 @@ package facet
import (
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
@@ -27,6 +26,7 @@ type TermsFacetBuilder struct {
termsCount map[string]int
total int
missing int
sawValue bool
}
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
terms, ok := ft[fb.field]
if ok {
for _, term := range terms {
existingCount, existed := fb.termsCount[term]
if existed {
fb.termsCount[term] = existingCount + 1
} else {
fb.termsCount[term] = 1
}
fb.total++
}
} else {
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
fb.total++
}
}
func (fb *TermsFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *TermsFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}

View File

@@ -21,7 +21,10 @@ import (
)
type FacetBuilder interface {
Update(index.FieldTerms)
StartDoc()
UpdateVisitor(field string, term []byte)
EndDoc()
Result() *FacetResult
Field() string
}
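
The visitor-style contract implies a per-document lifecycle; a minimal sketch of how a collector is expected to drive any FacetBuilder, with the terms slice standing in for whatever the index reader visits:

```go
// visitOneHit sketches the per-document facet lifecycle.
func visitOneHit(fb search.FacetBuilder, field string, terms [][]byte) {
	fb.StartDoc() // reset per-document state
	for _, term := range terms {
		fb.UpdateVisitor(field, term) // once per visited (field, term) pair
	}
	fb.EndDoc() // counts the document as missing if no term was seen
}
```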
@@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
}
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
if fb.fields == nil {
for _, facetBuilder := range fb.facets {
fb.fields = append(fb.fields, facetBuilder.Field())
}
}
func (fb *FacetsBuilder) RequiredFields() []string {
return fb.fields
}
if len(fb.fields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields)
// look them up
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if docMatch.CachedFieldTerms == nil {
docMatch.CachedFieldTerms = make(map[string][]string)
}
docMatch.CachedFieldTerms.Merge(fieldTerms)
}
func (fb *FacetsBuilder) StartDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.Update(docMatch.CachedFieldTerms)
facetBuilder.StartDoc()
}
}
func (fb *FacetsBuilder) EndDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.EndDoc()
}
}
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
for _, facetBuilder := range fb.facets {
facetBuilder.UpdateVisitor(field, term)
}
return nil
}
type TermFacet struct {

View File

@@ -44,7 +44,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
continue
}
// make sure the array positions match
if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) {
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
continue
}
if termLocation.Start < curr {

View File

@@ -37,7 +37,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
OUTER:
for _, locations := range s.tlm {
for _, location := range locations {
if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0
// once we find a term in the fragment
// don't care about additional matches

View File

@@ -87,7 +87,7 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
if ok {
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations {
if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
if otl.ArrayPositions.Equals(f.ArrayPositions()) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
}
}

View File

@@ -23,7 +23,7 @@ import (
type TermLocation struct {
Term string
ArrayPositions []float64
ArrayPositions search.ArrayPositions
Pos int
Start int
End int
@@ -103,15 +103,3 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
sort.Sort(rv)
return rv
}
func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
if len(fieldArrayPositions) != len(termLocationArrayPositions) {
return false
}
for i := 0; i < len(fieldArrayPositions); i++ {
if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) {
return false
}
}
return true
}

View File

@@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// pre-allocated to accommodate the requested number of DocumentMatch
// instances
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size)
avail := make(DocumentMatchCollection, size)
// pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size)
startSorts := make([]string, size*sortsize)
// make these initial instances available
for i := range startBlock {
startBlock[i].Sort = make([]string, 0, sortsize)
avail = append(avail, &startBlock[i])
i, j := 0, 0
for i < size {
avail[i] = &startBlock[i]
avail[i].Sort = startSorts[j:j]
i += 1
j += sortsize
}
return &DocumentMatchPool{
avail: avail,

View File

@@ -22,7 +22,7 @@ import (
)
type BoolFieldQuery struct {
Bool bool `json:"bool"`
Bool bool `json:"bool"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
@@ -39,20 +39,19 @@ func (q *BoolFieldQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *BoolFieldQuery) Boost() float64{
return q.BoostVal.Value()
func (q *BoolFieldQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *BoolFieldQuery) SetField(f string) {
q.FieldVal = f
}
func (q *BoolFieldQuery) Field() string{
func (q *BoolFieldQuery) Field() string {
return q.FieldVal
}
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
@@ -61,5 +60,5 @@ func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e
if q.Bool {
term = "T"
}
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), explain)
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options)
}

View File

@@ -25,10 +25,11 @@ import (
)
type BooleanQuery struct {
Must Query `json:"must,omitempty"`
Should Query `json:"should,omitempty"`
MustNot Query `json:"must_not,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
Must Query `json:"must,omitempty"`
Should Query `json:"should,omitempty"`
MustNot Query `json:"must_not,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
queryStringMode bool
}
// NewBooleanQuery creates a compound Query composed
@@ -55,6 +56,15 @@ func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuer
return &rv
}
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery {
rv := NewBooleanQuery(nil, nil, nil)
rv.queryStringMode = true
rv.AddMust(must...)
rv.AddShould(should...)
rv.AddMustNot(mustNot...)
return rv
}
// SetMinShould requires that at least minShould of the
// should Queries must be satisfied.
func (q *BooleanQuery) SetMinShould(minShould float64) {
@@ -63,7 +73,9 @@ func (q *BooleanQuery) SetMinShould(minShould float64) {
func (q *BooleanQuery) AddMust(m ...Query) {
if q.Must == nil {
q.Must = NewConjunctionQuery([]Query{})
tmp := NewConjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.Must = tmp
}
for _, mq := range m {
q.Must.(*ConjunctionQuery).AddQuery(mq)
@@ -72,7 +84,9 @@ func (q *BooleanQuery) AddMust(m ...Query) {
func (q *BooleanQuery) AddShould(m ...Query) {
if q.Should == nil {
q.Should = NewDisjunctionQuery([]Query{})
tmp := NewDisjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.Should = tmp
}
for _, mq := range m {
q.Should.(*DisjunctionQuery).AddQuery(mq)
@@ -81,7 +95,9 @@ func (q *BooleanQuery) AddShould(m ...Query) {
func (q *BooleanQuery) AddMustNot(m ...Query) {
if q.MustNot == nil {
q.MustNot = NewDisjunctionQuery([]Query{})
tmp := NewDisjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.MustNot = tmp
}
for _, mq := range m {
q.MustNot.(*DisjunctionQuery).AddQuery(mq)
@@ -93,44 +109,67 @@ func (q *BooleanQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *BooleanQuery) Boost() float64{
func (q *BooleanQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
var err error
var mustNotSearcher search.Searcher
if q.MustNot != nil {
mustNotSearcher, err = q.MustNot.Searcher(i, m, explain)
mustNotSearcher, err = q.MustNot.Searcher(i, m, options)
if err != nil {
return nil, err
}
if q.Must == nil && q.Should == nil {
q.Must = NewMatchAllQuery()
// if must not is MatchNone, reset it to nil
if _, ok := mustNotSearcher.(*searcher.MatchNoneSearcher); ok {
mustNotSearcher = nil
}
}
var mustSearcher search.Searcher
if q.Must != nil {
mustSearcher, err = q.Must.Searcher(i, m, explain)
mustSearcher, err = q.Must.Searcher(i, m, options)
if err != nil {
return nil, err
}
// if must searcher is MatchNone, reset it to nil
if _, ok := mustSearcher.(*searcher.MatchNoneSearcher); ok {
mustSearcher = nil
}
}
var shouldSearcher search.Searcher
if q.Should != nil {
shouldSearcher, err = q.Should.Searcher(i, m, explain)
shouldSearcher, err = q.Should.Searcher(i, m, options)
if err != nil {
return nil, err
}
// if should searcher is MatchNone, reset it to nil
if _, ok := shouldSearcher.(*searcher.MatchNoneSearcher); ok {
shouldSearcher = nil
}
}
// if all 3 are nil, return MatchNone
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
return searcher.NewMatchNoneSearcher(i)
}
// if only mustNotSearcher, start with MatchAll
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options)
if err != nil {
return nil, err
}
}
// optimization, if only should searcher, just return it instead
if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
return shouldSearcher, nil
}
return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, explain)
return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options)
}
func (q *BooleanQuery) Validate() error {

View File

@@ -24,8 +24,9 @@ import (
)
type ConjunctionQuery struct {
Conjuncts []Query `json:"conjuncts"`
BoostVal *Boost `json:"boost,omitempty"`
Conjuncts []Query `json:"conjuncts"`
BoostVal *Boost `json:"boost,omitempty"`
queryStringMode bool
}
// NewConjunctionQuery creates a new compound Query.
@@ -41,7 +42,7 @@ func (q *ConjunctionQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *ConjunctionQuery) Boost() float64{
func (q *ConjunctionQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -51,11 +52,10 @@ func (q *ConjunctionQuery) AddQuery(aq ...Query) {
}
}
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
ss := make([]search.Searcher, len(q.Conjuncts))
for in, conjunct := range q.Conjuncts {
var err error
ss[in], err = conjunct.Searcher(i, m, explain)
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, 0, len(q.Conjuncts))
for _, conjunct := range q.Conjuncts {
sr, err := conjunct.Searcher(i, m, options)
if err != nil {
for _, searcher := range ss {
if searcher != nil {
@@ -64,8 +64,16 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
return nil, err
}
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
// in query string mode, skip match none
continue
}
ss = append(ss, sr)
}
return searcher.NewConjunctionSearcher(i, ss, explain)
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
}
return searcher.NewConjunctionSearcher(i, ss, options)
}
func (q *ConjunctionQuery) Validate() error {

View File

@@ -113,20 +113,19 @@ func (q *DateRangeQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *DateRangeQuery) Boost() float64{
func (q *DateRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *DateRangeQuery) SetField(f string) {
q.FieldVal = f
}
func (q *DateRangeQuery) Field() string{
func (q *DateRangeQuery) Field() string {
return q.FieldVal
}
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
min, max, err := q.parseEndpoints()
if err != nil {
return nil, err
@@ -137,7 +136,7 @@ func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e
field = m.DefaultSearchField()
}
return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), explain)
return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
}
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {

View File

@@ -25,9 +25,10 @@ import (
)
type DisjunctionQuery struct {
Disjuncts []Query `json:"disjuncts"`
BoostVal *Boost `json:"boost,omitempty"`
Min float64 `json:"min"`
Disjuncts []Query `json:"disjuncts"`
BoostVal *Boost `json:"boost,omitempty"`
Min float64 `json:"min"`
queryStringMode bool
}
// NewDisjunctionQuery creates a new compound Query.
@@ -43,11 +44,10 @@ func (q *DisjunctionQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *DisjunctionQuery) Boost() float64{
func (q *DisjunctionQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *DisjunctionQuery) AddQuery(aq ...Query) {
for _, aaq := range aq {
q.Disjuncts = append(q.Disjuncts, aaq)
@@ -58,11 +58,10 @@ func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m
}
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
ss := make([]search.Searcher, len(q.Disjuncts))
for in, disjunct := range q.Disjuncts {
var err error
ss[in], err = disjunct.Searcher(i, m, explain)
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, 0, len(q.Disjuncts))
for _, disjunct := range q.Disjuncts {
sr, err := disjunct.Searcher(i, m, options)
if err != nil {
for _, searcher := range ss {
if searcher != nil {
@@ -71,8 +70,16 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
return nil, err
}
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
// in query string mode, skip match none
continue
}
ss = append(ss, sr)
}
return searcher.NewDisjunctionSearcher(i, ss, q.Min, explain)
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
}
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
}
func (q *DisjunctionQuery) Validate() error {

View File

@@ -40,10 +40,10 @@ func (q *DocIDQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *DocIDQuery) Boost() float64{
func (q *DocIDQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), explain)
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), options)
}

View File

@@ -48,7 +48,7 @@ func (q *FuzzyQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *FuzzyQuery) Boost() float64{
func (q *FuzzyQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -56,7 +56,7 @@ func (q *FuzzyQuery) SetField(f string) {
q.FieldVal = f
}
func (q *FuzzyQuery) Field() string{
func (q *FuzzyQuery) Field() string {
return q.FieldVal
}
@@ -68,10 +68,10 @@ func (q *FuzzyQuery) SetPrefix(p int) {
q.Prefix = p
}
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), explain)
return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), options)
}

View File

@@ -0,0 +1,113 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type GeoBoundingBoxQuery struct {
TopLeft []float64 `json:"top_left,omitempty"`
BottomRight []float64 `json:"bottom_right,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
return &GeoBoundingBoxQuery{
TopLeft: []float64{topLeftLon, topLeftLat},
BottomRight: []float64{bottomRightLon, bottomRightLat},
}
}
func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *GeoBoundingBoxQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *GeoBoundingBoxQuery) SetField(f string) {
q.FieldVal = f
}
func (q *GeoBoundingBoxQuery) Field() string {
return q.FieldVal
}
func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
if q.BottomRight[0] < q.TopLeft[0] {
// cross date line, rewrite as two parts
leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
if err != nil {
return nil, err
}
rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true)
if err != nil {
_ = leftSearcher.Close()
return nil, err
}
return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
}
return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
}
func (q *GeoBoundingBoxQuery) Validate() error {
return nil
}
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
TopLeft interface{} `json:"top_left,omitempty"`
BottomRight interface{} `json:"bottom_right,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// now use our generic point parsing code from the geo package
lon, lat, found := geo.ExtractGeoPoint(tmp.TopLeft)
if !found {
return fmt.Errorf("geo location top_left not in a valid format")
}
q.TopLeft = []float64{lon, lat}
lon, lat, found = geo.ExtractGeoPoint(tmp.BottomRight)
if !found {
return fmt.Errorf("geo location bottom_right not in a valid format")
}
q.BottomRight = []float64{lon, lat}
q.FieldVal = tmp.FieldVal
q.BoostVal = tmp.BoostVal
return nil
}

View File
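When the bottom-right longitude is west of the top-left longitude, the box crosses the date line, so the searcher is rewritten as a disjunction of two half-boxes. A minimal sketch, assuming an existing index whose mapping has a geopoint field named `location`:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// top-left lon 170, bottom-right lon -170: the box crosses the
	// date line and is searched as two half-boxes in a disjunction
	q := query.NewGeoBoundingBoxQuery(170.0, 50.0, -170.0, 40.0)
	q.SetField("location") // assumed geopoint-mapped field
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```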

@@ -0,0 +1,100 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type GeoDistanceQuery struct {
Location []float64 `json:"location,omitempty"`
Distance string `json:"distance,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
return &GeoDistanceQuery{
Location: []float64{lon, lat},
Distance: distance,
}
}
func (q *GeoDistanceQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *GeoDistanceQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *GeoDistanceQuery) SetField(f string) {
q.FieldVal = f
}
func (q *GeoDistanceQuery) Field() string {
return q.FieldVal
}
func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
dist, err := geo.ParseDistance(q.Distance)
if err != nil {
return nil, err
}
return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1],
dist, field, q.BoostVal.Value(), options)
}
func (q *GeoDistanceQuery) Validate() error {
return nil
}
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Location interface{} `json:"location,omitempty"`
Distance string `json:"distance,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
// now use our generic point parsing code from the geo package
lon, lat, found := geo.ExtractGeoPoint(tmp.Location)
if !found {
return fmt.Errorf("geo location not in a valid format")
}
q.Location = []float64{lon, lat}
q.Distance = tmp.Distance
q.FieldVal = tmp.FieldVal
q.BoostVal = tmp.BoostVal
return nil
}

View File
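`geo.ParseDistance` converts the distance string to meters, so the query accepts a number with a unit suffix. A minimal sketch under the same assumptions as above (an index with a geopoint field `location`; `"10km"` as one of the supported unit forms):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// all points within 10km of lon -122.4, lat 37.8
	q := query.NewGeoDistanceQuery(-122.4, 37.8, "10km")
	q.SetField("location") // assumed geopoint-mapped field
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```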

@@ -90,7 +90,7 @@ func (q *MatchQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *MatchQuery) Boost() float64{
func (q *MatchQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -98,7 +98,7 @@ func (q *MatchQuery) SetField(f string) {
q.FieldVal = f
}
func (q *MatchQuery) Field() string{
func (q *MatchQuery) Field() string {
return q.FieldVal
}
@@ -114,7 +114,7 @@ func (q *MatchQuery) SetOperator(operator MatchQueryOperator) {
q.Operator = operator
}
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
@@ -160,17 +160,17 @@ func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expla
shouldQuery := NewDisjunctionQuery(tqs)
shouldQuery.SetMin(1)
shouldQuery.SetBoost(q.BoostVal.Value())
return shouldQuery.Searcher(i, m, explain)
return shouldQuery.Searcher(i, m, options)
case MatchQueryOperatorAnd:
mustQuery := NewConjunctionQuery(tqs)
mustQuery.SetBoost(q.BoostVal.Value())
return mustQuery.Searcher(i, m, explain)
return mustQuery.Searcher(i, m, options)
default:
return nil, fmt.Errorf("unhandled operator %d", q.Operator)
}
}
noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, m, explain)
return noneQuery.Searcher(i, m, options)
}

View File

@@ -38,14 +38,12 @@ func (q *MatchAllQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *MatchAllQuery) Boost() float64{
func (q *MatchAllQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), explain)
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), options)
}
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) {

View File

@@ -38,11 +38,11 @@ func (q *MatchNoneQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *MatchNoneQuery) Boost() float64{
func (q *MatchNoneQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchNoneSearcher(i)
}

View File

@@ -49,7 +49,7 @@ func (q *MatchPhraseQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *MatchPhraseQuery) Boost() float64{
func (q *MatchPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -57,11 +57,11 @@ func (q *MatchPhraseQuery) SetField(f string) {
q.FieldVal = f
}
func (q *MatchPhraseQuery) Field() string{
func (q *MatchPhraseQuery) Field() string {
return q.FieldVal
}
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
@@ -81,15 +81,15 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
tokens := analyzer.Analyze([]byte(q.MatchPhrase))
if len(tokens) > 0 {
phrase := tokenStreamToPhrase(tokens)
phraseQuery := NewPhraseQuery(phrase, field)
phraseQuery := NewMultiPhraseQuery(phrase, field)
phraseQuery.SetBoost(q.BoostVal.Value())
return phraseQuery.Searcher(i, m, explain)
return phraseQuery.Searcher(i, m, options)
}
noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, m, explain)
return noneQuery.Searcher(i, m, options)
}
func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
firstPosition := int(^uint(0) >> 1)
lastPosition := 0
for _, token := range tokens {
@@ -102,13 +102,10 @@ func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
}
phraseLen := lastPosition - firstPosition + 1
if phraseLen > 0 {
rv := make([]string, phraseLen)
for i := 0; i < phraseLen; i++ {
rv[i] = ""
}
rv := make([][]string, phraseLen)
for _, token := range tokens {
pos := token.Position - firstPosition
rv[pos] = string(token.Term)
rv[pos] = append(rv[pos], string(token.Term))
}
return rv
}

View File
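`tokenStreamToPhrase` now returns `[][]string`: each phrase position holds every term the analyzer emitted at that position, so filters that stack tokens (synonyms, for example) are no longer lossy. The function is unexported, so this sketch only shows the input shape and the expected result:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	// two tokens share position 2, as a synonym filter might emit
	tokens := analysis.TokenStream{
		&analysis.Token{Term: []byte("quick"), Position: 1},
		&analysis.Token{Term: []byte("fast"), Position: 2},
		&analysis.Token{Term: []byte("rapid"), Position: 2},
	}
	for _, t := range tokens {
		fmt.Printf("pos=%d term=%s\n", t.Position, t.Term)
	}
	// tokenStreamToPhrase(tokens) would yield:
	//   [][]string{{"quick"}, {"fast", "rapid"}}
}
```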

@@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type MultiPhraseQuery struct {
Terms [][]string `json:"terms"`
Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMultiPhraseQuery creates a new Query for finding
// term phrases in the index.
// It is like PhraseQuery, but each position in the
// phrase may be satisfied by a list of terms
// as opposed to just one.
// At least one of the terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
return &MultiPhraseQuery{
Terms: terms,
Field: field,
}
}
func (q *MultiPhraseQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *MultiPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options)
}
func (q *MultiPhraseQuery) Validate() error {
if len(q.Terms) < 1 {
return fmt.Errorf("phrase query must contain at least one term")
}
return nil
}
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
type _mphraseQuery MultiPhraseQuery
tmp := _mphraseQuery{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Terms = tmp.Terms
q.Field = tmp.Field
q.BoostVal = tmp.BoostVal
return nil
}

View File
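Each inner slice of `Terms` lists the terms that may satisfy that position of the phrase. A minimal sketch, assuming an index whose `Body` field was indexed with `IncludeTermVectors`:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// matches "quick brown fox" as well as "quick red fox";
	// "Body" is an assumed field indexed with term vectors
	q := query.NewMultiPhraseQuery(
		[][]string{{"quick"}, {"brown", "red"}, {"fox"}}, "Body")
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```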

@@ -59,7 +59,7 @@ func (q *NumericRangeQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *NumericRangeQuery) Boost() float64{
func (q *NumericRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -67,16 +67,16 @@ func (q *NumericRangeQuery) SetField(f string) {
q.FieldVal = f
}
func (q *NumericRangeQuery) Field() string{
func (q *NumericRangeQuery) Field() string {
return q.FieldVal
}
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), explain)
return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
func (q *NumericRangeQuery) Validate() error {

View File

@@ -25,10 +25,9 @@ import (
)
type PhraseQuery struct {
Terms []string `json:"terms"`
Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
termQueries []Query
Terms []string `json:"terms"`
Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewPhraseQuery creates a new Query for finding
@@ -38,18 +37,9 @@ type PhraseQuery struct {
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *PhraseQuery {
termQueries := make([]Query, 0)
for _, term := range terms {
if term != "" {
tq := NewTermQuery(term)
tq.SetField(field)
termQueries = append(termQueries, tq)
}
}
return &PhraseQuery{
Terms: terms,
Field: field,
termQueries: termQueries,
Terms: terms,
Field: field,
}
}
@@ -58,22 +48,16 @@ func (q *PhraseQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *PhraseQuery) Boost() float64{
func (q *PhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
conjunctionQuery := NewConjunctionQuery(q.termQueries)
conjunctionSearcher, err := conjunctionQuery.Searcher(i, m, explain)
if err != nil {
return nil, err
}
return searcher.NewPhraseSearcher(i, conjunctionSearcher.(*searcher.ConjunctionSearcher), q.Terms)
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options)
}
func (q *PhraseQuery) Validate() error {
if len(q.termQueries) < 1 {
if len(q.Terms) < 1 {
return fmt.Errorf("phrase query must contain at least one term")
}
return nil
@@ -89,9 +73,5 @@ func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
q.Terms = tmp.Terms
q.Field = tmp.Field
q.BoostVal = tmp.BoostVal
q.termQueries = make([]Query, len(q.Terms))
for i, term := range q.Terms {
q.termQueries[i] = &TermQuery{Term: term, FieldVal: q.Field, BoostVal: q.BoostVal}
}
return nil
}

View File

@@ -41,7 +41,7 @@ func (q *PrefixQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *PrefixQuery) Boost() float64{
func (q *PrefixQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -49,14 +49,14 @@ func (q *PrefixQuery) SetField(f string) {
q.FieldVal = f
}
func (q *PrefixQuery) Field() string{
func (q *PrefixQuery) Field() string {
return q.FieldVal
}
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), explain)
return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), options)
}

View File

@@ -36,7 +36,8 @@ func SetLog(l *log.Logger) {
// A Query represents a description of the type
// and parameters for a query into the index.
type Query interface {
Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error)
Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error)
}
// A BoostableQuery represents a Query which can be boosted
@@ -122,7 +123,13 @@ func ParseQuery(input []byte) (Query, error) {
var rv PhraseQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
// now try multi-phrase
var rv2 MultiPhraseQuery
err = json.Unmarshal(input, &rv2)
if err != nil {
return nil, err
}
return &rv2, nil
}
return &rv, nil
}
@@ -154,8 +161,8 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasMin := tmp["min"]
_, hasMax := tmp["max"]
_, hasMin := tmp["min"].(float64)
_, hasMax := tmp["max"].(float64)
if hasMin || hasMax {
var rv NumericRangeQuery
err := json.Unmarshal(input, &rv)
@@ -164,6 +171,16 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasMinStr := tmp["min"].(string)
_, hasMaxStr := tmp["max"].(string)
if hasMinStr || hasMaxStr {
var rv TermRangeQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasStart := tmp["start"]
_, hasEnd := tmp["end"]
if hasStart || hasEnd {
@@ -237,6 +254,25 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasTopLeft := tmp["top_left"]
_, hasBottomRight := tmp["bottom_right"]
if hasTopLeft && hasBottomRight {
var rv GeoBoundingBoxQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasDistance := tmp["distance"]
if hasDistance {
var rv GeoDistanceQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type")
}
@@ -300,14 +336,6 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
return nil, err
}
return &q, nil
case *PhraseQuery:
q := *query.(*PhraseQuery)
children, err := expandSlice(q.termQueries)
if err != nil {
return nil, err
}
q.termQueries = children
return &q, nil
default:
return query, nil
}

View File
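`ParseQuery` now dispatches on the JSON type of `min`/`max`: numbers select a `NumericRangeQuery`, strings a `TermRangeQuery`, and the new `top_left`/`bottom_right` and `distance` keys select the geo queries. A small, runnable check:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search/query"
)

func main() {
	numeric := []byte(`{"min": 1, "max": 10, "field": "count"}`)
	q1, err := query.ParseQuery(numeric)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", q1) // *query.NumericRangeQuery

	term := []byte(`{"min": "apple", "max": "mango", "field": "name"}`)
	q2, err := query.ParseQuery(term)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", q2) // *query.TermRangeQuery
}
```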

@@ -39,16 +39,20 @@ func (q *QueryStringQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *QueryStringQuery) Boost() float64{
func (q *QueryStringQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *QueryStringQuery) Parse() (Query, error) {
return parseQuerySyntax(q.Query)
}
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
newQuery, err := parseQuerySyntax(q.Query)
if err != nil {
return nil, err
}
return newQuery.Searcher(i, m, explain)
return newQuery.Searcher(i, m, options)
}
func (q *QueryStringQuery) Validate() error {

View File
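The new exported `Parse` lets callers inspect the query tree a query string produces without building a searcher. A minimal sketch:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search/query"
)

func main() {
	q := query.NewQueryStringQuery(`name:marty +body:search`)
	parsed, err := q.Parse()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", parsed) // *query.BooleanQuery
}
```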

@@ -27,6 +27,7 @@ tEQUAL tTILDE
%type <s> tSTRING
%type <s> tPHRASE
%type <s> tNUMBER
%type <s> posOrNegNumber
%type <s> tTILDE
%type <s> tBOOST
%type <q> searchBase
@@ -127,7 +128,15 @@ tSTRING tCOLON tSTRING tTILDE {
tNUMBER {
str := $1
logDebugGrammar("STRING - %s", str)
q := NewMatchQuery(str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
@@ -154,12 +163,21 @@ tSTRING tCOLON tSTRING {
$$ = q
}
|
tSTRING tCOLON tNUMBER {
tSTRING tCOLON posOrNegNumber {
field := $1
str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := NewMatchQuery(str)
q.SetField(field)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat($3, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
@@ -172,9 +190,12 @@ tSTRING tCOLON tPHRASE {
$$ = q
}
|
tSTRING tCOLON tGREATER tNUMBER {
tSTRING tCOLON tGREATER posOrNegNumber {
field := $1
min, _ := strconv.ParseFloat($4, 64)
min, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@@ -182,9 +203,12 @@ tSTRING tCOLON tGREATER tNUMBER {
$$ = q
}
|
tSTRING tCOLON tGREATER tEQUAL tNUMBER {
tSTRING tCOLON tGREATER tEQUAL posOrNegNumber {
field := $1
min, _ := strconv.ParseFloat($5, 64)
min, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@@ -192,9 +216,12 @@ tSTRING tCOLON tGREATER tEQUAL tNUMBER {
$$ = q
}
|
tSTRING tCOLON tLESS tNUMBER {
tSTRING tCOLON tLESS posOrNegNumber {
field := $1
max, _ := strconv.ParseFloat($4, 64)
max, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@@ -202,9 +229,12 @@ tSTRING tCOLON tLESS tNUMBER {
$$ = q
}
|
tSTRING tCOLON tLESS tEQUAL tNUMBER {
tSTRING tCOLON tLESS tEQUAL posOrNegNumber {
field := $1
max, _ := strconv.ParseFloat($5, 64)
max, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@@ -287,3 +317,12 @@ tBOOST {
}
logDebugGrammar("BOOST %f", boost)
};
posOrNegNumber:
tNUMBER {
$$ = $1
}
|
tMINUS tNUMBER {
$$ = "-" + $2
};

View File
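With these grammar changes a bare number expands to a disjunction of a match query and an exact numeric range (so `age:22` can match both the text token "22" and the indexed number 22), and the new `posOrNegNumber` production lets the range operators accept negative values. A sketch:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search/query"
)

func main() {
	// negative numbers now parse after >=, >, <= and <
	q := query.NewQueryStringQuery(`temp:>=-10 temp:<25`)
	parsed, err := q.Parse()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", parsed)
}
```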

@@ -70,57 +70,58 @@ var yyExca = [...]int{
-2, 5,
}
const yyNprod = 26
const yyNprod = 28
const yyPrivate = 57344
var yyTokenNames []string
var yyStates []string
const yyLast = 31
const yyLast = 42
var yyAct = [...]int{
16, 18, 21, 13, 27, 24, 17, 19, 20, 25,
22, 15, 26, 23, 9, 11, 31, 14, 29, 3,
10, 30, 2, 28, 5, 6, 7, 1, 4, 12,
8,
17, 16, 18, 23, 22, 30, 3, 21, 19, 20,
29, 26, 22, 22, 1, 21, 21, 15, 28, 25,
24, 27, 34, 14, 22, 13, 31, 21, 32, 33,
22, 9, 11, 21, 5, 6, 2, 10, 4, 12,
7, 8,
}
var yyPact = [...]int{
18, -1000, -1000, 18, 10, -1000, -1000, -1000, -6, 3,
-1000, -1000, -1000, -1000, -1000, -4, -12, -1000, -1000, 0,
-1, -1000, -1000, 13, -1000, -1000, 11, -1000, -1000, -1000,
-1000, -1000,
28, -1000, -1000, 28, 27, -1000, -1000, -1000, 16, 9,
-1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000, 6,
5, -1000, -5, -1000, -1000, 23, -1000, -1000, 17, -1000,
-1000, -1000, -1000, -1000, -1000,
}
var yyPgo = [...]int{
0, 30, 29, 28, 27, 22, 19,
0, 0, 41, 39, 38, 14, 36, 6,
}
var yyR1 = [...]int{
0, 4, 5, 5, 6, 3, 3, 3, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2,
0, 5, 6, 6, 7, 4, 4, 4, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 1, 1,
}
var yyR2 = [...]int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 2,
4, 1, 1, 3, 3, 3, 4, 5, 4, 5,
4, 5, 4, 5, 0, 1,
4, 5, 4, 5, 0, 1, 1, 2,
}
var yyChk = [...]int{
-1000, -4, -5, -6, -3, 6, 7, -5, -1, 4,
10, 5, -2, 9, 14, 8, 4, 10, 5, 11,
12, 14, 10, 13, 5, 10, 13, 5, 10, 5,
10, 5,
-1000, -5, -6, -7, -4, 6, 7, -6, -2, 4,
10, 5, -3, 9, 14, 8, 4, -1, 5, 11,
12, 10, 7, 14, -1, 13, 5, -1, 13, 5,
10, -1, 5, -1, 5,
}
var yyDef = [...]int{
5, -2, 1, -2, 0, 6, 7, 2, 24, 8,
11, 12, 4, 25, 9, 0, 13, 14, 15, 0,
0, 10, 16, 0, 20, 18, 0, 22, 17, 21,
19, 23,
0, 26, 0, 10, 16, 0, 20, 18, 0, 22,
27, 17, 21, 19, 23,
}
var yyTok1 = [...]int{
@@ -474,25 +475,25 @@ yydefault:
case 1:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:39
//line query_string.y:40
{
logDebugGrammar("INPUT")
}
case 2:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:44
//line query_string.y:45
{
logDebugGrammar("SEARCH PARTS")
}
case 3:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:48
//line query_string.y:49
{
logDebugGrammar("SEARCH PART")
}
case 4:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:53
//line query_string.y:54
{
query := yyDollar[2].q
if yyDollar[3].pf != nil {
@@ -511,27 +512,27 @@ yydefault:
}
case 5:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:72
//line query_string.y:73
{
yyVAL.n = queryShould
}
case 6:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:76
//line query_string.y:77
{
logDebugGrammar("PLUS")
yyVAL.n = queryMust
}
case 7:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:81
//line query_string.y:82
{
logDebugGrammar("MINUS")
yyVAL.n = queryMustNot
}
case 8:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:87
//line query_string.y:88
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
@@ -547,7 +548,7 @@ yydefault:
}
case 9:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:101
//line query_string.y:102
{
str := yyDollar[1].s
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
@@ -561,7 +562,7 @@ yydefault:
}
case 10:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:113
//line query_string.y:114
{
field := yyDollar[1].s
str := yyDollar[3].s
@@ -577,16 +578,24 @@ yydefault:
}
case 11:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:127
//line query_string.y:128
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
q := NewMatchQuery(str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 12:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:134
//line query_string.y:143
{
phrase := yyDollar[1].s
logDebugGrammar("PHRASE - %s", phrase)
@@ -595,7 +604,7 @@ yydefault:
}
case 13:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:141
//line query_string.y:150
{
field := yyDollar[1].s
str := yyDollar[3].s
@@ -613,18 +622,27 @@ yydefault:
}
case 14:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:157
//line query_string.y:166
{
field := yyDollar[1].s
str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := NewMatchQuery(str)
q.SetField(field)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat(yyDollar[3].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 15:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:166
//line query_string.y:184
{
field := yyDollar[1].s
phrase := yyDollar[3].s
@@ -635,10 +653,13 @@ yydefault:
}
case 16:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:175
//line query_string.y:193
{
field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[4].s, 64)
min, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@@ -647,10 +668,13 @@ yydefault:
}
case 17:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:185
//line query_string.y:206
{
field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[5].s, 64)
min, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@@ -659,10 +683,13 @@ yydefault:
}
case 18:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:195
//line query_string.y:219
{
field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[4].s, 64)
max, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@@ -671,10 +698,13 @@ yydefault:
}
case 19:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:205
//line query_string.y:232
{
field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[5].s, 64)
max, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@@ -683,7 +713,7 @@ yydefault:
}
case 20:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:215
//line query_string.y:245
{
field := yyDollar[1].s
minInclusive := false
@@ -700,7 +730,7 @@ yydefault:
}
case 21:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:230
//line query_string.y:260
{
field := yyDollar[1].s
minInclusive := true
@@ -717,7 +747,7 @@ yydefault:
}
case 22:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:245
//line query_string.y:275
{
field := yyDollar[1].s
maxInclusive := false
@@ -734,7 +764,7 @@ yydefault:
}
case 23:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:260
//line query_string.y:290
{
field := yyDollar[1].s
maxInclusive := true
@@ -751,13 +781,13 @@ yydefault:
}
case 24:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:276
//line query_string.y:306
{
yyVAL.pf = nil
}
case 25:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:280
//line query_string.y:310
{
yyVAL.pf = nil
boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
@@ -768,6 +798,18 @@ yydefault:
}
logDebugGrammar("BOOST %f", boost)
}
case 26:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:322
{
yyVAL.s = yyDollar[1].s
}
case 27:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:326
{
yyVAL.s = "-" + yyDollar[2].s
}
}
goto yystack /* stack new state and value */
}

View File

@@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//go:generate go tool yacc -o query_string.y.go query_string.y
// as of Go 1.8 this requires the goyacc external tool
// available from golang.org/x/tools/cmd/goyacc
//go:generate goyacc -o query_string.y.go query_string.y
//go:generate sed -i.tmp -e 1d query_string.y.go
//go:generate rm query_string.y.go.tmp
@@ -31,6 +34,9 @@ var debugParser bool
var debugLexer bool
func parseQuerySyntax(query string) (rq Query, err error) {
if query == "" {
return NewMatchNoneQuery(), nil
}
lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
doParse(lex)
@@ -66,7 +72,7 @@ type lexerWrapper struct {
func newLexerWrapper(lex yyLexer) *lexerWrapper {
return &lexerWrapper{
lex: lex,
query: NewBooleanQuery(nil, nil, nil),
query: NewBooleanQueryForQueryString(nil, nil, nil),
}
}

View File
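`parseQuerySyntax` now short-circuits the empty string to a `MatchNoneQuery` instead of running the parser over no tokens. A quick check via the exported `Parse`:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search/query"
)

func main() {
	parsed, err := query.NewQueryStringQuery("").Parse()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", parsed) // *query.MatchNoneQuery
}
```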

@@ -33,7 +33,9 @@ type RegexpQuery struct {
// NewRegexpQuery creates a new Query which finds
// documents containing terms that match the
// specified regular expression.
// specified regular expression. The regexp pattern
// SHOULD NOT include ^ or $ modifiers; the search
// will only match entire terms even without them.
func NewRegexpQuery(regexp string) *RegexpQuery {
return &RegexpQuery{
Regexp: regexp,
@@ -45,7 +47,7 @@ func (q *RegexpQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *RegexpQuery) Boost() float64{
func (q *RegexpQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -53,11 +55,11 @@ func (q *RegexpQuery) SetField(f string) {
q.FieldVal = f
}
func (q *RegexpQuery) Field() string{
func (q *RegexpQuery) Field() string {
return q.FieldVal
}
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
@@ -67,7 +69,7 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expl
return nil, err
}
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain)
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
}
func (q *RegexpQuery) Validate() error {
@@ -76,14 +78,14 @@ func (q *RegexpQuery) Validate() error {
func (q *RegexpQuery) compile() error {
if q.compiled == nil {
// require that pattern be anchored to start and end of term
// require that pattern NOT be anchored to start and end of term
actualRegexp := q.Regexp
if !strings.HasPrefix(actualRegexp, "^") {
actualRegexp = "^" + actualRegexp
}
if !strings.HasSuffix(actualRegexp, "$") {
actualRegexp = actualRegexp + "$"
if strings.HasPrefix(actualRegexp, "^") {
actualRegexp = actualRegexp[1:] // remove leading ^
}
// do not attempt to remove trailing $, its presence is not
// known to interfere with LiteralPrefix() the way ^ does
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
var err error
q.compiled, err = regexp.Compile(actualRegexp)
if err != nil {

View File
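The query no longer adds anchors itself: a leading `^` is stripped (it interferes with `LiteralPrefix`), a trailing `$` is left alone, and matching is whole-term either way. A minimal sketch, assuming an index with a `Body` field:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// matches whole terms beginning with "blev"; no ^ or $ needed
	q := query.NewRegexpQuery("blev.*")
	q.SetField("Body") // assumed field name
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```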

@@ -40,7 +40,7 @@ func (q *TermQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *TermQuery) Boost() float64{
func (q *TermQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -48,14 +48,14 @@ func (q *TermQuery) SetField(f string) {
q.FieldVal = f
}
func (q *TermQuery) Field() string{
func (q *TermQuery) Field() string {
return q.FieldVal
}
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), explain)
return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), options)
}

View File

@@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type TermRangeQuery struct {
Min string `json:"min,omitempty"`
Max string `json:"max,omitempty"`
InclusiveMin *bool `json:"inclusive_min,omitempty"`
InclusiveMax *bool `json:"inclusive_max,omitempty"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermRangeQuery creates a new Query for ranges
// of text term values.
// Either, but not both, endpoints can be empty.
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
return NewTermRangeInclusiveQuery(min, max, nil, nil)
}
// NewTermRangeInclusiveQuery creates a new Query for ranges
// of text term values.
// Either, but not both, endpoints can be empty.
// Control endpoint inclusion with inclusiveMin, inclusiveMax.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
return &TermRangeQuery{
Min: min,
Max: max,
InclusiveMin: minInclusive,
InclusiveMax: maxInclusive,
}
}
func (q *TermRangeQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *TermRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *TermRangeQuery) SetField(f string) {
q.FieldVal = f
}
func (q *TermRangeQuery) Field() string {
return q.FieldVal
}
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
var minTerm []byte
if q.Min != "" {
minTerm = []byte(q.Min)
}
var maxTerm []byte
if q.Max != "" {
maxTerm = []byte(q.Max)
}
return searcher.NewTermRangeSearcher(i, minTerm, maxTerm, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
func (q *TermRangeQuery) Validate() error {
if q.Min == "" && q.Min == q.Max {
return fmt.Errorf("term range query must specify min or max")
}
return nil
}

View File
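A minimal sketch of the new term range query; with the plain constructor the minimum is inclusive and the maximum exclusive, while `NewTermRangeInclusiveQuery` controls both ends explicitly. The index path and `name` field are assumptions:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// terms t with "apple" <= t < "mango" in the assumed "name" field
	q := query.NewTermRangeQuery("apple", "mango")
	q.SetField("name")
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```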

@@ -66,7 +66,7 @@ func (q *WildcardQuery) SetBoost(b float64) {
q.BoostVal = &boost
}
func (q *WildcardQuery) Boost() float64{
func (q *WildcardQuery) Boost() float64 {
return q.BoostVal.Value()
}
@@ -74,11 +74,11 @@ func (q *WildcardQuery) SetField(f string) {
q.FieldVal = f
}
func (q *WildcardQuery) Field() string{
func (q *WildcardQuery) Field() string {
return q.FieldVal
}
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
@@ -91,7 +91,7 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, ex
}
}
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain)
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
}
func (q *WildcardQuery) Validate() error {
@@ -101,6 +101,6 @@ func (q *WildcardQuery) Validate() error {
}
func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
regexpString := "^" + wildcardRegexpReplacer.Replace(q.Wildcard) + "$"
regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
return regexp.Compile(regexpString)
}

View File
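Wildcard queries compile to the same unanchored regexp form: `*` becomes `.*`, `?` becomes `.`, and no surrounding `^`/`$` is added since matching is whole-term anyway. A minimal sketch; the index path and field name are assumptions:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/search/query"
)

func main() {
	index, err := bleve.Open("example.bleve") // assumed pre-existing index
	if err != nil {
		panic(err)
	}
	// "ble?e*" compiles to the regexp "ble.e.*"
	q := query.NewWildcardQuery("ble?e*")
	q.SetField("Body") // assumed field name
	searchResult, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResult)
}
```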

@@ -19,26 +19,26 @@ import (
)
type ConjunctionQueryScorer struct {
explain bool
options search.SearcherOptions
}
func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer {
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{
explain: explain,
options: options,
}
}
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
var sum float64
var childrenExplanations []*search.Explanation
if s.explain {
if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}
locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents {
sum += docMatch.Score
if s.explain {
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
@@ -47,7 +47,7 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
}
newScore := sum
var newExpl *search.Explanation
if s.explain {
if s.options.Explain {
newExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
}

View File

@@ -24,15 +24,15 @@ import (
type ConstantScorer struct {
constant float64
boost float64
explain bool
options search.SearcherOptions
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer {
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{
explain: explain,
options: options,
queryWeight: 1.0,
constant: constant,
boost: boost,
@@ -52,7 +52,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
// update the query weight
s.queryWeight = s.boost * s.queryNorm
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 2)
childrenExplanations[0] = &search.Explanation{
Value: s.boost,
@@ -75,7 +75,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
score := s.constant
if s.explain {
if s.options.Explain {
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("ConstantScore()"),
@@ -85,7 +85,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
@@ -100,7 +100,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = id
rv.Score = score
if s.explain {
if s.options.Explain {
rv.Expl = scoreExplanation
}

View File

@@ -21,26 +21,26 @@ import (
)
type DisjunctionQueryScorer struct {
explain bool
options search.SearcherOptions
}
func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer {
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{
explain: explain,
options: options,
}
}
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
var sum float64
var childrenExplanations []*search.Explanation
if s.explain {
if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}
var locations []search.FieldTermLocationMap
for i, docMatch := range constituents {
sum += docMatch.Score
if s.explain {
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
if docMatch.Locations != nil {
@@ -49,14 +49,14 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
}
var rawExpl *search.Explanation
if s.explain {
if s.options.Explain {
rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
}
coord := float64(countMatch) / float64(countTotal)
newScore := sum * coord
var newExpl *search.Explanation
if s.explain {
if s.options.Explain {
ce := make([]*search.Explanation, 2)
ce[0] = rawExpl
ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}

View File

@@ -23,20 +23,20 @@ import (
)
type TermQueryScorer struct {
queryTerm string
queryTerm []byte
queryField string
queryBoost float64
docTerm uint64
docTotal uint64
idf float64
explain bool
options search.SearcherOptions
idfExplanation *search.Explanation
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
}
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,
queryField: queryField,
@@ -44,11 +44,11 @@ func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
explain: explain,
options: options,
queryWeight: 1.0,
}
if explain {
if options.Explain {
rv.idfExplanation = &search.Explanation{
Value: rv.idf,
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
@@ -69,7 +69,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
// update the query weight
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: s.queryBoost,
@@ -100,7 +100,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
}
score := tf * termMatch.Norm * s.idf
if s.explain {
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
@@ -121,7 +121,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
@@ -136,44 +136,46 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score
if s.explain {
if s.options.Explain {
rv.Expl = scoreExplanation
}
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
locs := make([]search.Location, len(termMatch.Vectors))
locsUsed := 0
totalPositions := 0
for _, v := range termMatch.Vectors {
totalPositions += len(v.ArrayPositions)
}
positions := make(search.ArrayPositions, totalPositions)
positionsUsed := 0
rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field]
if tlm == nil {
tlm = make(search.TermLocationMap)
rv.Locations[v.Field] = tlm
}
loc := search.Location{
Pos: float64(v.Pos),
Start: float64(v.Start),
End: float64(v.End),
}
loc := &locs[locsUsed]
locsUsed++
loc.Pos = v.Pos
loc.Start = v.Start
loc.End = v.End
if len(v.ArrayPositions) > 0 {
loc.ArrayPositions = make([]float64, len(v.ArrayPositions))
loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
for i, ap := range v.ArrayPositions {
loc.ArrayPositions[i] = float64(ap)
loc.ArrayPositions[i] = ap
}
positionsUsed += len(v.ArrayPositions)
}
locations := tlm[s.queryTerm]
if locations == nil {
locations = make(search.Locations, 1)
locations[0] = &loc
} else {
locations = append(locations, &loc)
}
tlm[s.queryTerm] = locations
rv.Locations[v.Field] = tlm
tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
}
}
return rv

View File
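The location-building loop now makes one backing allocation for all locations and one for all array positions, handing out sub-slices, instead of allocating per term vector. The pattern in isolation, as a standalone sketch with illustrative names:

```go
package main

import "fmt"

type location struct {
	pos            uint64
	arrayPositions []uint64
}

func main() {
	vectors := [][]uint64{{0}, {1, 2}, nil}

	totalPositions := 0
	for _, v := range vectors {
		totalPositions += len(v)
	}
	// one allocation backs every arrayPositions sub-slice
	positions := make([]uint64, totalPositions)
	positionsUsed := 0

	locs := make([]location, len(vectors)) // one allocation for all locations
	for i, v := range vectors {
		locs[i].pos = uint64(i + 1)
		if len(v) > 0 {
			locs[i].arrayPositions = positions[positionsUsed : positionsUsed+len(v)]
			copy(locs[i].arrayPositions, v)
			positionsUsed += len(v)
		}
	}
	fmt.Println(locs)
}
```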

@@ -21,27 +21,32 @@ import (
"github.com/blevesearch/bleve/index"
)
type Location struct {
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
ArrayPositions []float64 `json:"array_positions"`
}
type ArrayPositions []uint64
// SameArrayElement returns true if two locations point to
// the same array element
func (l *Location) SameArrayElement(other *Location) bool {
if len(l.ArrayPositions) != len(other.ArrayPositions) {
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
if len(ap) != len(other) {
return false
}
for i, elem := range l.ArrayPositions {
if other.ArrayPositions[i] != elem {
for i := range ap {
if ap[i] != other[i] {
return false
}
}
return true
}
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
// Start and End are the byte offsets of the term in the field
Start uint64 `json:"start"`
End uint64 `json:"end"`
// ArrayPositions contains the positions of the term within any array elements.
ArrayPositions ArrayPositions `json:"array_positions"`
}
type Locations []*Location
type TermLocationMap map[string]Locations
@@ -69,10 +74,6 @@ type DocumentMatch struct {
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// as we learn field terms, we can cache important ones for later use
// for example, sorting and building facets need these values
CachedFieldTerms index.FieldTerms `json:"-"`
// if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"`
@@ -138,6 +139,11 @@ type Searcher interface {
DocumentMatchPoolSize() int
}
type SearcherOptions struct {
Explain bool
IncludeTermVectors bool
}
// SearchContext represents the context around a single search
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool

View File
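`Location` offsets switch from `float64` to `uint64`, and the old `SameArrayElement` method becomes `Equals` on the new exported `ArrayPositions` type. A small, runnable check:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/search"
)

func main() {
	a := search.ArrayPositions{0, 2}
	b := search.ArrayPositions{0, 2}
	fmt.Println(a.Equals(b))                        // true
	fmt.Println(a.Equals(search.ArrayPositions{0})) // false: lengths differ
}
```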

@@ -38,14 +38,14 @@ type BooleanSearcher struct {
initialized bool
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) {
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
// build our searcher
rv := BooleanSearcher{
indexReader: indexReader,
mustSearcher: mustSearcher,
shouldSearcher: shouldSearcher,
mustNotSearcher: mustNotSearcher,
scorer: scorer.NewConjunctionQueryScorer(explain),
scorer: scorer.NewConjunctionQueryScorer(options),
matches: make([]*search.DocumentMatch, 2),
}
rv.computeQueryNorm()

View File

@@ -31,10 +31,10 @@ type ConjunctionSearcher struct {
maxIDIdx int
scorer *scorer.ConjunctionQueryScorer
initialized bool
explain bool
options search.SearcherOptions
}
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) {
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
@@ -45,10 +45,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
// build our searcher
rv := ConjunctionSearcher{
indexReader: indexReader,
explain: explain,
options: options,
searchers: searchers,
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewConjunctionQueryScorer(explain),
scorer: scorer.NewConjunctionQueryScorer(options),
}
rv.computeQueryNorm()
return &rv, nil

View File

@@ -50,11 +50,22 @@ func tooManyClauses(count int) bool {
}
func tooManyClausesErr() error {
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount)
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
DisjunctionMaxClauseCount)
}
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
if tooManyClauses(len(qsearchers)) {
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
*DisjunctionSearcher, error) {
return newDisjunctionSearcher(indexReader, qsearchers, min, options,
true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr()
}
// build the downstream searchers
@@ -70,7 +81,7 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
searchers: searchers,
numSearchers: len(searchers),
currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewDisjunctionQueryScorer(explain),
scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)),
matchingIdxs: make([]int, len(searchers)),
@@ -161,7 +172,8 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
}
}
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@@ -199,7 +211,8 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
return rv, nil
}
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {

View File

@@ -28,13 +28,13 @@ type DocIDSearcher struct {
}
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
explain bool) (searcher *DocIDSearcher, err error) {
options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
reader, err := indexReader.DocIDReaderOnly(ids)
if err != nil {
return nil, err
}
scorer := scorer.NewConstantScorer(1.0, boost, explain)
scorer := scorer.NewConstantScorer(1.0, boost, options)
return &DocIDSearcher{
scorer: scorer,
reader: reader,

View File

@@ -0,0 +1,88 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
type FilterFunc func(d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc
type FilteringSearcher struct {
child search.Searcher
accept FilterFunc
}
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
return &FilteringSearcher{
child: s,
accept: filter,
}
}
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {
if f.accept(next) {
return next, nil
}
next, err = f.child.Next(ctx)
}
return nil, err
}
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
adv, err := f.child.Advance(ctx, ID)
if err != nil {
return nil, err
}
if adv == nil {
return nil, nil
}
if f.accept(adv) {
return adv, nil
}
return f.Next(ctx)
}
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}
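To show how FilteringSearcher composes, here is a hedged sketch of a hypothetical wrapper that keeps only matches with a sufficiently long ID. The predicate is purely illustrative; the geo searchers in this commit filter on decoded field terms instead.

```go
package searcher

import "github.com/blevesearch/bleve/search"

// onlyLongIDs is a hypothetical helper wrapping any child searcher so
// that only matches whose ID exceeds eight bytes survive Next/Advance.
func onlyLongIDs(child search.Searcher) *FilteringSearcher {
	return NewFilteringSearcher(child, func(d *search.DocumentMatch) bool {
		// any predicate over the DocumentMatch works here
		return len(d.ID) > 8
	})
}
```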

View File

@@ -19,17 +19,9 @@ import (
"github.com/blevesearch/bleve/search"
)
type FuzzySearcher struct {
indexReader index.IndexReader
term string
prefix int
fuzziness int
field string
explain bool
searcher *DisjunctionSearcher
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
func NewFuzzySearcher(indexReader index.IndexReader, term string,
prefix, fuzziness int, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// Note: we don't byte-slice the term for a prefix because runes can span multiple bytes.
prefixTerm := ""
for i, r := range term {
@@ -40,46 +32,18 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
}
}
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm)
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
field, prefixTerm)
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return &FuzzySearcher{
indexReader: indexReader,
term: term,
prefix: prefix,
fuzziness: fuzziness,
field: field,
explain: explain,
searcher: searcher,
}, nil
return NewMultiTermSearcher(indexReader, candidateTerms, field,
boost, options, true)
}
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -108,36 +72,3 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzzine
return rv, err
}
func (s *FuzzySearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *FuzzySearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *FuzzySearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *FuzzySearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *FuzzySearcher) Close() error {
return s.searcher.Close()
}
func (s *FuzzySearcher) Min() int {
return 0
}
func (s *FuzzySearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}

View File

@@ -0,0 +1,173 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) {
// track list of opened searchers, for cleanup on early exit
var openedSearchers []search.Searcher
cleanupOpenedSearchers := func() {
for _, s := range openedSearchers {
_ = s.Close()
}
}
// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
var onBoundarySearcher search.Searcher
if len(onBoundaryTerms) > 0 {
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
onBoundaryTerms, field, boost, options, false)
if err != nil {
return nil, err
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
var notOnBoundarySearcher search.Searcher
if len(notOnBoundaryTerms) > 0 {
var err error
notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader,
notOnBoundaryTerms, field, boost, options, false)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
openedSearchers = append(openedSearchers, notOnBoundarySearcher)
}
if onBoundarySearcher != nil && notOnBoundarySearcher != nil {
rv, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{
onBoundarySearcher,
notOnBoundarySearcher,
},
0, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
return rv, nil
} else if onBoundarySearcher != nil {
return onBoundarySearcher, nil
} else if notOnBoundarySearcher != nil {
return notOnBoundarySearcher, nil
}
return NewMatchNoneSearcher(indexReader)
}
var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
}
return nil, nil
}
func buildRectFilter(indexReader index.IndexReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
}
return false
}
}
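A hedged usage sketch of the constructor above; the field name and coordinates are assumptions, and checkBoundaries=true turns on the exact filter for cells that touch the box edge:

```go
package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// searchBayArea is illustrative: the "geo" field name and the box
// coordinates are assumptions; the signature is the one defined above.
func searchBayArea(indexReader index.IndexReader) (search.Searcher, error) {
	return NewGeoBoundingBoxSearcher(indexReader,
		-123.0, 36.9, // minLon, minLat
		-121.0, 38.0, // maxLon, maxLat
		"geo", 1.0, search.SearcherOptions{},
		true) // checkBoundaries: filter candidate cells on the box edge
}
```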

View File

@@ -0,0 +1,117 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
centerLat, dist float64, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// compute bounding box containing the circle
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.RectFromPointDistance(centerLon, centerLat, dist)
if err != nil {
return nil, err
}
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options)
if err != nil {
return nil, err
}
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(boxSearcher,
buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
// if the desired box crosses the dateline, it is automatically split into
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false)
if err != nil {
return nil, err
}
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
if err != nil {
_ = leftSearcher.Close()
return nil, err
}
boxSearcher, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{leftSearcher, rightSearcher}, 0, options)
if err != nil {
_ = leftSearcher.Close()
_ = rightSearcher.Close()
return nil, err
}
return boxSearcher, nil
}
// build geo bounding box searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
if err != nil {
return nil, err
}
return boxSearcher, nil
}
func buildDistFilter(indexReader index.IndexReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
dist := geo.Haversin(lon, lat, centerLon, centerLat)
if dist <= maxDist/1000 {
return true
}
}
return false
}
}
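A hedged usage sketch; the field name and coordinates are assumptions. Note the radius is given in meters, which matches the maxDist/1000 comparison against Haversin's kilometer result in buildDistFilter above:

```go
package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// searchNearby finds documents within 5km of a point; field name and
// coordinates are illustrative.
func searchNearby(indexReader index.IndexReader) (search.Searcher, error) {
	return NewGeoPointDistanceSearcher(indexReader,
		-122.4, 37.8, // centerLon, centerLat
		5000,         // dist: radius in meters
		"geo", 1.0, search.SearcherOptions{})
}
```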

View File

@@ -27,7 +27,7 @@ type MatchAllSearcher struct {
count uint64
}
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) {
reader, err := indexReader.DocIDReaderAll()
if err != nil {
return nil, err
@@ -37,7 +37,7 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain b
_ = reader.Close()
return nil, err
}
scorer := scorer.NewConstantScorer(1.0, boost, explain)
scorer := scorer.NewConstantScorer(1.0, boost, options)
return &MatchAllSearcher{
indexReader: indexReader,
reader: reader,

View File

@@ -0,0 +1,85 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
}
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcherBytes(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
}
func newMultiTermSearcherBytes(indexReader index.IndexReader,
searchers []search.Searcher, field string, boost float64,
options search.SearcherOptions, limit bool) (
search.Searcher, error) {
// build disjunction searcher of these ranges
searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options,
limit)
if err != nil {
for _, s := range searchers {
_ = s.Close()
}
return nil, err
}
return searcher, nil
}
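A hedged usage sketch of NewMultiTermSearcher; the field and terms are illustrative, and limit=true keeps the disjunction clause cap in force. On any error the constructor closes the term searchers it already opened, so the caller only manages the returned searcher.

```go
package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// searchColors ORs several exact terms over one field.
func searchColors(indexReader index.IndexReader) (search.Searcher, error) {
	return NewMultiTermSearcher(indexReader,
		[]string{"red", "green", "blue"}, "color", 1.0,
		search.SearcherOptions{}, true)
}
```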

View File

@@ -17,22 +17,16 @@ package searcher
import (
"bytes"
"math"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
type NumericRangeSearcher struct {
indexReader index.IndexReader
min *float64
max *float64
field string
explain bool
searcher *DisjunctionSearcher
}
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, explain bool) (*NumericRangeSearcher, error) {
func NewNumericRangeSearcher(indexReader index.IndexReader,
min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
// account for unbounded edges
if min == nil {
negInf := math.Inf(-1)
@@ -62,64 +56,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
var err error
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr()
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
func filterCandidateTerms(indexReader index.IndexReader,
terms [][]byte, field string) (rv [][]byte, err error) {
fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
if err != nil {
qsearchersClose()
return nil, err
}
return &NumericRangeSearcher{
indexReader: indexReader,
min: min,
max: max,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
// enumerate the terms and check against list of terms
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
termBytes := []byte(tfd.Term)
i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 {
rv = append(rv, terms[i])
}
terms = terms[i:]
tfd, err = fieldDict.Next()
}
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *NumericRangeSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *NumericRangeSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *NumericRangeSearcher) Close() error {
return s.searcher.Close()
return rv, err
}
type termRange struct {
@@ -190,7 +169,8 @@ func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
@@ -225,11 +205,3 @@ func newRangeBytes(minBytes, maxBytes []byte) *termRange {
endTerm: maxBytes,
}
}
func (s *NumericRangeSearcher) Min() int {
return 0
}
func (s *NumericRangeSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}
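The dictionary walk in filterCandidateTerms binary-searches the sorted candidate list and discards candidates it has already passed. A standalone sketch of that intersection pattern, with plain sorted slices standing in for the field dictionary iterator:

```go
package main

import (
	"bytes"
	"fmt"
	"sort"
)

// intersectSorted keeps only candidates that also occur in dict.
// Both inputs must be sorted; this mirrors the sort.Search walk above.
func intersectSorted(candidates, dict [][]byte) [][]byte {
	var rv [][]byte
	for _, term := range dict {
		i := sort.Search(len(candidates), func(i int) bool {
			return bytes.Compare(candidates[i], term) >= 0
		})
		if i < len(candidates) && bytes.Equal(candidates[i], term) {
			rv = append(rv, candidates[i])
		}
		candidates = candidates[i:] // never revisit earlier candidates
	}
	return rv
}

func main() {
	candidates := [][]byte{[]byte("a"), []byte("c"), []byte("e")}
	dict := [][]byte{[]byte("b"), []byte("c"), []byte("d"), []byte("e")}
	for _, t := range intersectSorted(candidates, dict) {
		fmt.Println(string(t)) // prints: c, then e
	}
}
```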

View File

@@ -15,6 +15,7 @@
package searcher
import (
"fmt"
"math"
"github.com/blevesearch/bleve/index"
@@ -27,11 +28,72 @@ type PhraseSearcher struct {
queryNorm float64
currMust *search.DocumentMatch
slop int
terms []string
terms [][]string
initialized bool
}
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string
mterms := make([][]string, len(terms))
for i, term := range terms {
mterms[i] = []string{term}
}
return NewMultiPhraseSearcher(indexReader, mterms, field, options)
}
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
options.IncludeTermVectors = true
var termPositionSearchers []search.Searcher
for _, termPos := range terms {
if len(termPos) == 1 && termPos[0] != "" {
// single term
ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termPositionSearchers = append(termPositionSearchers, ts)
} else if len(termPos) > 1 {
// multiple terms
var termSearchers []search.Searcher
for _, term := range termPos {
if term == "" {
continue
}
ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termSearchers = append(termSearchers, ts)
}
disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
}
termPositionSearchers = append(termPositionSearchers, disjunction)
}
}
mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
if err != nil {
// close any searchers already opened
for _, ts := range termPositionSearchers {
_ = ts.Close()
}
return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
}
// build our searcher
rv := PhraseSearcher{
indexReader: indexReader,
@@ -96,68 +158,150 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
}
}
var rv *search.DocumentMatch
for s.currMust != nil {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
firstTerm := s.terms[0]
for field, termLocMap := range s.currMust.Locations {
rvtlm := make(search.TermLocationMap, 0)
locations, ok := termLocMap[firstTerm]
if ok {
OUTER:
for _, location := range locations {
crvtlm := make(search.TermLocationMap, 0)
INNER:
for i := 0; i < len(s.terms); i++ {
nextTerm := s.terms[i]
if nextTerm != "" {
// look through all these term locations
// to try and find the correct offsets
nextLocations, ok := termLocMap[nextTerm]
if ok {
for _, nextLocation := range nextLocations {
if nextLocation.Pos == location.Pos+float64(i) && nextLocation.SameArrayElement(location) {
// found a location match for this term
crvtlm.AddLocation(nextTerm, nextLocation)
continue INNER
}
}
// if we got here we didn't find a location match for this term
continue OUTER
} else {
continue OUTER
}
}
}
// if we got here all the terms matched
freq++
search.MergeTermLocationMaps(rvtlm, crvtlm)
rvftlm[field] = rvtlm
}
}
}
if freq > 0 {
// return match
rv = s.currMust
rv.Locations = rvftlm
err := s.advanceNextMust(ctx)
if err != nil {
return nil, err
}
return rv, nil
}
// check this match against phrase constraints
rv := s.checkCurrMustMatch(ctx)
// prepare for next iteration (either loop or subsequent call to Next())
err := s.advanceNextMust(ctx)
if err != nil {
return nil, err
}
// if match satisfied phrase constraints return it as a hit
if rv != nil {
return rv, nil
}
}
return nil, nil
}
// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisfies the pre-condition searcher)
// also satisfies the phrase constraints. if so, it returns a DocumentMatch
// for this document, otherwise nil
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
rvftlm := make(search.FieldTermLocationMap, 0)
freq := 0
// typically we would expect results in only one field, but we allow
// for this not to be the case
// however, phrase constraints can only be satisfied within a single
// field, so we can check each field independently
for field, tlm := range s.currMust.Locations {
f, rvtlm := s.checkCurrMustMatchField(ctx, tlm)
if f > 0 {
freq += f
rvftlm[field] = rvtlm
}
}
if freq > 0 {
// return match
rv := s.currMust
rv.Locations = rvftlm
return rv
}
return nil
}
// checkCurrMustMatchField is solely concerned with determining if one particular
// field within the currMust DocumentMatch Locations satisfies the phrase
// constraints (possibly more than once). if so, the number of times it was
// satisfied, and these locations are returned. otherwise 0 and either
// a nil or empty TermLocationMap
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
rv := make(search.TermLocationMap, len(s.terms))
for _, p := range paths {
p.MergeInto(rv)
}
return len(paths), rv
}
type phrasePart struct {
term string
loc *search.Location
}
type phrasePath []*phrasePart
func (p phrasePath) MergeInto(in search.TermLocationMap) {
for _, pp := range p {
in[pp.term] = append(in[pp.term], pp.loc)
}
}
// findPhrasePaths identifies phrase matches from a set of known
// term locations. the implementation is recursive, so care must be taken
// with arguments and return values.
//
// prevPos - the position of the previous matched term, 0 on first invocation
// ap - the array positions of the previous location, nil on first invocation
// phraseTerms - slice containing the phrase terms themselves
// may contain empty string as placeholder (don't care)
// tlm - the Term Location Map containing all relevant term locations
// p - the current path being explored (appended to in recursive calls)
// this is the primary state being built during the traversal
// remainingSlop - the amount of sloppiness still allowed on this path
//
// returns slice of paths, or nil if invocation did not find any successful paths
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
// no more terms
if len(phraseTerms) < 1 {
return []phrasePath{p}
}
car := phraseTerms[0]
cdr := phraseTerms[1:]
// empty term is treated as match (continue)
if len(car) == 0 || (len(car) == 1 && car[0] == "") {
nextPos := prevPos + 1
if prevPos == 0 {
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
}
var rv []phrasePath
// locations for this term
for _, carTerm := range car {
locations := tlm[carTerm]
for _, loc := range locations {
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
// if the array positions are wrong, can't match, try next location
continue
}
// compute distance from previous phrase term
dist := 0
if prevPos != 0 {
dist = editDistance(prevPos+1, loc.Pos)
}
// if enough slop remaining, continue recursively
if prevPos == 0 || (remainingSlop-dist) >= 0 {
// this location works, add it to the path (but not for empty term)
px := append(p, &phrasePart{term: carTerm, loc: loc})
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
}
}
}
return rv
}
func editDistance(p1, p2 uint64) int {
dist := int(p1 - p2)
if dist < 0 {
return -dist
}
return dist
}
func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)

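As a worked illustration of the slop accounting above: for the phrase "quick fox" with slop 1, a document containing "quick brown fox" still matches, because "fox" sits one position past the expected prevPos+1, and editDistance here is an absolute position difference, not string edit distance. A standalone sketch:

```go
package main

import "fmt"

// editDistance mirrors the helper above: the absolute difference of two
// term positions (uint64 in the original; int here for brevity).
func editDistance(p1, p2 int) int {
	d := p1 - p2
	if d < 0 {
		return -d
	}
	return d
}

func main() {
	// "quick" observed at position 1; "fox" observed at position 3.
	prevPos, nextPos, slop := 1, 3, 1
	dist := editDistance(prevPos+1, nextPos) // expected position is prevPos+1
	fmt.Printf("dist=%d withinSlop=%v\n", dist, slop-dist >= 0)
	// prints: dist=1 withinSlop=true
}
```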
View File

@@ -21,7 +21,14 @@ import (
"github.com/blevesearch/bleve/search"
)
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (search.Searcher, error) {
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT,
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
prefixTerm, complete := pattern.LiteralPrefix()
var candidateTerms []string
@@ -30,39 +37,19 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi
candidateTerms = []string{prefixTerm}
} else {
var err error
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm)
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
prefixTerm)
if err != nil {
return nil, err
}
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return searcher, err
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
options, true)
}
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
func findRegexpCandidateTerms(indexReader index.IndexReader,
pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -79,7 +66,8 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Reg
// enumerate the terms and check against regexp
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
if pattern.MatchString(tfd.Term) {
matchPos := pattern.FindStringIndex(tfd.Term)
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return rv, tooManyClausesErr()

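The switch from MatchString to FindStringIndex enforces whole-term matching without relying on ^ and $ anchors. A standalone sketch of the same check:

```go
package main

import (
	"fmt"
	"regexp"
)

// matchesEntireTerm reports whether pattern matches term exactly,
// the same check applied to each dictionary term above.
func matchesEntireTerm(pattern *regexp.Regexp, term string) bool {
	pos := pattern.FindStringIndex(term)
	return pos != nil && pos[0] == 0 && pos[1] == len(term)
}

func main() {
	p := regexp.MustCompile("bl.ve")
	fmt.Println(matchesEntireTerm(p, "bleve"))  // true
	fmt.Println(matchesEntireTerm(p, "blevex")) // false: trailing x
	fmt.Println(matchesEntireTerm(p, "xbleve")) // false: match not at 0
}
```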
View File

@@ -22,16 +22,13 @@ import (
type TermSearcher struct {
indexReader index.IndexReader
term string
field string
reader index.TermFieldReader
scorer *scorer.TermQueryScorer
tfd index.TermFieldDoc
explain bool
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, true)
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors)
if err != nil {
return nil, err
}
@@ -40,12 +37,27 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), explain)
scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
reader: reader,
scorer: scorer,
}, nil
}
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors)
if err != nil {
return nil, err
}
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
term: term,
field: field,
explain: explain,
reader: reader,
scorer: scorer,
}, nil

View File

@@ -19,93 +19,21 @@ import (
"github.com/blevesearch/bleve/search"
)
type TermPrefixSearcher struct {
indexReader index.IndexReader
prefix string
field string
explain bool
searcher *DisjunctionSearcher
}
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
var qsearcher *TermSearcher
qsearcher, err = NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
if err != nil {
qsearchersClose()
_ = fieldDict.Close()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
terms = append(terms, tfd.Term)
tfd, err = fieldDict.Next()
}
err = fieldDict.Close()
if err != nil {
qsearchersClose()
return nil, err
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil {
qsearchersClose()
return nil, err
}
return &TermPrefixSearcher{
indexReader: indexReader,
prefix: prefix,
field: field,
explain: explain,
searcher: searcher,
}, nil
}
func (s *TermPrefixSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *TermPrefixSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *TermPrefixSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *TermPrefixSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *TermPrefixSearcher) Close() error {
return s.searcher.Close()
}
func (s *TermPrefixSearcher) Min() int {
return 0
}
func (s *TermPrefixSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View File

@@ -0,0 +1,79 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func NewTermRangeSearcher(indexReader index.IndexReader,
min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
if inclusiveMin == nil {
defaultInclusiveMin := true
inclusiveMin = &defaultInclusiveMin
}
if inclusiveMax == nil {
defaultInclusiveMax := false
inclusiveMax = &defaultInclusiveMax
}
if min == nil {
min = []byte{}
}
rangeMax := max
if rangeMax != nil {
// the term dictionary range end is exclusive, so append a zero byte
// to keep a term equal to max in the enumeration
rangeMax = append(rangeMax, 0)
}
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
if err != nil {
return nil, err
}
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
terms = append(terms, tfd.Term)
tfd, err = fieldDict.Next()
}
if err != nil {
return nil, err
}
if len(terms) < 1 {
return NewMatchNoneSearcher(indexReader)
}
if !*inclusiveMin && min != nil && string(min) == terms[0] {
terms = terms[1:]
// check again, as we might have removed the only entry
if len(terms) < 1 {
return NewMatchNoneSearcher(indexReader)
}
}
// if our term list included the max, it would be the last item
if !*inclusiveMax && max != nil && string(max) == terms[len(terms)-1] {
terms = terms[:len(terms)-1]
}
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}
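A hedged usage sketch of NewTermRangeSearcher; the field and bounds are illustrative. Passing nil inclusivity pointers falls back to the defaults above, min inclusive and max exclusive:

```go
package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// searchFruitRange matches terms in ["apple", "banana") over "fruit".
func searchFruitRange(indexReader index.IndexReader) (search.Searcher, error) {
	return NewTermRangeSearcher(indexReader,
		[]byte("apple"), []byte("banana"),
		nil, nil, // inclusiveMin, inclusiveMax: use the defaults
		"fruit", 1.0, search.SearcherOptions{})
}
```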

View File

@@ -17,9 +17,11 @@ package search
import (
"encoding/json"
"fmt"
"math"
"sort"
"strings"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
)
@@ -27,12 +29,15 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
type SearchSort interface {
UpdateVisitor(field string, term []byte)
Value(a *DocumentMatch) string
Descending() bool
RequiresDocID() bool
RequiresScoring() bool
RequiresFields() []string
Copy() SearchSort
}
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
@@ -50,6 +55,31 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
return &SortScore{
Desc: descending,
}, nil
case "geo_distance":
field, ok := input["field"].(string)
if !ok {
return nil, fmt.Errorf("search sort mode geo_distance must specify field")
}
lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"])
if !foundLocation {
return nil, fmt.Errorf("unable to parse geo_distance location")
}
rvd := &SortGeoDistance{
Field: field,
Desc: descending,
Lon: lon,
Lat: lat,
unitMult: 1.0,
}
if distUnit, ok := input["unit"].(string); ok {
var err error
rvd.unitMult, err = geo.ParseDistanceUnit(distUnit)
if err != nil {
return nil, err
}
rvd.Unit = distUnit
}
return rvd, nil
case "field":
field, ok := input["field"].(string)
if !ok {
@@ -171,6 +201,20 @@ func (so SortOrder) Value(doc *DocumentMatch) {
}
}
func (so SortOrder) UpdateVisitor(field string, term []byte) {
for _, soi := range so {
soi.UpdateVisitor(field, term)
}
}
func (so SortOrder) Copy() SortOrder {
rv := make(SortOrder, len(so))
for i, soi := range so {
rv[i] = soi.Copy()
}
return rv
}
// Compare will compare two document matches using the specified sort order
// if both are numbers, we avoid converting back to term
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
@@ -300,13 +344,24 @@ type SortField struct {
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
values []string
}
// UpdateVisitor notifies this sort field that in this document
// this field has the specified term
func (s *SortField) UpdateVisitor(field string, term []byte) {
if field == s.Field {
s.values = append(s.values, string(term))
}
}
// Value returns the sort value of the DocumentMatch
// it also resets the state of this SortField for
// processing the next document
func (s *SortField) Value(i *DocumentMatch) string {
iTerms := i.CachedFieldTerms[s.Field]
iTerms = s.filterTermsByType(iTerms)
iTerms := s.filterTermsByType(s.values)
iTerm := s.filterTermsByMode(iTerms)
s.values = nil
return iTerm
}
@@ -368,7 +423,7 @@ func (s *SortField) filterTermsByType(terms []string) []string {
for _, term := range terms {
valid, shift := numeric.ValidPrefixCodedTerm(term)
if valid && shift == 0 {
termsWithShiftZero = append(termsWithShiftZero)
termsWithShiftZero = append(termsWithShiftZero, term)
}
}
terms = termsWithShiftZero
@@ -430,11 +485,23 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
return json.Marshal(sfm)
}
func (s *SortField) Copy() SearchSort {
var rv SortField
rv = *s
return &rv
}
// SortDocID will sort results by the document identifier
type SortDocID struct {
Desc bool
}
// UpdateVisitor is a no-op for SortDocID as its value
// is not dependent on any field terms
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
func (s *SortDocID) Value(i *DocumentMatch) string {
return i.ID
@@ -461,11 +528,23 @@ func (s *SortDocID) MarshalJSON() ([]byte, error) {
return json.Marshal("_id")
}
func (s *SortDocID) Copy() SearchSort {
var rv SortDocID
rv = *s
return &rv
}
// SortScore will sort results by the document match score
type SortScore struct {
Desc bool
}
// UpdateVisitor is a no-op for SortScore as its value
// is not dependent on any field terms
func (s *SortScore) UpdateVisitor(field string, term []byte) {
}
// Value returns the sort value of the DocumentMatch
func (s *SortScore) Value(i *DocumentMatch) string {
return "_score"
@@ -491,3 +570,142 @@ func (s *SortScore) MarshalJSON() ([]byte, error) {
}
return json.Marshal("_score")
}
func (s *SortScore) Copy() SearchSort {
var rv SortScore
rv = *s
return &rv
}
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates a SearchSort instance for sorting documents by
// their distance from the specified point.
func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
*SortGeoDistance, error) {
rv := &SortGeoDistance{
Field: field,
Desc: desc,
Unit: unit,
Lon: lon,
Lat: lat,
}
var err error
rv.unitMult, err = geo.ParseDistanceUnit(unit)
if err != nil {
return nil, err
}
return rv, nil
}
// SortGeoDistance will sort results by the distance of an
// indexed geo point, from the provided location.
// Field is the name of the field
// Desc reverses the sort order (default false)
type SortGeoDistance struct {
Field string
Desc bool
Unit string
values []string
Lon float64
Lat float64
unitMult float64
}
// UpdateVisitor notifies this sort field that in this document
// this field has the specified term
func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
if field == s.Field {
s.values = append(s.values, string(term))
}
}
// Value returns the sort value of the DocumentMatch
// it also resets the state of this SortGeoDistance for
// processing the next document
func (s *SortGeoDistance) Value(i *DocumentMatch) string {
iTerms := s.filterTermsByType(s.values)
iTerm := s.filterTermsByMode(iTerms)
s.values = nil
if iTerm == "" {
return maxDistance
}
i64, err := numeric.PrefixCoded(iTerm).Int64()
if err != nil {
return maxDistance
}
docLon := geo.MortonUnhashLon(uint64(i64))
docLat := geo.MortonUnhashLat(uint64(i64))
dist := geo.Haversin(s.Lon, s.Lat, docLon, docLat)
// dist is returned in km, so convert to m
dist *= 1000
if s.unitMult != 0 {
dist /= s.unitMult
}
distInt64 := numeric.Float64ToInt64(dist)
return string(numeric.MustNewPrefixCodedInt64(distInt64, 0))
}
// Descending determines the order of the sort
func (s *SortGeoDistance) Descending() bool {
return s.Desc
}
func (s *SortGeoDistance) filterTermsByMode(terms []string) string {
if len(terms) >= 1 {
return terms[0]
}
return ""
}
// filterTermsByType attempts to make one pass over the terms,
// returning only valid prefix coded numbers with shift of 0
func (s *SortGeoDistance) filterTermsByType(terms []string) []string {
var termsWithShiftZero []string
for _, term := range terms {
valid, shift := numeric.ValidPrefixCodedTerm(term)
if valid && shift == 0 {
termsWithShiftZero = append(termsWithShiftZero, term)
}
}
return termsWithShiftZero
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
func (s *SortGeoDistance) RequiresDocID() bool { return false }
// RequiresScoring says this SearchSort does not require scoring
func (s *SortGeoDistance) RequiresScoring() bool { return false }
// RequiresFields says this SearchSort requires the specified stored field
func (s *SortGeoDistance) RequiresFields() []string { return []string{s.Field} }
func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
sfm := map[string]interface{}{
"by": "geo_distance",
"field": s.Field,
"location": map[string]interface{}{
"lon": s.Lon,
"lat": s.Lat,
},
}
if s.Unit != "" {
sfm["unit"] = s.Unit
}
if s.Desc {
sfm["desc"] = true
}
return json.Marshal(sfm)
}
func (s *SortGeoDistance) Copy() SearchSort {
var rv SortGeoDistance
rv = *s
return &rv
}
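A hedged usage sketch of the constructor above, wrapping the result in a SortOrder; the field name, unit, and coordinates are assumptions:

```go
package search

// buildGeoSort returns a SortOrder that ranks hits by their distance,
// in kilometers, from an illustrative point. NewSortGeoDistance
// validates the unit string and fills in unitMult.
func buildGeoSort() (SortOrder, error) {
	byDist, err := NewSortGeoDistance("geo", "km", -122.4, 37.8, false)
	if err != nil {
		return nil, err
	}
	return SortOrder{byDist}, nil
}
```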

508 vendor/golang.org/x/text/unicode/norm/composition.go generated vendored Normal file
View File

@@ -0,0 +1,508 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
const (
maxNonStarters = 30
// The maximum number of characters needed for a buffer is
// maxNonStarters + 1 for the starter + 1 for the CGJ
maxBufferSize = maxNonStarters + 2
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int
const (
// Indicates a rune was successfully added to the segment.
ssSuccess ssState = iota
// Indicates a rune starts a new segment and should not be added.
ssStarter
// Indicates a rune caused a segment overflow and a CGJ should be inserted.
ssOverflow
)
// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties) {
*ss = streamSafe(p.nTrailingNonStarters())
}
// next returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func (ss *streamSafe) next(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
n := p.nLeadingNonStarters()
if *ss += streamSafe(n); *ss > maxNonStarters {
*ss = 0
return ssOverflow
}
// The Stream-Safe Text Processing prescribes that the counting can stop
// as soon as a starter is encountered. However, there are some starters,
// like Jamo V and T, that can combine with other runes, leaving their
// successive non-starters appended to the previous, possibly causing an
// overflow. We will therefore consider any rune with a non-zero nLead to
// be a non-starter. Note that it always holds that if nLead > 0 then
// nLead == nTrail.
if n == 0 {
*ss = streamSafe(p.nTrailingNonStarters())
return ssStarter
}
return ssSuccess
}
// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
c := *ss + streamSafe(p.nTrailingNonStarters())
if c > maxNonStarters {
return ssOverflow
}
*ss = c
if p.nLeadingNonStarters() == 0 {
return ssStarter
}
return ssSuccess
}
func (ss streamSafe) isMax() bool {
return ss == maxNonStarters
}
// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
// reorderBuffer is used to normalize a single segment. Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties // Per character info.
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
nbyte uint8 // Number of bytes.
ss streamSafe // For limiting length of non-starter sequence.
nrune int // Number of runeInfos.
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.src.setBytes(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.src.setString(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
rb.out = out
rb.flushF = f
}
// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
}
func (rb *reorderBuffer) doFlush() bool {
if rb.f.composing {
rb.compose()
}
res := rb.flushF(rb)
rb.reset()
return res
}
// appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
rb.out = append(rb.out, rb.byte[start:end]...)
}
return true
}
// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
out = append(out, rb.byte[start:end]...)
}
rb.reset()
return out
}
// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
p := 0
for i := 0; i < rb.nrune; i++ {
runep := rb.rune[i]
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
}
rb.reset()
return p
}
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties) {
n := rb.nrune
b := rb.rune[:]
cc := info.ccc
if cc > 0 {
// Find insertion position + move elements to make room.
for ; n > 0; n-- {
if b[n-1].ccc <= cc {
break
}
b[n] = b[n-1]
}
}
rb.nrune += 1
pos := uint8(rb.nbyte)
rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
}
// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int
const (
iSuccess insertErr = -iota
iShortDst
iShortSrc
)
// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments is encountered, the leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
return iSuccess
}
if info.hasDecomposition() {
return rb.insertDecomposed(info.Decomposition())
}
rb.insertSingle(src, i, info)
return iSuccess
}
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
}
if info.hasDecomposition() {
// TODO: inline.
rb.insertDecomposed(info.Decomposition())
} else {
rb.insertSingle(src, i, info)
}
}
// insertDecomposed inserts an entry into the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp)
// As the streamSafe accounting already handles the counting for modifiers,
// we don't have to call next. However, we do need to keep the accounting
// intact when flushing the buffer.
for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
return iShortDst
}
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
rb.insertOrdered(info)
}
return iSuccess
}
// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
rb.insertOrdered(info)
}
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ() {
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
inf := rb.rune[n]
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return r
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
inf := rb.rune[n]
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}
// For Hangul we combine algorithmically, instead of using tables.
const (
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
hangulBase0 = 0xEA
hangulBase1 = 0xB0
hangulBase2 = 0x80
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
hangulEnd0 = 0xED
hangulEnd1 = 0x9E
hangulEnd2 = 0xA4
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
jamoLBase0 = 0xE1
jamoLBase1 = 0x84
jamoLEnd = 0x1113
jamoVBase = 0x1161
jamoVEnd = 0x1176
jamoTBase = 0x11A7
jamoTEnd = 0x11C3
jamoTCount = 28
jamoVCount = 21
jamoVTCount = 21 * 28
jamoLVTCount = 19 * 21 * 28
)
const hangulUTF8Size = 3
func isHangul(b []byte) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
func isHangulString(b string) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
// True if (rune & 0xff00) == jamoLBase
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}
func isHangulWithoutJamoT(b []byte) bool {
c, _ := utf8.DecodeRune(b)
c -= hangulBase
return c < jamoLVTCount && c%jamoTCount == 0
}
// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
const JamoUTF8Len = 3
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
if x != 0 {
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
return 3 * JamoUTF8Len
}
return 2 * JamoUTF8Len
}
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
s = k - 1
}
if s != k-1 && cccB >= cccC {
// b[i] is blocked by a rune with greater or equal CCC before it
b[k] = b[i]
k++
} else {
l := rb.runeAt(s) // also used to compare to hangulBase
v := rb.runeAt(i) // also used to compare to jamoT
switch {
case jamoLBase <= l && l < jamoLEnd &&
jamoVBase <= v && v < jamoVEnd:
// 11xx plus 116x to LV
rb.assignRune(s, hangulBase+
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
case hangulBase <= l && l < hangulEnd &&
jamoTBase < v && v < jamoTEnd &&
((l-hangulBase)%jamoTCount) == 0:
// ACxx plus 11Ax to LVT
rb.assignRune(s, l+v-jamoTBase)
default:
b[k] = b[i]
k++
}
}
}
rb.nrune = k
}
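// Illustrative example (not part of the original file): combining the
// buffer U+1100 U+1161 U+11A8 takes two steps. First L+V:
// hangulBase + (0x1100-jamoLBase)*jamoVTCount + (0x1161-jamoVBase)*jamoTCount
// == 0xAC00, an LV syllable. Then LV+T: 0xAC00 + (0x11A8 - jamoTBase)
// == 0xAC01, the fully composed LVT syllable.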
// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// UAX #15, section X5, including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
bn := rb.nrune
if bn == 0 {
return
}
k := 1
b := rb.rune[:]
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul(s, i, k)
return
}
ii := b[i]
// We can only use combineForward as a filter if we later
// get the info for the combined character. This is more
// expensive than using the filter. Using combinesBackward()
// is safe.
if ii.combinesBackward() {
cccB := b[k-1].ccc
cccC := ii.ccc
blocked := false // b[i] blocked by starter or greater or equal CCC?
if cccB == 0 {
s = k - 1
} else {
blocked = s != k-1 && cccB >= cccC
}
if !blocked {
combined := combine(rb.runeAt(s), rb.runeAt(i))
if combined != 0 {
rb.assignRune(s, combined)
continue
}
}
}
b[k] = b[i]
k++
}
rb.nrune = k
}
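// Illustrative example (not part of the original file): for the input
// 'e' U+0301 (COMBINING ACUTE ACCENT, CCC 230), the accent is unblocked,
// combine() yields U+00E9, and the segment flushes as "é". For
// 'e' U+0301 U+0301, the second accent cannot combine
// (combine(U+00E9, U+0301) == 0), so it is kept as a separate rune
// after U+00E9.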

259
vendor/golang.org/x/text/unicode/norm/forminfo.go generated vendored Normal file
View File

@@ -0,0 +1,259 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 15: 1 (inverse of NFD_QC bit of qcInfo)
// 13..7: qcInfo (see below). isYesD is always true (no decomposition).
// 6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
// <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.
const (
qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo
headerLenMask = 0x3F // extract the length value from the header byte
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)
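// Illustrative example (not part of the original file): a header byte
// of 0xC2 describes a 2-byte decomposition (0xC2&headerLenMask == 2)
// for a rune whose composed-form quick check is not Yes and which
// combines forward (0xC2&headerFlagsMask == 0xC0); maketables.go sets
// these two bits from the quick-check and combines-forward properties.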
// Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8 // start position in reorderBuffer; used in composition.go
size uint8 // length of UTF-8 encoding of this rune
ccc uint8 // leading canonical combining class (ccc if not decomposition)
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
nLead uint8 // number of leading non-starters.
flags qcInfo // quick check flags
index uint16
}
// functions dispatchable per form
type lookupFunc func(b input, i int) Properties
// formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
nextMain iterFunc
}
var formTable = []*formInfo{{
form: NFC,
composing: true,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextComposed,
}, {
form: NFD,
composing: false,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextDecomposed,
}, {
form: NFKC,
composing: true,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextComposed,
}, {
form: NFKD,
composing: false,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextDecomposed,
}}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary
// after 'a'. However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.
// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool {
if p.ccc == 0 && !p.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
// is always non-zero if different from info.ccc and that we can return
// false at this point. This is verified by maketables.
return false
}
// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func (p Properties) BoundaryAfter() bool {
// TODO: loosen these conditions.
return p.isInert()
}
// We pack quick check data in 6 bits:
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 }
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
func (p Properties) isInert() bool {
return p.flags&qcInfoMask == 0 && p.ccc == 0
}
func (p Properties) multiSegment() bool {
return p.index >= firstMulti && p.index < endMulti
}
func (p Properties) nLeadingNonStarters() uint8 {
return p.nLead
}
func (p Properties) nTrailingNonStarters() uint8 {
return uint8(p.flags & 0x03)
}
// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {
// TODO: create the decomposition for Hangul?
if p.index == 0 {
return nil
}
i := p.index
n := decomps[i] & headerLenMask
i++
return decomps[i : i+uint16(n)]
}
// Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int {
return int(p.size)
}
// CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8 {
if p.index >= firstCCCZeroExcept {
return 0
}
return ccc[p.ccc]
}
// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8 {
return ccc[p.ccc]
}
// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the combined rune or 0 if it doesn't exist.
func combine(a, b rune) rune {
key := uint32(uint16(a))<<16 + uint32(uint16(b))
return recompMap[key]
}
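// Illustrative example (not part of the original file), assuming the
// standard UCD recomposition data in tables.go:
//
//	combine('e', '\u0301') // U+00E9 ('é')
//	combine('q', '\u0301') // 0: no precomposed form exists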
func lookupInfoNFC(b input, i int) Properties {
v, sz := b.charinfoNFC(i)
return compInfo(v, sz)
}
func lookupInfoNFKC(b input, i int) Properties {
v, sz := b.charinfoNFKC(i)
return compInfo(v, sz)
}
// Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookup(s))
}
return compInfo(nfkcData.lookup(s))
}
// PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookupString(s))
}
return compInfo(nfkcData.lookupString(s))
}
// compInfo converts the information contained in v and sz
// to a Properties. See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) Properties {
if v == 0 {
return Properties{size: uint8(sz)}
} else if v >= 0x8000 {
p := Properties{
size: uint8(sz),
ccc: uint8(v),
tccc: uint8(v),
flags: qcInfo(v >> 8),
}
if p.ccc > 0 || p.combinesBackward() {
p.nLead = uint8(p.flags & 0x3)
}
return p
}
// has decomposition
h := decomps[v]
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
p := Properties{size: uint8(sz), flags: f, index: v}
if v >= firstCCC {
v += uint16(h&headerLenMask) + 1
c := decomps[v]
p.tccc = c >> 2
p.flags |= qcInfo(c & 0x3)
if v >= firstLeadingCCC {
p.nLead = c & 0x3
if v >= firstStarterWithNLead {
// We were tricked. Remove the decomposition.
p.flags &= 0x03
p.index = 0
return p
}
p.ccc = decomps[v+1]
}
}
return p
}

109
vendor/golang.org/x/text/unicode/norm/input.go generated vendored Normal file
View File

@@ -0,0 +1,109 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
type input struct {
str string
bytes []byte
}
func inputBytes(str []byte) input {
return input{bytes: str}
}
func inputString(str string) input {
return input{str: str}
}
func (in *input) setBytes(str []byte) {
in.str = ""
in.bytes = str
}
func (in *input) setString(str string) {
in.str = str
in.bytes = nil
}
func (in *input) _byte(p int) byte {
if in.bytes == nil {
return in.str[p]
}
return in.bytes[p]
}
func (in *input) skipASCII(p, max int) int {
if in.bytes == nil {
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
}
} else {
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
}
}
return p
}
func (in *input) skipContinuationBytes(p int) int {
if in.bytes == nil {
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
}
} else {
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
}
}
return p
}
func (in *input) appendSlice(buf []byte, b, e int) []byte {
if in.bytes != nil {
return append(buf, in.bytes[b:e]...)
}
for i := b; i < e; i++ {
buf = append(buf, in.str[i])
}
return buf
}
func (in *input) copySlice(buf []byte, b, e int) int {
if in.bytes == nil {
return copy(buf, in.str[b:e])
}
return copy(buf, in.bytes[b:e])
}
func (in *input) charinfoNFC(p int) (uint16, int) {
if in.bytes == nil {
return nfcData.lookupString(in.str[p:])
}
return nfcData.lookup(in.bytes[p:])
}
func (in *input) charinfoNFKC(p int) (uint16, int) {
if in.bytes == nil {
return nfkcData.lookupString(in.str[p:])
}
return nfkcData.lookup(in.bytes[p:])
}
func (in *input) hangul(p int) (r rune) {
var size int
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, size = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, size = utf8.DecodeRune(in.bytes[p:])
}
if size != hangulUTF8Size {
return 0
}
return r
}

457
vendor/golang.org/x/text/unicode/norm/iter.go generated vendored Normal file
View File

@@ -0,0 +1,457 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"unicode/utf8"
)
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
// sequence of starter and non-starter runes for the purpose of normalization.
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
asciiF iterFunc
p int // current position in input source
multiSeg []byte // remainder of multi-segment decomposition
}
type iterFunc func(*Iter) []byte
// Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.init(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.initString(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
case 0:
abs = offset
case 1:
abs = int64(i.p) + offset
case 2:
abs = int64(i.rb.nsrc) + offset
default:
return 0, fmt.Errorf("norm: invalid whence")
}
if abs < 0 {
return 0, fmt.Errorf("norm: negative position")
}
if int(abs) >= i.rb.nsrc {
i.setDone()
return int64(i.p), nil
}
i.p = int(abs)
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
return abs, nil
}
// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
func (i *Iter) returnSlice(a, b int) []byte {
if i.rb.src.bytes == nil {
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
}
return i.rb.src.bytes[a:b]
}
// Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int {
return i.p
}
func (i *Iter) setDone() {
i.next = nextDone
i.p = i.rb.nsrc
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.p >= i.rb.nsrc
}
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte {
return i.next(i)
}
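// Illustrative example (not part of the original file): stream the
// normalized segments of a string without materializing the whole
// result at once:
//
//	var it Iter
//	it.InitString(NFC, "re\u0301sume\u0301")
//	for !it.Done() {
//		seg := it.Next() // one normalized segment, e.g. "é"
//		_ = seg
//	}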
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.setDone()
return i.rb.src.bytes[i.p:p]
}
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p
i.p = p
return i.rb.src.bytes[p0:p]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextASCIIString(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.buf[0] = i.rb.src.str[i.p]
i.setDone()
return i.buf[:1]
}
if i.rb.src.str[p] < utf8.RuneSelf {
i.buf[0] = i.rb.src.str[i.p]
i.p = p
return i.buf[:1]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextHangul(i *Iter) []byte {
p := i.p
next := p + hangulUTF8Size
if next >= i.rb.nsrc {
i.setDone()
} else if i.rb.src.hangul(next) == 0 {
i.rb.ss.next(i.info)
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
i.p = next
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
}
func nextDone(i *Iter) []byte {
return nil
}
// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
func nextMulti(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
// treat last segment as normal decomposition
i.next = i.rb.f.nextMain
return i.next(i)
}
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
func nextMultiNorm(i *Iter) []byte {
j := 0
d := i.multiSeg
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):]
return seg
}
i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size)
}
i.multiSeg = nil
i.next = nextComposed
return doNormComposed(i)
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte) {
outp := 0
inCopyStart, outCopyStart := i.p, 0
for {
if sz := int(i.info.size); sz <= 1 {
i.rb.ss = 0
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
i.setDone()
return i.returnSlice(p, i.p)
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
return i.returnSlice(p, i.p)
}
outp++
} else if d := i.info.Decomposition(); d != nil {
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
// Case 1: there is a leftover to copy. In this case the decomposition
// must begin with a modifier and should always be appended.
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
p := outp + len(d)
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
// TODO: this condition should not be possible, but we leave it
// in for defensive purposes.
if p > len(i.buf) {
return i.buf[:outp]
}
} else if i.info.multiSegment() {
// outp must be 0 as multi-segment decompositions always
// start a new segment.
if i.multiSeg == nil {
i.multiSeg = d
i.next = nextMulti
return nextMulti(i)
}
// We are in the last segment. Treat as normal decomposition.
d = i.multiSeg
i.multiSeg = nil
p = len(d)
}
prevCC := i.info.tccc
if i.p += sz; i.p >= i.rb.nsrc {
i.setDone()
i.info = Properties{} // Force BoundaryBefore to succeed.
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
switch i.rb.ss.next(i.info) {
case ssOverflow:
i.next = nextCGJDecompose
fallthrough
case ssStarter:
if outp > 0 {
copy(i.buf[outp:], d)
return i.buf[:p]
}
return d
}
copy(i.buf[outp:], d)
outp = p
inCopyStart, outCopyStart = i.p, outp
if i.info.ccc < prevCC {
goto doNorm
}
continue
} else if r := i.rb.src.hangul(i.p); r != 0 {
outp = decomposeHangul(i.buf[:], r)
i.p += hangulUTF8Size
inCopyStart, outCopyStart = i.p, outp
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src.hangul(i.p) != 0 {
i.next = nextHangul
return i.buf[:outp]
}
} else {
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
i.setDone()
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJDecompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
if outCopyStart == 0 {
return i.returnSlice(inCopyStart, i.p)
} else if inCopyStart < i.p {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
}
return i.buf[:outp]
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
i.rb.insertDecomposed(i.buf[0:outp])
return doNormDecomposed(i)
}
func doNormDecomposed(i *Iter) []byte {
for {
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
break
}
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
}
// new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])]
}
func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0
i.rb.insertCGJ()
i.next = nextDecomposed
i.rb.ss.first(i.info)
buf := doNormDecomposed(i)
return buf
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
for {
if !i.info.isYesC() {
goto doNorm
}
prevCC = i.info.tccc
sz := int(i.info.size)
if sz == 0 {
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.rb.ss = 0
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJCompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
return i.returnSlice(startp, i.p)
doNorm:
// reset to start position
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
if i.info.multiSegment() {
d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0)
i.rb.insertUnsafe(input{bytes: d}, 0, info)
i.multiSeg = d[int(info.size):]
i.next = nextMultiNorm
return nextMultiNorm(i)
}
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}
func doNormComposed(i *Iter) []byte {
// First rune should already be inserted.
for {
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if s := i.rb.ss.next(i.info); s == ssStarter {
break
} else if s == ssOverflow {
i.next = nextCGJCompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
}
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
return seg
}
func nextCGJCompose(i *Iter) []byte {
i.rb.ss = 0 // instead of first
i.rb.insertCGJ()
i.next = nextComposed
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
// even if they are not. This is particularly dubious for U+FF9E and U+FF9F.
// If we ever change that, insert a check here.
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}

976
vendor/golang.org/x/text/unicode/norm/maketables.go generated vendored Normal file
View File

@@ -0,0 +1,976 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Normalization table generator.
// Data read from the web.
// See forminfo.go for a description of the trie values associated with each rune.
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
func main() {
gen.Init()
loadUnicodeData()
compactCCC()
loadCompositionExclusions()
completeCharFields(FCanonical)
completeCharFields(FCompatibility)
computeNonStarterCounts()
verifyComputed()
printChars()
testDerived()
printTestdata()
makeTables()
}
var (
tablelist = flag.String("tables",
"all",
"comma-separated list of which tables to generate; "+
"can be 'decomp', 'recomp', 'info' and 'all'")
test = flag.Bool("test",
false,
"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
verbose = flag.Bool("verbose",
false,
"write data to stdout as it is parsed")
)
const MaxChar = 0x10FFFF // anything above this shouldn't exist
// Quick Check properties of runes allow us to quickly
// determine whether a rune may occur in a normal form.
// For a given normal form, a rune may be guaranteed to occur
// verbatim (QC=Yes), may or may not combine with another
// rune (QC=Maybe), or may not occur (QC=No).
type QCResult int
const (
QCUnknown QCResult = iota
QCYes
QCNo
QCMaybe
)
func (r QCResult) String() string {
switch r {
case QCYes:
return "Yes"
case QCNo:
return "No"
case QCMaybe:
return "Maybe"
}
return "***UNKNOWN***"
}
const (
FCanonical = iota // NFC or NFD
FCompatibility // NFKC or NFKD
FNumberOfFormTypes
)
const (
MComposed = iota // NFC or NFKC
MDecomposed // NFD or NFKD
MNumberOfModes
)
// This contains only the properties we're interested in.
type Char struct {
name string
codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
origCCC uint8
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
nTrailingNonStarters uint8
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
state State
}
var chars = make([]Char, MaxChar+1)
var cccMap = make(map[uint8]uint8)
func (c Char) String() string {
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
fmt.Fprintf(buf, " state: %v\n", c.state)
fmt.Fprintf(buf, " NFC:\n")
fmt.Fprint(buf, c.forms[FCanonical])
fmt.Fprintf(buf, " NFKC:\n")
fmt.Fprint(buf, c.forms[FCompatibility])
return buf.String()
}
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int
const (
SNormal State = iota // known to be zero for the type
SFirst
SLast
SMissing
)
var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
}
type FormInfo struct {
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
combinesForward bool // May combine with rune on the right
combinesBackward bool // May combine with rune on the left
isOneWay bool // Never appears in result
inDecomp bool // Some decompositions result in this char.
decomp Decomposition
expandedDecomp Decomposition
}
func (f FormInfo) String() string {
buf := bytes.NewBuffer(make([]byte, 0))
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
return buf.String()
}
type Decomposition []rune
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
}
for _, d := range decomp {
point, err := strconv.ParseUint(d, 16, 64)
if err != nil {
return a, err
}
a = append(a, rune(point))
}
return a, nil
}
func loadUnicodeData() {
f := gen.OpenUCDFile("UnicodeData.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
r := p.Rune(ucd.CodePoint)
char := &chars[r]
char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
decmap := p.String(ucd.DecompMapping)
exp, err := parseDecomposition(decmap, false)
isCompat := false
if err != nil {
if len(decmap) > 0 {
exp, err = parseDecomposition(decmap, true)
if err != nil {
log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
}
isCompat = true
}
}
char.name = p.String(ucd.Name)
char.codePoint = r
char.forms[FCompatibility].decomp = exp
if !isCompat {
char.forms[FCanonical].decomp = exp
} else {
char.compatDecomp = true
}
if len(decmap) > 0 {
char.forms[FCompatibility].decomp = exp
}
}
if err := p.Err(); err != nil {
log.Fatal(err)
}
}
// compactCCC converts the sparse set of CCC values to a contiguous one,
// reducing the number of bits needed from 8 to 6.
func compactCCC() {
m := make(map[uint8]uint8)
for i := range chars {
c := &chars[i]
m[c.ccc] = 0
}
cccs := []int{}
for v := range m {
cccs = append(cccs, int(v))
}
sort.Ints(cccs)
for i, c := range cccs {
cccMap[uint8(i)] = uint8(c)
m[uint8(c)] = uint8(i)
}
for i := range chars {
c := &chars[i]
c.origCCC = c.ccc
c.ccc = m[c.ccc]
}
if len(m) >= 1<<6 {
log.Fatalf("too many difference CCC values: %d >= 64", len(m))
}
}
// CompositionExclusions.txt has form:
// 0958 # ...
// See http://unicode.org/reports/tr44/ for full explanation
func loadCompositionExclusions() {
f := gen.OpenUCDFile("CompositionExclusions.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
c := &chars[p.Rune(0)]
if c.excludeInComp {
log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
}
c.excludeInComp = true
}
if e := p.Err(); e != nil {
log.Fatal(e)
}
}
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
func hasCompatDecomp(r rune) bool {
c := &chars[r]
if c.compatDecomp {
return true
}
for _, d := range c.forms[FCompatibility].decomp {
if hasCompatDecomp(d) {
return true
}
}
return false
}
// Hangul related constants.
const (
HangulBase = 0xAC00
HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
JamoLBase = 0x1100
JamoLEnd = 0x1113
JamoVBase = 0x1161
JamoVEnd = 0x1176
JamoTBase = 0x11A8
JamoTEnd = 0x11C3
JamoLVTCount = 19 * 21 * 28
JamoTCount = 28
)
func isHangul(r rune) bool {
return HangulBase <= r && r < HangulEnd
}
func isHangulWithoutJamoT(r rune) bool {
if !isHangul(r) {
return false
}
r -= HangulBase
return r < JamoLVTCount && r%JamoTCount == 0
}
func ccc(r rune) uint8 {
return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
cc := ccc(r)
if cc > 0 {
// Use bubble sort.
for ; n > 0; n-- {
if ccc(b[n-1]) <= cc {
break
}
b[n] = b[n-1]
}
}
b[n] = r
return b
}
// Recursively decompose.
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
}
return d
}
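// Illustrative example (not part of the original file): for FCanonical,
// U+01D5 first decomposes to U+00DC U+0304, and U+00DC recursively
// decomposes to U+0055 U+0308, so the expanded decomposition is
// U+0055 U+0308 U+0304. Both marks have CCC 230, so insertOrdered
// preserves their relative order.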
func completeCharFields(form int) {
// Phase 0: pre-expand decomposition.
for i := range chars {
f := &chars[i].forms[form]
if len(f.decomp) == 0 {
continue
}
exp := make(Decomposition, 0)
for _, c := range f.decomp {
exp = decomposeRecursive(form, c, exp)
}
f.expandedDecomp = exp
}
// Phase 1: composition exclusion, mark decomposition.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
// Marks script-specific exclusions and version-restricted characters.
f.isOneWay = c.excludeInComp
// Singletons
f.isOneWay = f.isOneWay || len(f.decomp) == 1
// Non-starter decompositions
if len(f.decomp) > 1 {
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
f.isOneWay = f.isOneWay || chk
}
// Runes that decompose into more than two runes.
f.isOneWay = f.isOneWay || len(f.decomp) > 2
if form == FCompatibility {
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
for _, r := range f.decomp {
chars[r].forms[form].inDecomp = true
}
}
// Phase 2: forward and backward combining.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
if !f.isOneWay && len(f.decomp) == 2 {
f0 := &chars[f.decomp[0]].forms[form]
f1 := &chars[f.decomp[1]].forms[form]
if !f0.isOneWay {
f0.combinesForward = true
}
if !f1.isOneWay {
f1.combinesBackward = true
}
}
if isHangulWithoutJamoT(rune(i)) {
f.combinesForward = true
}
}
// Phase 3: quick check values.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
}
switch {
case f.isOneWay:
f.quickCheck[MComposed] = QCNo
case (i & 0xffff00) == JamoLBase:
f.quickCheck[MComposed] = QCYes
if JamoLBase <= i && i < JamoLEnd {
f.combinesForward = true
}
if JamoVBase <= i && i < JamoVEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
f.combinesForward = true
}
if JamoTBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
}
case !f.combinesBackward:
f.quickCheck[MComposed] = QCYes
default:
f.quickCheck[MComposed] = QCMaybe
}
}
}
func computeNonStarterCounts() {
// Phase 4: leading and trailing non-starter count
for i := range chars {
c := &chars[i]
runes := []rune{rune(i)}
// We always use FCompatibility so that the CGJ insertion points do not
// change for repeated normalizations with different forms.
if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
runes = exp
}
// We consider runes that combine backwards to be non-starters for the
// purpose of Stream-Safe Text Processing.
for _, r := range runes {
if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
break
}
c.nLeadingNonStarters++
}
for i := len(runes) - 1; i >= 0; i-- {
if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
break
}
c.nTrailingNonStarters++
}
if c.nTrailingNonStarters > 3 {
log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
}
if isHangul(rune(i)) {
c.nTrailingNonStarters = 2
if isHangulWithoutJamoT(rune(i)) {
c.nTrailingNonStarters = 1
}
}
if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
}
if t := c.nTrailingNonStarters; t > 3 {
log.Fatalf("%U: number of trailing non-starters is %d > 3", t)
}
}
}
func printBytes(w io.Writer, b []byte, name string) {
fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
fmt.Fprintf(w, "var %s = [...]byte {", name)
for i, c := range b {
switch {
case i%64 == 0:
fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
case i%8 == 0:
fmt.Fprintf(w, "\n")
}
fmt.Fprintf(w, "0x%.2X, ", c)
}
fmt.Fprint(w, "\n}\n\n")
}
// See forminfo.go for format.
func makeEntry(f *FormInfo, c *Char) uint16 {
e := uint16(0)
if r := c.codePoint; HangulBase <= r && r < HangulEnd {
e |= 0x40
}
if f.combinesForward {
e |= 0x20
}
if f.quickCheck[MDecomposed] == QCNo {
e |= 0x4
}
switch f.quickCheck[MComposed] {
case QCYes:
case QCNo:
e |= 0x10
case QCMaybe:
e |= 0x18
default:
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
}
e |= uint16(c.nTrailingNonStarters)
return e
}
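// Illustrative example (not part of the original file): a rune with
// quickCheck[MComposed] == QCMaybe and one trailing non-starter yields
// e == 0x18|0x01 == 0x19; makeTables then stores 0x8000 | e<<8 | ccc in
// the trie, matching the v >= 0x8000 format described in forminfo.go.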
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
type decompSet [7]map[string]bool
const (
normalDecomp = iota
firstMulti
firstCCC
endMulti
firstLeadingCCC
firstCCCZeroExcept
firstStarterWithNLead
lastDecomp
)
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
func makeDecompSet() decompSet {
m := decompSet{}
for i := range m {
m[i] = make(map[string]bool)
}
return m
}
func (m *decompSet) insert(key int, s string) {
m[key][s] = true
}
func printCharInfoTables(w io.Writer) int {
mkstr := func(r rune, f *FormInfo) (int, string) {
d := f.expandedDecomp
s := string([]rune(d))
if max := 1 << 6; len(s) >= max {
const msg = "%U: too many bytes in decomposition: %d >= %d"
log.Fatalf(msg, r, len(s), max)
}
head := uint8(len(s))
if f.quickCheck[MComposed] != QCYes {
head |= 0x40
}
if f.combinesForward {
head |= 0x80
}
s = string([]byte{head}) + s
lccc := ccc(d[0])
tccc := ccc(d[len(d)-1])
cc := ccc(r)
if cc != 0 && lccc == 0 && tccc == 0 {
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
}
if tccc < lccc && lccc != 0 {
const msg = "%U: lccc (%d) must be <= tcc (%d)"
log.Fatalf(msg, r, lccc, tccc)
}
index := normalDecomp
nTrail := chars[r].nTrailingNonStarters
nLead := chars[r].nLeadingNonStarters
if tccc > 0 || lccc > 0 || nTrail > 0 {
tccc <<= 2
tccc |= nTrail
s += string([]byte{tccc})
index = endMulti
for _, r := range d[1:] {
if ccc(r) == 0 {
index = firstCCC
}
}
if lccc > 0 || nLead > 0 {
s += string([]byte{lccc})
if index == firstCCC {
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
}
index = firstLeadingCCC
}
if cc != lccc {
if cc != 0 {
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
}
index = firstCCCZeroExcept
}
} else if len(d) > 1 {
index = firstMulti
}
return index, s
}
decompSet := makeDecompSet()
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
decompSet.insert(firstStarterWithNLead, nLeadStr)
// Store the uniqued decompositions in a byte buffer,
// preceded by their byte length.
for _, c := range chars {
for _, f := range c.forms {
if len(f.expandedDecomp) == 0 {
continue
}
if f.combinesBackward {
log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
}
index, s := mkstr(c.codePoint, &f)
decompSet.insert(index, s)
}
}
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
size := 0
positionMap := make(map[string]uint16)
decompositions.WriteString("\000")
fmt.Fprintln(w, "const (")
for i, m := range decompSet {
sa := []string{}
for s := range m {
sa = append(sa, s)
}
sort.Strings(sa)
for _, s := range sa {
p := decompositions.Len()
decompositions.WriteString(s)
positionMap[s] = uint16(p)
}
if cname[i] != "" {
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
}
}
fmt.Fprintln(w, "maxDecomp = 0x8000")
fmt.Fprintln(w, ")")
b := decompositions.Bytes()
printBytes(w, b, "decomps")
size += len(b)
varnames := []string{"nfc", "nfkc"}
for i := 0; i < FNumberOfFormTypes; i++ {
trie := triegen.NewTrie(varnames[i])
for r, c := range chars {
f := c.forms[i]
d := f.expandedDecomp
if len(d) != 0 {
_, key := mkstr(c.codePoint, &f)
trie.Insert(rune(r), uint64(positionMap[key]))
if c.ccc != ccc(d[0]) {
// We assume the lead ccc of a decomposition !=0 in this case.
if ccc(d[0]) == 0 {
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
}
}
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
// Handle cases where it can't be detected that the nLead should be equal
// to nTrail.
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
trie.Insert(c.codePoint, uint64(0x8000|v))
}
}
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
if err != nil {
log.Fatal(err)
}
size += sz
}
return size
}
func contains(sa []string, s string) bool {
for _, a := range sa {
if a == s {
return true
}
}
return false
}
func makeTables() {
w := &bytes.Buffer{}
size := 0
if *tablelist == "" {
return
}
list := strings.Split(*tablelist, ",")
if *tablelist == "all" {
list = []string{"recomp", "info"}
}
// Compute maximum decomposition size.
max := 0
for _, c := range chars {
if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
max = n
}
}
fmt.Fprintln(w, "const (")
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
fmt.Fprintln(w)
fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
fmt.Fprintln(w, ")\n")
// Print the CCC remap table.
size += len(cccMap)
fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
for i := 0; i < len(cccMap); i++ {
if i%8 == 0 {
fmt.Fprintln(w)
}
fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
}
fmt.Fprintln(w, "\n}\n")
if contains(list, "info") {
size += printCharInfoTables(w)
}
if contains(list, "recomp") {
// Note that we use 32 bit keys, instead of 64 bit.
// This clips the bits of three entries, but we know
// this won't cause a collision. The compiler will catch
// any changes made to UnicodeData.txt that introduces
// a collision.
// Note that the recomposition map for NFC and NFKC
// are identical.
// Recomposition map
nrentries := 0
for _, c := range chars {
f := c.forms[FCanonical]
if !f.isOneWay && len(f.decomp) > 0 {
nrentries++
}
}
sz := nrentries * 8
size += sz
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
for i, c := range chars {
f := c.forms[FCanonical]
d := f.decomp
if !f.isOneWay && len(d) > 0 {
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
}
}
fmt.Fprintf(w, "}\n\n")
}
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
gen.WriteGoFile("tables.go", "norm", w.Bytes())
}
func printChars() {
if *verbose {
for _, c := range chars {
if !c.isValid() || c.state == SMissing {
continue
}
fmt.Println(c)
}
}
}
// verifyComputed does various consistency tests.
func verifyComputed() {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
}
isMaybe := f.quickCheck[MComposed] == QCMaybe
if f.combinesBackward != isMaybe {
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
}
if len(f.decomp) > 0 && f.combinesForward && isMaybe {
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
}
if len(f.expandedDecomp) != 0 {
continue
}
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
// We accept these runes to be treated differently (it only affects
// segment breaking in iteration, most likely on improper use), but
// reconsider if more characters are added.
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
log.Fatalf("%U: nLead was %v; want %v", i, a, b)
}
}
}
nfc := c.forms[FCanonical]
nfkc := c.forms[FCompatibility]
if nfc.combinesBackward != nfkc.combinesBackward {
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
}
}
}
// Use values in DerivedNormalizationProps.txt to compare against the
// values we computed.
// DerivedNormalizationProps.txt has form:
// 00C0..00C5 ; NFD_QC; N # ...
// 0374 ; NFD_QC; N # ...
// See http://unicode.org/reports/tr44/ for full explanation
func testDerived() {
f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
defer f.Close()
p := ucd.New(f)
for p.Next() {
r := p.Rune(0)
c := &chars[r]
var ftype, mode int
qt := p.String(1)
switch qt {
case "NFC_QC":
ftype, mode = FCanonical, MComposed
case "NFD_QC":
ftype, mode = FCanonical, MDecomposed
case "NFKC_QC":
ftype, mode = FCompatibility, MComposed
case "NFKD_QC":
ftype, mode = FCompatibility, MDecomposed
default:
continue
}
var qr QCResult
switch p.String(2) {
case "Y":
qr = QCYes
case "N":
qr = QCNo
case "M":
qr = QCMaybe
default:
log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
}
if got := c.forms[ftype].quickCheck[mode]; got != qr {
log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
}
c.forms[ftype].verified[mode] = true
}
if err := p.Err(); err != nil {
log.Fatal(err)
}
// Any unspecified value must be QCYes. Verify this.
for i, c := range chars {
for j, fd := range c.forms {
for k, qr := range fd.quickCheck {
if !fd.verified[k] && qr != QCYes {
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
log.Printf(m, i, j, k, qr, c.name)
}
}
}
}
}
var testHeader = `const (
Yes = iota
No
Maybe
)
type formData struct {
qc uint8
combinesForward bool
decomposition string
}
type runeData struct {
r rune
ccc uint8
nLead uint8
nTrail uint8
f [2]formData // 0: canonical; 1: compatibility
}
func f(qc uint8, cf bool, dec string) [2]formData {
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
}
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
}
var testData = []runeData{
`
func printTestdata() {
type lastInfo struct {
ccc uint8
nLead uint8
nTrail uint8
f string
}
last := lastInfo{}
w := &bytes.Buffer{}
fmt.Fprintf(w, testHeader)
for r, c := range chars {
f := c.forms[FCanonical]
qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
f = c.forms[FCompatibility]
qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
s := ""
if d == dk && qc == qck && cf == cfk {
s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
} else {
s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
}
current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
if last != current {
fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
last = current
}
}
fmt.Fprintln(w, "}")
gen.WriteGoFile("data_test.go", "norm", w.Bytes())
}

609
vendor/golang.org/x/text/unicode/norm/normalize.go generated vendored Normal file
View File

@@ -0,0 +1,609 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test
// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
type Form int
const (
NFC Form = iota
NFD
NFKC
NFKD
)
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
src := inputBytes(b)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
return doAppendInner(&rb, n)
}
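// Illustrative example (not part of the original file):
//
//	b := []byte("e\u0301") // 'e' + COMBINING ACUTE ACCENT
//	_ = NFC.Bytes(b)       // "\u00e9" ("é"); b is returned unchanged
//	                       // if it is already normalized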
// String returns f(s).
func (f Form) String(s string) string {
src := inputString(s)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
return string(doAppendInner(&rb, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
src := inputBytes(b)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
rb.setFlusher(nil, cmpNormalBytes)
for bp < len(b) {
rb.out = b[bp:]
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
}
return true
}
func cmpNormalBytes(rb *reorderBuffer) bool {
b := rb.out
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if int(info.size) > len(b) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if b[0] != rb.byte[p] {
return false
}
b = b[1:]
}
}
return true
}
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
src := inputString(s)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
rb.setFlusher(nil, func(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(s) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if s[bp] != rb.byte[p] {
return false
}
bp++
}
}
return true
})
for bp < len(s) {
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
}
return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It returns the
// patched buffer and whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool {
info, p := lastRuneStart(&rb.f, rb.out)
if p == -1 || info.size == 0 {
return true
}
end := p + int(info.size)
extra := len(rb.out) - end
if extra > 0 {
// Potentially allocating memory. However, this only
// happens with ill-formed UTF-8.
x := make([]byte, 0)
x = append(x, rb.out[len(rb.out)-extra:]...)
rb.out = rb.out[:end]
decomposeToLastBoundary(rb)
rb.doFlush()
rb.out = append(rb.out, x...)
return false
}
buf := rb.out[p:]
rb.out = rb.out[:p]
decomposeToLastBoundary(rb)
if s := rb.ss.next(info); s == ssStarter {
rb.doFlush()
rb.ss.first(info)
} else if s == ssOverflow {
rb.doFlush()
rb.insertCGJ()
rb.ss = 0
}
rb.insertUnsafe(inputBytes(buf), 0, info)
return true
}
func appendQuick(rb *reorderBuffer, i int) int {
if rb.nsrc == i {
return i
}
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
rb.out = rb.src.appendSlice(rb.out, i, end)
return end
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
return f.doAppend(out, inputBytes(src), len(src))
}
func (f Form) doAppend(out []byte, src input, n int) []byte {
if n == 0 {
return out
}
ft := formTable[f]
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
if len(out) == 0 {
p, _ := ft.quickSpan(src, 0, n, true)
out = src.appendSlice(out, 0, p)
if p == n {
return out
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
return doAppendInner(&rb, p)
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppend(&rb, out, 0)
}
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
rb.setFlusher(out, appendFlush)
src, n := rb.src, rb.nsrc
doMerge := len(out) > 0
if q := src.skipContinuationBytes(p); q > p {
// Move leading non-starters to destination.
rb.out = src.appendSlice(rb.out, p, q)
p = q
doMerge = patchTail(rb)
}
fd := &rb.f
if doMerge {
var info Properties
if p < n {
info = fd.info(src, p)
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
if p == 0 {
decomposeToLastBoundary(rb)
}
p = decomposeSegment(rb, p, true)
}
}
if info.size == 0 {
rb.doFlush()
// Append incomplete UTF-8 encoding.
return src.appendSlice(rb.out, p, n)
}
if rb.nrune > 0 {
return doAppendInner(rb, p)
}
}
p = appendQuick(rb, p)
return doAppendInner(rb, p)
}
func doAppendInner(rb *reorderBuffer, p int) []byte {
for n := rb.nsrc; p < n; {
p = decomposeSegment(rb, p, true)
p = appendQuick(rb, p)
}
return rb.out
}
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
return f.doAppend(out, inputString(src), len(src))
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
return n
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
if n < len(b) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
if n < len(s) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
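// Illustrative example (not part of the original file):
//
//	n, err := NFC.SpanString("abce\u0301", true)
//	// n == 3, err == transform.ErrEndOfSpan: "abc" is already NFC,
//	// but "e\u0301" would recompose to U+00E9.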
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might become
// non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
var lastCC uint8
ss := streamSafe(0)
lastSegStart := i
for n = end; i < n; {
if j := src.skipASCII(i, n); i != j {
i = j
lastSegStart = i - 1
lastCC = 0
ss = 0
continue
}
info := f.info(src, i)
if info.size == 0 {
if atEOF {
// include incomplete runes
return n, true
}
return lastSegStart, true
}
// This block needs to be before the next, because it is possible to
// have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) {
case ssStarter:
lastSegStart = i
case ssOverflow:
return lastSegStart, false
case ssSuccess:
if lastCC > info.ccc {
return lastSegStart, false
}
}
if f.composing {
if !info.isYesC() {
break
}
} else {
if !info.isYesD() {
break
}
}
lastCC = info.ccc
i += int(info.size)
}
if i == n {
if !atEOF {
n = lastSegStart
}
return n, true
}
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
return n
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
return f.firstBoundary(inputBytes(b), len(b))
}
func (f Form) firstBoundary(src input, nsrc int) int {
i := src.skipContinuationBytes(0)
if i >= nsrc {
return -1
}
fd := formTable[f]
ss := streamSafe(0)
// We should call ss.first here, but we can't as the first rune is
// skipped already. This means FirstBoundary can't really determine
// CGJ insertion points correctly. Luckily it doesn't have to.
for {
info := fd.info(src, i)
if info.size == 0 {
return -1
}
if s := ss.next(info); s != ssSuccess {
return i
}
i += int(info.size)
if i >= nsrc {
if !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
}
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
return f.firstBoundary(inputString(s), len(s))
}
// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
return f.nextBoundary(inputBytes(b), len(b), atEOF)
}
// NextBoundaryInString reports the index of the boundary between the first and
// next segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
return f.nextBoundary(inputString(s), len(s), atEOF)
}
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
if nsrc == 0 {
if atEOF {
return 0
}
return -1
}
fd := formTable[f]
info := fd.info(src, 0)
if info.size == 0 {
if atEOF {
return 1
}
return -1
}
ss := streamSafe(0)
ss.first(info)
for i := int(info.size); i < nsrc; i += int(info.size) {
info = fd.info(src, i)
if info.size == 0 {
if atEOF {
return i
}
return -1
}
// TODO: Using streamSafe to determine the boundary isn't the same as
// using BoundaryBefore. Determine which should be used.
if s := ss.next(info); s != ssSuccess {
return i
}
}
if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
return lastBoundary(formTable[f], b)
}
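
// exampleLastBoundary is an illustrative sketch (hypothetical name) of the
// classic streaming pattern: bytes after the last boundary may still combine
// with future input, so a writer would flush done and keep pending buffered.
func exampleLastBoundary(buf []byte) (done, pending []byte) {
	i := NFC.LastBoundary(buf)
	if i < 0 {
		return nil, buf // no boundary at all: keep everything buffered
	}
	return buf[:i], buf[i:]
}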
func lastBoundary(fd *formInfo, b []byte) int {
i := len(b)
info, p := lastRuneStart(fd, b)
if p == -1 {
return -1
}
if info.size == 0 { // ends with incomplete rune
if p == 0 { // starts with incomplete rune
return -1
}
i = p
info, p = lastRuneStart(fd, b[:i])
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
return i
}
}
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
return i
}
if info.BoundaryAfter() {
return i
}
ss := streamSafe(0)
v := ss.backwards(info)
for i = p; i >= 0 && v != ssStarter; i = p {
info, p = lastRuneStart(fd, b[:i])
if v = ss.backwards(info); v == ssOverflow {
break
}
if p+int(info.size) != i {
if p == -1 { // no boundary found
return -1
}
return i // boundary after an illegal UTF-8 encoding
}
}
return i
}
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
// Force one character to be consumed.
info := rb.f.info(rb.src, sp)
if info.size == 0 {
return 0
}
if s := rb.ss.next(info); s == ssStarter {
// TODO: this could be removed if we don't support merging.
if rb.nrune > 0 {
goto end
}
} else if s == ssOverflow {
rb.insertCGJ()
goto end
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
for {
sp += int(info.size)
if sp >= rb.nsrc {
if !atEOF && !info.BoundaryAfter() {
return int(iShortSrc)
}
break
}
info = rb.f.info(rb.src, sp)
if info.size == 0 {
if !atEOF {
return int(iShortSrc)
}
break
}
if s := rb.ss.next(info); s == ssStarter {
break
} else if s == ssOverflow {
rb.insertCGJ()
break
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
}
end:
if !rb.doFlush() {
return int(iShortDst)
}
return sp
}
// lastRuneStart returns the Properties and position of the last
// rune in buf, or the zero Properties and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
p := len(buf) - 1
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
}
if p < 0 {
return Properties{}, -1
}
return fd.info(inputBytes(buf), p), p
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. It trims rb.out to exclude the scanned segment.
func decomposeToLastBoundary(rb *reorderBuffer) {
fd := &rb.f
info, i := lastRuneStart(fd, rb.out)
if int(info.size) != len(rb.out)-i {
// illegal trailing continuation bytes
return
}
if info.BoundaryAfter() {
return
}
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
padd := 0
ss := streamSafe(0)
p := len(rb.out)
for {
add[padd] = info
v := ss.backwards(info)
if v == ssOverflow {
// Note that if we have an overflow, the string we are appending to
// is not correctly normalized. In this case the behavior is undefined.
break
}
padd++
p -= int(info.size)
if v == ssStarter || p < 0 {
break
}
info, i = lastRuneStart(fd, rb.out[:p])
if int(info.size) != p-i {
break
}
}
rb.ss = ss
// Copy bytes for insertion as we may need to overwrite rb.out.
var buf [maxBufferSize * utf8.UTFMax]byte
cp := buf[:copy(buf[:], rb.out[p:])]
rb.out = rb.out[:p]
for padd--; padd >= 0; padd-- {
info = add[padd]
rb.insertUnsafe(inputBytes(cp), 0, info)
cp = cp[info.size:]
}
}
