mirror of
https://github.com/go-gitea/gitea
synced 2024-09-27 14:54:05 +00:00
12a1f914f4
* update github.com/alecthomas/chroma v0.8.0 -> v0.8.1 * github.com/blevesearch/bleve v1.0.10 -> v1.0.12 * editorconfig-core-go v2.1.1 -> v2.3.7 * github.com/gliderlabs/ssh v0.2.2 -> v0.3.1 * migrate editorconfig.ParseBytes to Parse * github.com/shurcooL/vfsgen to 0d455de96546 * github.com/go-git/go-git/v5 v5.1.0 -> v5.2.0 * github.com/google/uuid v1.1.1 -> v1.1.2 * github.com/huandu/xstrings v1.3.0 -> v1.3.2 * github.com/klauspost/compress v1.10.11 -> v1.11.1 * github.com/markbates/goth v1.61.2 -> v1.65.0 * github.com/mattn/go-sqlite3 v1.14.0 -> v1.14.4 * github.com/mholt/archiver v3.3.0 -> v3.3.2 * github.com/microcosm-cc/bluemonday 4f7140c49acb -> v1.0.4 * github.com/minio/minio-go v7.0.4 -> v7.0.5 * github.com/olivere/elastic v7.0.9 -> v7.0.20 * github.com/urfave/cli v1.20.0 -> v1.22.4 * github.com/prometheus/client_golang v1.1.0 -> v1.8.0 * github.com/xanzy/go-gitlab v0.37.0 -> v0.38.1 * mvdan.cc/xurls v2.1.0 -> v2.2.0 Co-authored-by: Lauris BH <lauris@nix.lv>
415 lines
12 KiB
Go
Vendored
415 lines
12 KiB
Go
Vendored
// Copyright 2012-present Oliver Eilhard. All rights reserved.
|
|
// Use of this source code is governed by a MIT-license.
|
|
// See http://olivere.mit-license.org/license.txt for details.
|
|
|
|
package elastic
|
|
|
|
import "errors"
|
|
|
|
// MoreLikeThis query (MLT Query) finds documents that are "like" a given
|
|
// set of documents. In order to do so, MLT selects a set of representative
|
|
// terms of these input documents, forms a query using these terms, executes
|
|
// the query and returns the results. The user controls the input documents,
|
|
// how the terms should be selected and how the query is formed.
|
|
//
|
|
// For more details, see
|
|
// https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-mlt-query.html
|
|
type MoreLikeThisQuery struct {
|
|
fields []string
|
|
docs []*MoreLikeThisQueryItem
|
|
unlikeDocs []*MoreLikeThisQueryItem
|
|
include *bool
|
|
minimumShouldMatch string
|
|
minTermFreq *int
|
|
maxQueryTerms *int
|
|
stopWords []string
|
|
minDocFreq *int
|
|
maxDocFreq *int
|
|
minWordLength *int
|
|
maxWordLength *int
|
|
boostTerms *float64
|
|
boost *float64
|
|
analyzer string
|
|
failOnUnsupportedField *bool
|
|
queryName string
|
|
}
|
|
|
|
// NewMoreLikeThisQuery creates and initializes a new MoreLikeThisQuery.
|
|
func NewMoreLikeThisQuery() *MoreLikeThisQuery {
|
|
return &MoreLikeThisQuery{
|
|
fields: make([]string, 0),
|
|
stopWords: make([]string, 0),
|
|
docs: make([]*MoreLikeThisQueryItem, 0),
|
|
unlikeDocs: make([]*MoreLikeThisQueryItem, 0),
|
|
}
|
|
}
|
|
|
|
// Field adds one or more field names to the query.
|
|
func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery {
|
|
q.fields = append(q.fields, fields...)
|
|
return q
|
|
}
|
|
|
|
// StopWord sets the stopwords. Any word in this set is considered
|
|
// "uninteresting" and ignored. Even if your Analyzer allows stopwords,
|
|
// you might want to tell the MoreLikeThis code to ignore them, as for
|
|
// the purposes of document similarity it seems reasonable to assume that
|
|
// "a stop word is never interesting".
|
|
func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery {
|
|
q.stopWords = append(q.stopWords, stopWords...)
|
|
return q
|
|
}
|
|
|
|
// LikeText sets the text to use in order to find documents that are "like" this.
|
|
func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery {
|
|
for _, s := range likeTexts {
|
|
item := NewMoreLikeThisQueryItem().LikeText(s)
|
|
q.docs = append(q.docs, item)
|
|
}
|
|
return q
|
|
}
|
|
|
|
// LikeItems sets the documents to use in order to find documents that are "like" this.
|
|
func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
|
|
q.docs = append(q.docs, docs...)
|
|
return q
|
|
}
|
|
|
|
// IgnoreLikeText sets the text from which the terms should not be selected from.
|
|
func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery {
|
|
for _, s := range ignoreLikeText {
|
|
item := NewMoreLikeThisQueryItem().LikeText(s)
|
|
q.unlikeDocs = append(q.unlikeDocs, item)
|
|
}
|
|
return q
|
|
}
|
|
|
|
// IgnoreLikeItems sets the documents from which the terms should not be selected from.
|
|
func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
|
|
q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...)
|
|
return q
|
|
}
|
|
|
|
// Ids sets the document ids to use in order to find documents that are "like" this.
|
|
func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery {
|
|
for _, id := range ids {
|
|
item := NewMoreLikeThisQueryItem().Id(id)
|
|
q.docs = append(q.docs, item)
|
|
}
|
|
return q
|
|
}
|
|
|
|
// Include specifies whether the input documents should also be included
|
|
// in the results returned. Defaults to false.
|
|
func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery {
|
|
q.include = &include
|
|
return q
|
|
}
|
|
|
|
// MinimumShouldMatch sets the number of terms that must match the generated
|
|
// query expressed in the common syntax for minimum should match.
|
|
// The default value is "30%".
|
|
//
|
|
// This used to be "PercentTermsToMatch" in Elasticsearch versions before 2.0.
|
|
func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery {
|
|
q.minimumShouldMatch = minimumShouldMatch
|
|
return q
|
|
}
|
|
|
|
// MinTermFreq is the frequency below which terms will be ignored in the
|
|
// source doc. The default frequency is 2.
|
|
func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery {
|
|
q.minTermFreq = &minTermFreq
|
|
return q
|
|
}
|
|
|
|
// MaxQueryTerms sets the maximum number of query terms that will be included
|
|
// in any generated query. It defaults to 25.
|
|
func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery {
|
|
q.maxQueryTerms = &maxQueryTerms
|
|
return q
|
|
}
|
|
|
|
// MinDocFreq sets the frequency at which words will be ignored which do
|
|
// not occur in at least this many docs. The default is 5.
|
|
func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery {
|
|
q.minDocFreq = &minDocFreq
|
|
return q
|
|
}
|
|
|
|
// MaxDocFreq sets the maximum frequency for which words may still appear.
|
|
// Words that appear in more than this many docs will be ignored.
|
|
// It defaults to unbounded.
|
|
func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery {
|
|
q.maxDocFreq = &maxDocFreq
|
|
return q
|
|
}
|
|
|
|
// MinWordLength sets the minimum word length below which words will be
|
|
// ignored. It defaults to 0.
|
|
func (q *MoreLikeThisQuery) MinWordLength(minWordLength int) *MoreLikeThisQuery {
|
|
q.minWordLength = &minWordLength
|
|
return q
|
|
}
|
|
|
|
// MaxWordLength sets the maximum word length above which words will be ignored.
|
|
// Defaults to unbounded (0).
|
|
func (q *MoreLikeThisQuery) MaxWordLength(maxWordLength int) *MoreLikeThisQuery {
|
|
q.maxWordLength = &maxWordLength
|
|
return q
|
|
}
|
|
|
|
// BoostTerms sets the boost factor to use when boosting terms.
|
|
// It defaults to 1.
|
|
func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery {
|
|
q.boostTerms = &boostTerms
|
|
return q
|
|
}
|
|
|
|
// Analyzer specifies the analyzer that will be use to analyze the text.
|
|
// Defaults to the analyzer associated with the field.
|
|
func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery {
|
|
q.analyzer = analyzer
|
|
return q
|
|
}
|
|
|
|
// Boost sets the boost for this query.
|
|
func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery {
|
|
q.boost = &boost
|
|
return q
|
|
}
|
|
|
|
// FailOnUnsupportedField indicates whether to fail or return no result
|
|
// when this query is run against a field which is not supported such as
|
|
// a binary/numeric field.
|
|
func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery {
|
|
q.failOnUnsupportedField = &fail
|
|
return q
|
|
}
|
|
|
|
// QueryName sets the query name for the filter that can be used when
|
|
// searching for matched_filters per hit.
|
|
func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery {
|
|
q.queryName = queryName
|
|
return q
|
|
}
|
|
|
|
// Source creates the source for the MLT query.
|
|
// It may return an error if the caller forgot to specify any documents to
|
|
// be "liked" in the MoreLikeThisQuery.
|
|
func (q *MoreLikeThisQuery) Source() (interface{}, error) {
|
|
// {
|
|
// "match_all" : { ... }
|
|
// }
|
|
if len(q.docs) == 0 {
|
|
return nil, errors.New(`more_like_this requires some documents to be "liked"`)
|
|
}
|
|
|
|
source := make(map[string]interface{})
|
|
|
|
params := make(map[string]interface{})
|
|
source["more_like_this"] = params
|
|
|
|
if len(q.fields) > 0 {
|
|
params["fields"] = q.fields
|
|
}
|
|
|
|
var likes []interface{}
|
|
for _, doc := range q.docs {
|
|
src, err := doc.Source()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
likes = append(likes, src)
|
|
}
|
|
params["like"] = likes
|
|
|
|
if len(q.unlikeDocs) > 0 {
|
|
var dontLikes []interface{}
|
|
for _, doc := range q.unlikeDocs {
|
|
src, err := doc.Source()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
dontLikes = append(dontLikes, src)
|
|
}
|
|
params["unlike"] = dontLikes
|
|
}
|
|
|
|
if q.minimumShouldMatch != "" {
|
|
params["minimum_should_match"] = q.minimumShouldMatch
|
|
}
|
|
if q.minTermFreq != nil {
|
|
params["min_term_freq"] = *q.minTermFreq
|
|
}
|
|
if q.maxQueryTerms != nil {
|
|
params["max_query_terms"] = *q.maxQueryTerms
|
|
}
|
|
if len(q.stopWords) > 0 {
|
|
params["stop_words"] = q.stopWords
|
|
}
|
|
if q.minDocFreq != nil {
|
|
params["min_doc_freq"] = *q.minDocFreq
|
|
}
|
|
if q.maxDocFreq != nil {
|
|
params["max_doc_freq"] = *q.maxDocFreq
|
|
}
|
|
if q.minWordLength != nil {
|
|
params["min_word_length"] = *q.minWordLength
|
|
}
|
|
if q.maxWordLength != nil {
|
|
params["max_word_length"] = *q.maxWordLength
|
|
}
|
|
if q.boostTerms != nil {
|
|
params["boost_terms"] = *q.boostTerms
|
|
}
|
|
if q.boost != nil {
|
|
params["boost"] = *q.boost
|
|
}
|
|
if q.analyzer != "" {
|
|
params["analyzer"] = q.analyzer
|
|
}
|
|
if q.failOnUnsupportedField != nil {
|
|
params["fail_on_unsupported_field"] = *q.failOnUnsupportedField
|
|
}
|
|
if q.queryName != "" {
|
|
params["_name"] = q.queryName
|
|
}
|
|
if q.include != nil {
|
|
params["include"] = *q.include
|
|
}
|
|
|
|
return source, nil
|
|
}
|
|
|
|
// -- MoreLikeThisQueryItem --
|
|
|
|
// MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery
|
|
// to be "liked" or "unliked".
|
|
type MoreLikeThisQueryItem struct {
|
|
likeText string
|
|
|
|
index string
|
|
typ string
|
|
id string
|
|
doc interface{}
|
|
fields []string
|
|
routing string
|
|
fsc *FetchSourceContext
|
|
version int64
|
|
versionType string
|
|
}
|
|
|
|
// NewMoreLikeThisQueryItem creates and initializes a MoreLikeThisQueryItem.
|
|
func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem {
|
|
return &MoreLikeThisQueryItem{
|
|
version: -1,
|
|
}
|
|
}
|
|
|
|
// LikeText represents a text to be "liked".
|
|
func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem {
|
|
item.likeText = likeText
|
|
return item
|
|
}
|
|
|
|
// Index represents the index of the item.
|
|
func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem {
|
|
item.index = index
|
|
return item
|
|
}
|
|
|
|
// Type represents the document type of the item.
|
|
//
|
|
// Deprecated: Types are in the process of being removed.
|
|
func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem {
|
|
item.typ = typ
|
|
return item
|
|
}
|
|
|
|
// Id represents the document id of the item.
|
|
func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem {
|
|
item.id = id
|
|
return item
|
|
}
|
|
|
|
// Doc represents a raw document template for the item.
|
|
func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem {
|
|
item.doc = doc
|
|
return item
|
|
}
|
|
|
|
// Fields represents the list of fields of the item.
|
|
func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem {
|
|
item.fields = append(item.fields, fields...)
|
|
return item
|
|
}
|
|
|
|
// Routing sets the routing associated with the item.
|
|
func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem {
|
|
item.routing = routing
|
|
return item
|
|
}
|
|
|
|
// FetchSourceContext represents the fetch source of the item which controls
|
|
// if and how _source should be returned.
|
|
func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem {
|
|
item.fsc = fsc
|
|
return item
|
|
}
|
|
|
|
// Version specifies the version of the item.
|
|
func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem {
|
|
item.version = version
|
|
return item
|
|
}
|
|
|
|
// VersionType represents the version type of the item.
|
|
func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem {
|
|
item.versionType = versionType
|
|
return item
|
|
}
|
|
|
|
// Source returns the JSON-serializable fragment of the entity.
|
|
func (item *MoreLikeThisQueryItem) Source() (interface{}, error) {
|
|
if item.likeText != "" {
|
|
return item.likeText, nil
|
|
}
|
|
|
|
source := make(map[string]interface{})
|
|
|
|
if item.index != "" {
|
|
source["_index"] = item.index
|
|
}
|
|
if item.typ != "" {
|
|
source["_type"] = item.typ
|
|
}
|
|
if item.id != "" {
|
|
source["_id"] = item.id
|
|
}
|
|
if item.doc != nil {
|
|
source["doc"] = item.doc
|
|
}
|
|
if len(item.fields) > 0 {
|
|
source["fields"] = item.fields
|
|
}
|
|
if item.routing != "" {
|
|
source["routing"] = item.routing
|
|
}
|
|
if item.fsc != nil {
|
|
src, err := item.fsc.Source()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
source["_source"] = src
|
|
}
|
|
if item.version >= 0 {
|
|
source["_version"] = item.version
|
|
}
|
|
if item.versionType != "" {
|
|
source["_version_type"] = item.versionType
|
|
}
|
|
|
|
return source, nil
|
|
}
|