// Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package meilisearch import ( "context" "errors" "strconv" "strings" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/meilisearch/meilisearch-go" ) const ( issueIndexerLatestVersion = 3 // TODO: make this configurable if necessary maxTotalHits = 10000 ) // ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { inner *inner_meilisearch.Indexer indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much } // NewIndexer creates a new meilisearch indexer func NewIndexer(url, apiKey, indexerName string) *Indexer { settings := &meilisearch.Settings{ // The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"] // So even if we specify the sort order, it could not be respected because the priority of "sort" is so low. // So we need to specify the ranking rules to make sure the sort order is respected. // See https://www.meilisearch.com/docs/learn/core_concepts/relevancy RankingRules: []string{"sort", // make sure "sort" has the highest priority "words", "typo", "proximity", "attribute", "exactness"}, SearchableAttributes: []string{ "title", "content", "comments", }, DisplayedAttributes: []string{ "id", "title", "content", "comments", }, FilterableAttributes: []string{ "repo_id", "is_public", "is_pull", "is_closed", "label_ids", "no_label", "milestone_id", "project_id", "project_board_id", "poster_id", "assignee_id", "mention_ids", "reviewed_ids", "review_requested_ids", "subscriber_ids", "updated_unix", }, SortableAttributes: []string{ "updated_unix", "created_unix", "deadline_unix", "comment_count", "id", }, Pagination: &meilisearch.Pagination{ MaxTotalHits: maxTotalHits, }, } inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings) indexer := &Indexer{ inner: inner, Indexer: inner, } return indexer } // Index will save the index data func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error { if len(issues) == 0 { return nil } for _, issue := range issues { _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue) if err != nil { return err } } // TODO: bulk send index data return nil } // Delete deletes indexes by ids func (b *Indexer) Delete(_ context.Context, ids ...int64) error { if len(ids) == 0 { return nil } for _, id := range ids { _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10)) if err != nil { return err } } // TODO: bulk send deletes return nil } // Search searches for issues by given conditions. // Returns the matching issue IDs func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { query := inner_meilisearch.FilterAnd{} if len(options.RepoIDs) > 0 { q := &inner_meilisearch.FilterOr{} q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...)) if options.AllPublic { q.Or(inner_meilisearch.NewFilterEq("is_public", true)) } query.And(q) } if options.IsPull.Has() { query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value())) } if options.IsClosed.Has() { query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value())) } if options.NoLabelOnly { query.And(inner_meilisearch.NewFilterEq("no_label", true)) } else { if len(options.IncludedLabelIDs) > 0 { q := &inner_meilisearch.FilterAnd{} for _, labelID := range options.IncludedLabelIDs { q.And(inner_meilisearch.NewFilterEq("label_ids", labelID)) } query.And(q) } else if len(options.IncludedAnyLabelIDs) > 0 { query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...)) } if len(options.ExcludedLabelIDs) > 0 { q := &inner_meilisearch.FilterAnd{} for _, labelID := range options.ExcludedLabelIDs { q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID))) } query.And(q) } } if len(options.MilestoneIDs) > 0 { query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...)) } if options.ProjectID != nil { query.And(inner_meilisearch.NewFilterEq("project_id", *options.ProjectID)) } if options.ProjectBoardID != nil { query.And(inner_meilisearch.NewFilterEq("project_board_id", *options.ProjectBoardID)) } if options.PosterID != nil { query.And(inner_meilisearch.NewFilterEq("poster_id", *options.PosterID)) } if options.AssigneeID != nil { query.And(inner_meilisearch.NewFilterEq("assignee_id", *options.AssigneeID)) } if options.MentionID != nil { query.And(inner_meilisearch.NewFilterEq("mention_ids", *options.MentionID)) } if options.ReviewedID != nil { query.And(inner_meilisearch.NewFilterEq("reviewed_ids", *options.ReviewedID)) } if options.ReviewRequestedID != nil { query.And(inner_meilisearch.NewFilterEq("review_requested_ids", *options.ReviewRequestedID)) } if options.SubscriberID != nil { query.And(inner_meilisearch.NewFilterEq("subscriber_ids", *options.SubscriberID)) } if options.UpdatedAfterUnix != nil { query.And(inner_meilisearch.NewFilterGte("updated_unix", *options.UpdatedAfterUnix)) } if options.UpdatedBeforeUnix != nil { query.And(inner_meilisearch.NewFilterLte("updated_unix", *options.UpdatedBeforeUnix)) } if options.SortBy == "" { options.SortBy = internal.SortByCreatedAsc } sortBy := []string{ parseSortBy(options.SortBy), "id:desc", } skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{ Filter: query.Statement(), Limit: int64(limit), Offset: int64(skip), Sort: sortBy, MatchingStrategy: "all", }) if err != nil { return nil, err } hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword) if err != nil { return nil, err } return &internal.SearchResult{ Total: searchRes.EstimatedTotalHits, Hits: hits, }, nil } func parseSortBy(sortBy internal.SortBy) string { field := strings.TrimPrefix(string(sortBy), "-") if strings.HasPrefix(string(sortBy), "-") { return field + ":desc" } return field + ":asc" } // nonFuzzyWorkaround is needed as meilisearch does not have an exact search // and you can only change "typo tolerance" per index. So we have to post-filter the results // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) { hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { hit, ok := hit.(map[string]any) if !ok { return nil, ErrMalformedResponse } if !isFuzzy { keyword = strings.ToLower(keyword) // declare a anon func to check if the title, content or at least one comment contains the keyword found, err := func() (bool, error) { // check if title match first title, ok := hit["title"].(string) if !ok { return false, ErrMalformedResponse } else if strings.Contains(strings.ToLower(title), keyword) { return true, nil } // check if content has a match content, ok := hit["content"].(string) if !ok { return false, ErrMalformedResponse } else if strings.Contains(strings.ToLower(content), keyword) { return true, nil } // now check for each comment if one has a match // so we first try to cast and skip if there are no comments comments, ok := hit["comments"].([]any) if !ok { return false, ErrMalformedResponse } else if len(comments) == 0 { return false, nil } // now we iterate over all and report as soon as we detect one match for i := range comments { comment, ok := comments[i].(string) if !ok { return false, ErrMalformedResponse } if strings.Contains(strings.ToLower(comment), keyword) { return true, nil } } // we got no match return false, nil }() if err != nil { return nil, err } else if !found { continue } } issueID, ok := hit["id"].(float64) if !ok { return nil, ErrMalformedResponse } hits = append(hits, internal.Match{ ID: int64(issueID), }) } return hits, nil }