mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	Backport #15213 `enry.IsVendor` is kinda slow as it simply iterates across all regexps. This PR adjusts the regexps to combine them to make this process a little quicker. Related #15143 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		
							
								
								
									
										70
									
								
								modules/analyze/vendor.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								modules/analyze/vendor.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,70 @@
 | 
				
			|||||||
 | 
					// Copyright 2021 The Gitea Authors. All rights reserved.
 | 
				
			||||||
 | 
					// Use of this source code is governed by a MIT-style
 | 
				
			||||||
 | 
					// license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package analyze
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"regexp"
 | 
				
			||||||
 | 
						"sort"
 | 
				
			||||||
 | 
						"strings"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"github.com/go-enry/go-enry/v2/data"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					var isVendorRegExp *regexp.Regexp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func init() {
 | 
				
			||||||
 | 
						matchers := data.VendorMatchers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						caretStrings := make([]string, 0, 10)
 | 
				
			||||||
 | 
						caretShareStrings := make([]string, 0, 10)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						matcherStrings := make([]string, 0, len(matchers))
 | 
				
			||||||
 | 
						for _, matcher := range matchers {
 | 
				
			||||||
 | 
							str := matcher.String()
 | 
				
			||||||
 | 
							if str[0] == '^' {
 | 
				
			||||||
 | 
								caretStrings = append(caretStrings, str[1:])
 | 
				
			||||||
 | 
							} else if str[0:5] == "(^|/)" {
 | 
				
			||||||
 | 
								caretShareStrings = append(caretShareStrings, str[5:])
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								matcherStrings = append(matcherStrings, str)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sort.Strings(caretShareStrings)
 | 
				
			||||||
 | 
						sort.Strings(caretStrings)
 | 
				
			||||||
 | 
						sort.Strings(matcherStrings)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb := &strings.Builder{}
 | 
				
			||||||
 | 
						sb.WriteString("(?:^(?:")
 | 
				
			||||||
 | 
						sb.WriteString(caretStrings[0])
 | 
				
			||||||
 | 
						for _, matcher := range caretStrings[1:] {
 | 
				
			||||||
 | 
							sb.WriteString(")|(?:")
 | 
				
			||||||
 | 
							sb.WriteString(matcher)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						sb.WriteString("))")
 | 
				
			||||||
 | 
						sb.WriteString("|")
 | 
				
			||||||
 | 
						sb.WriteString("(?:(?:^|/)(?:")
 | 
				
			||||||
 | 
						sb.WriteString(caretShareStrings[0])
 | 
				
			||||||
 | 
						for _, matcher := range caretShareStrings[1:] {
 | 
				
			||||||
 | 
							sb.WriteString(")|(?:")
 | 
				
			||||||
 | 
							sb.WriteString(matcher)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						sb.WriteString("))")
 | 
				
			||||||
 | 
						sb.WriteString("|")
 | 
				
			||||||
 | 
						sb.WriteString("(?:")
 | 
				
			||||||
 | 
						sb.WriteString(matcherStrings[0])
 | 
				
			||||||
 | 
						for _, matcher := range matcherStrings[1:] {
 | 
				
			||||||
 | 
							sb.WriteString(")|(?:")
 | 
				
			||||||
 | 
							sb.WriteString(matcher)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						sb.WriteString(")")
 | 
				
			||||||
 | 
						combined := sb.String()
 | 
				
			||||||
 | 
						isVendorRegExp = regexp.MustCompile(combined)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// IsVendor returns whether or not path is a vendor path.
 | 
				
			||||||
 | 
					func IsVendor(path string) bool {
 | 
				
			||||||
 | 
						return isVendorRegExp.MatchString(path)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										42
									
								
								modules/analyze/vendor_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								modules/analyze/vendor_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
				
			|||||||
 | 
					// Copyright 2021 The Gitea Authors. All rights reserved.
 | 
				
			||||||
 | 
					// Use of this source code is governed by a MIT-style
 | 
				
			||||||
 | 
					// license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package analyze
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import "testing"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestIsVendor(t *testing.T) {
 | 
				
			||||||
 | 
						tests := []struct {
 | 
				
			||||||
 | 
							path string
 | 
				
			||||||
 | 
							want bool
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{"cache/", true},
 | 
				
			||||||
 | 
							{"random/cache/", true},
 | 
				
			||||||
 | 
							{"cache", false},
 | 
				
			||||||
 | 
							{"dependencies/", true},
 | 
				
			||||||
 | 
							{"Dependencies/", true},
 | 
				
			||||||
 | 
							{"dependency/", false},
 | 
				
			||||||
 | 
							{"dist/", true},
 | 
				
			||||||
 | 
							{"dist", false},
 | 
				
			||||||
 | 
							{"random/dist/", true},
 | 
				
			||||||
 | 
							{"random/dist", false},
 | 
				
			||||||
 | 
							{"deps/", true},
 | 
				
			||||||
 | 
							{"configure", true},
 | 
				
			||||||
 | 
							{"a/configure", true},
 | 
				
			||||||
 | 
							{"config.guess", true},
 | 
				
			||||||
 | 
							{"config.guess/", false},
 | 
				
			||||||
 | 
							{".vscode/", true},
 | 
				
			||||||
 | 
							{"doc/_build/", true},
 | 
				
			||||||
 | 
							{"a/docs/_build/", true},
 | 
				
			||||||
 | 
							{"a/dasdocs/_build-vsdoc.js", true},
 | 
				
			||||||
 | 
							{"a/dasdocs/_build-vsdoc.j", false},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for _, tt := range tests {
 | 
				
			||||||
 | 
							t.Run(tt.path, func(t *testing.T) {
 | 
				
			||||||
 | 
								if got := IsVendor(tt.path); got != tt.want {
 | 
				
			||||||
 | 
									t.Errorf("IsVendor() = %v, want %v", got, tt.want)
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -44,7 +44,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	sizes := make(map[string]int64)
 | 
						sizes := make(map[string]int64)
 | 
				
			||||||
	err = tree.Files().ForEach(func(f *object.File) error {
 | 
						err = tree.Files().ForEach(func(f *object.File) error {
 | 
				
			||||||
		if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
							if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
 | 
				
			||||||
			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
 | 
								enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
 | 
				
			||||||
			return nil
 | 
								return nil
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -175,7 +175,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
 | 
					func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
 | 
				
			||||||
	// Ignore vendored files in code search
 | 
						// Ignore vendored files in code search
 | 
				
			||||||
	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
 | 
						if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
 | 
				
			||||||
		return nil
 | 
							return nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -170,7 +170,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
 | 
					func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
 | 
				
			||||||
	// Ignore vendored files in code search
 | 
						// Ignore vendored files in code search
 | 
				
			||||||
	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
 | 
						if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
 | 
				
			||||||
		return nil, nil
 | 
							return nil, nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user