mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-31 03:18:24 +00:00 
			
		
		
		
	Backport #15213 `enry.IsVendor` is kinda slow as it simply iterates across all regexps. This PR adjusts the regexps and combines them into a single expression to make this process a little quicker. Related #15143 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		
							
								
								
									
										70
									
								
								modules/analyze/vendor.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								modules/analyze/vendor.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | |||||||
|  | // Copyright 2021 The Gitea Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a MIT-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  |  | ||||||
|  | package analyze | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"regexp" | ||||||
|  | 	"sort" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
|  | 	"github.com/go-enry/go-enry/v2/data" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var isVendorRegExp *regexp.Regexp | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	matchers := data.VendorMatchers | ||||||
|  |  | ||||||
|  | 	caretStrings := make([]string, 0, 10) | ||||||
|  | 	caretShareStrings := make([]string, 0, 10) | ||||||
|  |  | ||||||
|  | 	matcherStrings := make([]string, 0, len(matchers)) | ||||||
|  | 	for _, matcher := range matchers { | ||||||
|  | 		str := matcher.String() | ||||||
|  | 		if str[0] == '^' { | ||||||
|  | 			caretStrings = append(caretStrings, str[1:]) | ||||||
|  | 		} else if str[0:5] == "(^|/)" { | ||||||
|  | 			caretShareStrings = append(caretShareStrings, str[5:]) | ||||||
|  | 		} else { | ||||||
|  | 			matcherStrings = append(matcherStrings, str) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sort.Strings(caretShareStrings) | ||||||
|  | 	sort.Strings(caretStrings) | ||||||
|  | 	sort.Strings(matcherStrings) | ||||||
|  |  | ||||||
|  | 	sb := &strings.Builder{} | ||||||
|  | 	sb.WriteString("(?:^(?:") | ||||||
|  | 	sb.WriteString(caretStrings[0]) | ||||||
|  | 	for _, matcher := range caretStrings[1:] { | ||||||
|  | 		sb.WriteString(")|(?:") | ||||||
|  | 		sb.WriteString(matcher) | ||||||
|  | 	} | ||||||
|  | 	sb.WriteString("))") | ||||||
|  | 	sb.WriteString("|") | ||||||
|  | 	sb.WriteString("(?:(?:^|/)(?:") | ||||||
|  | 	sb.WriteString(caretShareStrings[0]) | ||||||
|  | 	for _, matcher := range caretShareStrings[1:] { | ||||||
|  | 		sb.WriteString(")|(?:") | ||||||
|  | 		sb.WriteString(matcher) | ||||||
|  | 	} | ||||||
|  | 	sb.WriteString("))") | ||||||
|  | 	sb.WriteString("|") | ||||||
|  | 	sb.WriteString("(?:") | ||||||
|  | 	sb.WriteString(matcherStrings[0]) | ||||||
|  | 	for _, matcher := range matcherStrings[1:] { | ||||||
|  | 		sb.WriteString(")|(?:") | ||||||
|  | 		sb.WriteString(matcher) | ||||||
|  | 	} | ||||||
|  | 	sb.WriteString(")") | ||||||
|  | 	combined := sb.String() | ||||||
|  | 	isVendorRegExp = regexp.MustCompile(combined) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // IsVendor returns whether or not path is a vendor path. | ||||||
|  | func IsVendor(path string) bool { | ||||||
|  | 	return isVendorRegExp.MatchString(path) | ||||||
|  | } | ||||||
							
								
								
									
										42
									
								
								modules/analyze/vendor_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								modules/analyze/vendor_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | |||||||
|  | // Copyright 2021 The Gitea Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a MIT-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  |  | ||||||
|  | package analyze | ||||||
|  |  | ||||||
|  | import "testing" | ||||||
|  |  | ||||||
|  | func TestIsVendor(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		path string | ||||||
|  | 		want bool | ||||||
|  | 	}{ | ||||||
|  | 		{"cache/", true}, | ||||||
|  | 		{"random/cache/", true}, | ||||||
|  | 		{"cache", false}, | ||||||
|  | 		{"dependencies/", true}, | ||||||
|  | 		{"Dependencies/", true}, | ||||||
|  | 		{"dependency/", false}, | ||||||
|  | 		{"dist/", true}, | ||||||
|  | 		{"dist", false}, | ||||||
|  | 		{"random/dist/", true}, | ||||||
|  | 		{"random/dist", false}, | ||||||
|  | 		{"deps/", true}, | ||||||
|  | 		{"configure", true}, | ||||||
|  | 		{"a/configure", true}, | ||||||
|  | 		{"config.guess", true}, | ||||||
|  | 		{"config.guess/", false}, | ||||||
|  | 		{".vscode/", true}, | ||||||
|  | 		{"doc/_build/", true}, | ||||||
|  | 		{"a/docs/_build/", true}, | ||||||
|  | 		{"a/dasdocs/_build-vsdoc.js", true}, | ||||||
|  | 		{"a/dasdocs/_build-vsdoc.j", false}, | ||||||
|  | 	} | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.path, func(t *testing.T) { | ||||||
|  | 			if got := IsVendor(tt.path); got != tt.want { | ||||||
|  | 				t.Errorf("IsVendor() = %v, want %v", got, tt.want) | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -44,7 +44,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||||||
|  |  | ||||||
| 	sizes := make(map[string]int64) | 	sizes := make(map[string]int64) | ||||||
| 	err = tree.Files().ForEach(func(f *object.File) error { | 	err = tree.Files().ForEach(func(f *object.File) error { | ||||||
| 		if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | 		if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | ||||||
| 			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | 			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | ||||||
| 			return nil | 			return nil | ||||||
| 		} | 		} | ||||||
|   | |||||||
| @@ -175,7 +175,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { | |||||||
|  |  | ||||||
| func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||||
| 	// Ignore vendored files in code search | 	// Ignore vendored files in code search | ||||||
| 	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | 	if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
| @@ -170,7 +170,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) { | |||||||
|  |  | ||||||
| func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | ||||||
| 	// Ignore vendored files in code search | 	// Ignore vendored files in code search | ||||||
| 	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | 	if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | ||||||
| 		return nil, nil | 		return nil, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user