1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Refactor Git Attribute & performance optimization (#34154)

This PR moved git attributes related code to `modules/git/attribute` sub
package and moved language stats related code to
`modules/git/languagestats` sub package to make it easier to maintain.

And it also introduced a performance improvement which use the `git
check-attr --source` which can be run in a bare git repository so that
we don't need to create a git index file. The new parameter need a git
version >= 2.40 . If git version less than 2.40, it will fall back to
previous implementation.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: yp05327 <576951401@qq.com>
This commit is contained in:
Lunny Xiao
2025-04-11 06:41:29 -07:00
committed by GitHub
parent d725b78824
commit ae0af8ea5b
28 changed files with 875 additions and 587 deletions

View File

@@ -25,6 +25,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
@@ -1237,24 +1238,21 @@ func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOp
return nil, err
}
checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
defer deferrable()
checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage})
if err != nil {
return nil, err
}
defer checker.Close()
for _, diffFile := range diff.Files {
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
if checker != nil {
attrs, err := checker.CheckPath(diffFile.Name)
if err == nil {
isVendored = git.AttributeToBool(attrs, git.AttributeLinguistVendored)
isGenerated = git.AttributeToBool(attrs, git.AttributeLinguistGenerated)
language := git.TryReadLanguageAttribute(attrs)
if language.Has() {
diffFile.Language = language.Value()
}
} else {
checker = nil // CheckPath fails, it's not impossible to "check" anymore
attrs, err := checker.CheckPath(diffFile.Name)
if err == nil {
isVendored, isGenerated = attrs.GetVendored(), attrs.GetGenerated()
language := attrs.GetLanguage()
if language.Has() {
diffFile.Language = language.Value()
}
}

View File

@@ -14,13 +14,13 @@ import (
"code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git/languagestats"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
gitea_context "code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/repository/files"
)
func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePreviewOptions) (template.HTML, error) {
@@ -61,7 +61,7 @@ func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePrevie
return "", err
}
language, _ := files.TryGetContentLanguage(gitRepo, opts.CommitID, opts.FilePath)
language, _ := languagestats.GetFileLanguage(ctx, gitRepo, opts.CommitID, opts.FilePath)
blob, err := commit.GetBlobByPath(opts.FilePath)
if err != nil {
return "", err

View File

@@ -277,28 +277,3 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
Content: content,
}, nil
}
// TryGetContentLanguage tries to get the (linguist) language of the file content
func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) {
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(commitID)
if err != nil {
return "", err
}
defer deleteTemporaryFile()
filename2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{
CachedOnly: true,
Attributes: []string{git.AttributeLinguistLanguage, git.AttributeGitlabLanguage},
Filenames: []string{treePath},
IndexFile: indexFilename,
WorkTree: worktree,
})
if err != nil {
return "", err
}
language := git.TryReadLanguageAttribute(filename2attribute2info[treePath])
return language.Value(), nil
}

View File

@@ -15,6 +15,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
@@ -488,16 +489,15 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
var lfsMetaObject *git_model.LFSMetaObject
if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos
filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
attributesMap, err := attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{
Attributes: []string{attribute.Filter},
Filenames: []string{file.Options.treePath},
CachedOnly: true,
})
if err != nil {
return err
}
if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" {
if attributesMap[file.Options.treePath] != nil && attributesMap[file.Options.treePath].Get(attribute.Filter).ToString().Value() == "lfs" {
// OK so we are supposed to LFS this data!
pointer, err := lfs.GeneratePointer(treeObjectContentReader)
if err != nil {

View File

@@ -14,6 +14,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
)
@@ -105,12 +106,11 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
}
}
var filename2attribute2info map[string]map[string]string
var attributesMap map[string]*attribute.Attributes
if setting.LFS.StartServer {
filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
attributesMap, err = attribute.CheckAttributes(ctx, t.gitRepo, "" /* use temp repo's working dir */, attribute.CheckAttributeOpts{
Attributes: []string{attribute.Filter},
Filenames: names,
CachedOnly: true,
})
if err != nil {
return err
@@ -119,7 +119,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
// Copy uploaded files into repository.
for i := range infos {
if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], filename2attribute2info, t, opts.TreePath); err != nil {
if err := copyUploadedLFSFileIntoRepository(ctx, &infos[i], attributesMap, t, opts.TreePath); err != nil {
return err
}
}
@@ -176,7 +176,7 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
return repo_model.DeleteUploads(ctx, uploads...)
}
func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error {
func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, attributesMap map[string]*attribute.Attributes, t *TemporaryUploadRepository, treePath string) error {
file, err := os.Open(info.upload.LocalPath())
if err != nil {
return err
@@ -184,7 +184,7 @@ func copyUploadedLFSFileIntoRepository(ctx context.Context, info *uploadInfo, fi
defer file.Close()
var objectHash string
if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" {
if setting.LFS.StartServer && attributesMap[info.upload.Name] != nil && attributesMap[info.upload.Name].Get(attribute.Filter).ToString().Value() == "lfs" {
// Handle LFS
// FIXME: Inefficient! this should probably happen in models.Upload
pointer, err := lfs.GeneratePointer(file)