mirror of
https://github.com/go-gitea/gitea
synced 2025-07-09 20:17:21 +00:00
Use git attributes to determine generated and vendored status for language stats and diffs (#16773)
Replaces #16262 Replaces #16250 Replaces #14833 This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - taking account of the change in the output format in git 1.8.5 and creates a helper function to read a tree into a temporary index file for that pipe reader. It then wires this in to the language stats helper and into the git diff generation. Files which are marked generated will be folded by default. Fixes #14786 Fixes #12653
This commit is contained in:
@ -10,6 +10,7 @@ package git
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var checker *CheckAttributeReader
|
||||
|
||||
if CheckGitVersionAtLeast("1.7.8") == nil {
|
||||
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
|
||||
if err == nil {
|
||||
defer deleteTemporaryFile()
|
||||
|
||||
checker = &CheckAttributeReader{
|
||||
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
|
||||
Repo: repo,
|
||||
IndexFile: indexFilename,
|
||||
}
|
||||
ctx, cancel := context.WithCancel(DefaultContext)
|
||||
if err := checker.Init(ctx); err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
} else {
|
||||
go func() {
|
||||
err = checker.Run()
|
||||
if err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
}
|
||||
defer cancel()
|
||||
}
|
||||
}
|
||||
|
||||
contentBuf := bytes.Buffer{}
|
||||
var content []byte
|
||||
sizes := make(map[string]int64)
|
||||
for _, f := range entries {
|
||||
contentBuf.Reset()
|
||||
content = contentBuf.Bytes()
|
||||
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
|
||||
|
||||
if f.Size() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
notVendored := false
|
||||
notGenerated := false
|
||||
|
||||
if checker != nil {
|
||||
attrs, err := checker.CheckPath(f.Name())
|
||||
if err == nil {
|
||||
if vendored, has := attrs["linguist-vendored"]; has {
|
||||
if vendored == "set" || vendored == "true" {
|
||||
continue
|
||||
}
|
||||
notVendored = vendored == "false"
|
||||
}
|
||||
if generated, has := attrs["linguist-generated"]; has {
|
||||
if generated == "set" || generated == "true" {
|
||||
continue
|
||||
}
|
||||
notGenerated = generated == "false"
|
||||
}
|
||||
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
|
||||
// group languages, such as Pug -> HTML; SCSS -> CSS
|
||||
group := enry.GetLanguageGroup(language)
|
||||
if len(group) == 0 {
|
||||
language = group
|
||||
}
|
||||
|
||||
sizes[language] += f.Size()
|
||||
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
|
||||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
|
||||
continue
|
||||
}
|
||||
@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if enry.IsGenerated(f.Name(), content) {
|
||||
if !notGenerated && enry.IsGenerated(f.Name(), content) {
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO: Use .gitattributes file for linguist overrides
|
||||
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
|
||||
// - eg. do the all the detection tests using filename first before reading content.
|
||||
language := analyze.GetCodeLanguage(f.Name(), content)
|
||||
|
Reference in New Issue
Block a user