From ae7e6cd474747dce1f65c0b1c6e1d6b09ab0bccb Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 17 Feb 2021 21:32:25 +0000 Subject: [PATCH] Reduce calls to git cat-file -s (#14682) * Reduce calls to git cat-file -s There are multiple places where there are repeated calls to git cat-file -s due to the blobs not being created with their size. Through judicious use of git ls-tree -l and slight adjustments to the indexer code we can avoid a lot of these calls. * simplify by always expecting the long format * Also always set the sized field and tell the indexer the update is sized --- modules/git/parse_gogit.go | 16 ++++-- modules/git/parse_gogit_test.go | 13 +++-- modules/git/parse_nogogit.go | 16 ++++-- modules/git/parse_nogogit_test.go | 70 ++++++++++++++++++++++++++ modules/git/tree_entry_nogogit.go | 2 + modules/git/tree_nogogit.go | 4 +- modules/indexer/code/bleve.go | 20 +++++--- modules/indexer/code/elastic_search.go | 20 +++++--- modules/indexer/code/git.go | 8 ++- 9 files changed, 141 insertions(+), 28 deletions(-) create mode 100644 modules/git/parse_nogogit_test.go diff --git a/modules/git/parse_gogit.go b/modules/git/parse_gogit.go index 434fb4160f..a50ebec3dd 100644 --- a/modules/git/parse_gogit.go +++ b/modules/git/parse_gogit.go @@ -10,12 +10,13 @@ import ( "bytes" "fmt" "strconv" + "strings" "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" ) -// ParseTreeEntries parses the output of a `git ls-tree` command. +// ParseTreeEntries parses the output of a `git ls-tree -l` command. func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { return parseTreeEntries(data, nil) } @@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { - // expect line to be of the form " \t" + // expect line to be of the form " \t" entry := new(TreeEntry) entry.gogitTreeEntry = &object.TreeEntry{} entry.ptree = ptree @@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entry.gogitTreeEntry.Hash = id pos += 41 // skip over sha and trailing space - end := pos + bytes.IndexByte(data[pos:], '\n') + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) + } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + entry.sized = true + + pos = end + 1 + + end = pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) } diff --git a/modules/git/parse_gogit_test.go b/modules/git/parse_gogit_test.go index cf38c29932..c6374133c0 100644 --- a/modules/git/parse_gogit_test.go +++ b/modules/git/parse_gogit_test.go @@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) { Expected: []*TreeEntry{}, }, { - Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n", + Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n", Expected: []*TreeEntry{ { ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), @@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) { Name: "example/file2.txt", Mode: filemode.Regular, }, + size: 1022, + sized: true, }, }, }, { - Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" + - "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n", + Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" + + "040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n", Expected: []*TreeEntry{ { ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), @@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) { Name: "example/\n.txt", Mode: filemode.Symlink, }, + size: 234131, + sized: true, }, { - ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), + ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), + sized: true, gogitTreeEntry: &object.TreeEntry{ Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), Name: "example", diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go index 26dd700af7..e9e93f66fd 100644 --- a/modules/git/parse_nogogit.go +++ b/modules/git/parse_nogogit.go @@ -10,9 +10,10 @@ import ( "bytes" "fmt" "strconv" + "strings" ) -// ParseTreeEntries parses the output of a `git ls-tree` command. +// ParseTreeEntries parses the output of a `git ls-tree -l` command. func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { return parseTreeEntries(data, nil) } @@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entries := make([]*TreeEntry, 0, 10) for pos := 0; pos < len(data); { - // expect line to be of the form " \t" + // expect line to be of the form " \t" entry := new(TreeEntry) entry.ptree = ptree if pos+6 > len(data) { @@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { entry.ID = id pos += 41 // skip over sha and trailing space - end := pos + bytes.IndexByte(data[pos:], '\n') + end := pos + bytes.IndexByte(data[pos:], '\t') + if end < pos { + return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) + } + entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) + entry.sized = true + + pos = end + 1 + + end = pos + bytes.IndexByte(data[pos:], '\n') if end < pos { return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) } diff --git a/modules/git/parse_nogogit_test.go b/modules/git/parse_nogogit_test.go new file mode 100644 index 0000000000..a9e7dcc7f8 --- /dev/null +++ b/modules/git/parse_nogogit_test.go @@ -0,0 +1,70 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// +build !gogit + +package git + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseTreeEntries(t *testing.T) { + + testCases := []struct { + Input string + Expected []*TreeEntry + }{ + { + Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md +100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md +100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md +040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets +`, + Expected: []*TreeEntry{ + { + ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), + name: "README.md", + entryMode: EntryModeBlob, + size: 8218, + sized: true, + }, + { + ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), + name: "README_ZH.md", + entryMode: EntryModeBlob, + size: 4681, + sized: true, + }, + { + ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), + name: "SECURITY.md", + entryMode: EntryModeBlob, + size: 429, + sized: true, + }, + { + ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), + name: "assets", + entryMode: EntryModeTree, + sized: true, + }, + }, + }, + } + for _, testCase := range testCases { + entries, err := ParseTreeEntries([]byte(testCase.Input)) + assert.NoError(t, err) + assert.EqualValues(t, len(testCase.Expected), len(entries)) + for i, entry := range entries { + assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) + assert.EqualValues(t, testCase.Expected[i].name, entry.name) + assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) + assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) + assert.EqualValues(t, testCase.Expected[i].size, entry.size) + } + } +} diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index f18daee778..fd60de36f5 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob { ID: te.ID, repoPath: te.ptree.repo.Path, name: te.Name(), + size: te.size, + gotSize: te.sized, } } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index e78115b777..3ebdf10631 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) { return t.entries, nil } - stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path) + stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) if err != nil { if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { return nil, ErrNotExist{ @@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { if t.entriesRecursiveParsed { return t.entriesRecursive, nil } - stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) + stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) if err != nil { return nil, err } diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 826efde4c1..1ebc74c43a 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode return nil } - stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). - RunInDir(repo.RepoPath()) - if err != nil { - return err + size := update.Size + + if !update.Sized { + stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). + RunInDir(repo.RepoPath()) + if err != nil { + return err + } + if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { + return fmt.Errorf("Misformatted git cat-file output: %v", err) + } } - if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { - return fmt.Errorf("Misformatted git cat-file output: %v", err) - } else if int64(size) > setting.Indexer.MaxIndexerFileSize { + + if size > setting.Indexer.MaxIndexerFileSize { return b.addDelete(update.Filename, repo, batch) } diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index f81dbb34d4..c9d604b694 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo return nil, nil } - stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). - RunInDir(repo.RepoPath()) - if err != nil { - return nil, err + size := update.Size + + if !update.Sized { + stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). + RunInDir(repo.RepoPath()) + if err != nil { + return nil, err + } + if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { + return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) + } } - if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { - return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) - } else if int64(size) > setting.Indexer.MaxIndexerFileSize { + + if size > setting.Indexer.MaxIndexerFileSize { return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 37ab5ac3d3..919d785406 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -17,6 +17,8 @@ import ( type fileUpdate struct { Filename string BlobSha string + Size int64 + Sized bool } // repoChanges changes (file additions/updates/removals) to a repo @@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { updates[idxCount] = fileUpdate{ Filename: entry.Name(), BlobSha: entry.ID.String(), + Size: entry.Size(), + Sized: true, } idxCount++ } @@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { // genesisChanges get changes to add repo to the indexer for the first time func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { var changes repoChanges - stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). + stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision). RunInDirBytes(repo.RepoPath()) if err != nil { return nil, err @@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, } } - cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") + cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--") cmd.AddArguments(updatedFilenames...) lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath()) if err != nil {