Cache last commit when pushing for big repository (#10109)

* Cache last commit when pushing for big repository

* Fix bug

* detect force push

* Refactor cache push

* Finish cache last commit info when push

* Some improvements

* Fix lint

* Remove unused changes

* Move pull request test before cache

* Fix test mysql

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
Lunny Xiao 2020-10-09 06:17:23 +08:00 committed by GitHub
parent f06ee37e24
commit 9b81193166
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 148 additions and 11 deletions

View File

@ -40,7 +40,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom
return nil, nil, err
}
if len(unHitPaths) > 0 {
revs2, err := getLastCommitForPaths(c, treePath, unHitPaths)
revs2, err := GetLastCommitForPaths(c, treePath, unHitPaths)
if err != nil {
return nil, nil, err
}
@ -53,7 +53,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom
}
}
} else {
revs, err = getLastCommitForPaths(c, treePath, entryPaths)
revs, err = GetLastCommitForPaths(c, treePath, entryPaths)
}
if err != nil {
return nil, nil, err
@ -170,7 +170,8 @@ func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cac
return results, unHitEntryPaths, nil
}
func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) {
// GetLastCommitForPaths returns last commit information
func GetLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) {
// We do a tree traversal with nodes sorted by commit time
heap := binaryheap.NewWith(func(a, b interface{}) int {
if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) {

View File

@ -70,7 +70,7 @@ func GetNote(repo *Repository, commitID string, note *Note) error {
return err
}
lastCommits, err := getLastCommitForPaths(commitNode, "", []string{path})
lastCommits, err := GetLastCommitForPaths(commitNode, "", []string{path})
if err != nil {
return err
}

View File

@ -0,0 +1,97 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"path"
"strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
cgobject "github.com/go-git/go-git/v5/plumbing/object/commitgraph"
)
// recusiveCache stores, for commit c, the ID of the last commit that touched
// each entry of tree in the last-commit cache ca, then descends into the
// sub-directories of tree until level levels have been processed.
//
// treePath is the path of tree that is joined with every entry name to build
// the cache key ("" is what CacheRef passes for the commit's top-level tree).
// A level of 0 does nothing. gitRepo is not used directly; it is only handed
// on to the recursive calls.
//
// The first error from listing entries, resolving last commits, writing to
// the cache or opening a sub-tree aborts the walk and is returned.
func recusiveCache(gitRepo *git.Repository, c cgobject.CommitNode, tree *git.Tree, treePath string, ca *cache.LastCommitCache, level int) error {
	if level == 0 {
		return nil
	}

	entries, err := tree.ListEntries()
	if err != nil {
		return err
	}

	entryPaths := make([]string, len(entries))
	entryMap := make(map[string]*git.TreeEntry, len(entries))
	for i, entry := range entries {
		entryPaths[i] = entry.Name()
		entryMap[entry.Name()] = entry
	}

	commits, err := git.GetLastCommitForPaths(c, treePath, entryPaths)
	if err != nil {
		return err
	}

	// The commit ID is the same for every entry, so compute it once.
	commitID := c.ID().String()
	for entry, cm := range commits {
		entryPath := path.Join(treePath, entry)
		if err := ca.Put(commitID, entryPath, cm.ID().String()); err != nil {
			return err
		}

		// Skip result keys without a matching tree entry instead of
		// dereferencing a nil entry, and skip everything that is not a
		// directory.
		treeEntry, ok := entryMap[entry]
		if !ok || treeEntry == nil || !treeEntry.IsDir() {
			continue
		}

		subTree, err := tree.SubTree(entry)
		if err != nil {
			return err
		}
		// Pass the full path of the sub-tree, not just the entry name;
		// otherwise entries nested more than one directory deep would be
		// looked up and cached under the wrong path.
		if err := recusiveCache(gitRepo, c, subTree, entryPath, ca, level-1); err != nil {
			return err
		}
	}
	return nil
}
// getRefName strips the tag or branch prefix from fullRefName and returns the
// remaining short name. The tag prefix is tried first, then the branch prefix;
// a name that carries neither prefix yields the empty string.
func getRefName(fullRefName string) string {
	for _, prefix := range []string{git.TagPrefix, git.BranchPrefix} {
		if strings.HasPrefix(fullRefName, prefix) {
			return strings.TrimPrefix(fullRefName, prefix)
		}
	}
	return ""
}
// CacheRef caches the last commit information of the branch or the tag named
// by fullRefName.
//
// It is a no-op when the last-commit cache is disabled in the settings, or
// when the ref's commit count is below the configured
// setting.CacheService.LastCommit.CommitsCount threshold. Otherwise the
// entries of the commit's top-level tree are cached (one level deep) with the
// configured TTL.
//
// Any error from resolving the commit, reading the commit count, looking up
// the commit node or writing the cache is returned.
func CacheRef(repo *models.Repository, gitRepo *git.Repository, fullRefName string) error {
	if !setting.CacheService.LastCommit.Enabled {
		return nil
	}

	commit, err := gitRepo.GetCommit(fullRefName)
	if err != nil {
		return err
	}

	commitsCount, err := cache.GetInt64(repo.GetCommitsCountCacheKey(getRefName(fullRefName), true), commit.CommitsCount)
	if err != nil {
		return err
	}
	if commitsCount < setting.CacheService.LastCommit.CommitsCount {
		return nil
	}

	// CommitNodeIndex also returns the commit-graph file it opened (nil when
	// no commit-graph is in use); close it so the descriptor is not leaked
	// on every push.
	commitNodeIndex, commitGraphFile := gitRepo.CommitNodeIndex()
	if commitGraphFile != nil {
		defer commitGraphFile.Close()
	}

	c, err := commitNodeIndex.Get(commit.ID)
	if err != nil {
		return err
	}

	ca := cache.NewLastCommitCache(repo.FullName(), gitRepo, int64(setting.CacheService.LastCommit.TTL.Seconds()))

	return recusiveCache(gitRepo, c, &commit.Tree, "", ca, 1)
}

View File

@ -30,7 +30,7 @@ type PushUpdateOptions struct {
PusherName string
RepoUserName string
RepoName string
RefFullName string
RefFullName string // branch, tag or other name to push
OldCommitID string
NewCommitID string
}
@ -95,11 +95,36 @@ func (opts PushUpdateOptions) BranchName() string {
return opts.RefFullName[len(git.BranchPrefix):]
}
// RefName returns the short name of the pushed ref: RefFullName with its tag
// prefix or, failing that, its branch prefix removed. It returns the empty
// string when RefFullName has neither prefix.
func (opts PushUpdateOptions) RefName() string {
	full := opts.RefFullName
	switch {
	case strings.HasPrefix(full, git.TagPrefix):
		return strings.TrimPrefix(full, git.TagPrefix)
	case strings.HasPrefix(full, git.BranchPrefix):
		return strings.TrimPrefix(full, git.BranchPrefix)
	default:
		return ""
	}
}
// RepoFullName returns the repository's full name, built as
// RepoUserName and RepoName separated by a single "/".
func (opts PushUpdateOptions) RepoFullName() string {
	parts := []string{opts.RepoUserName, opts.RepoName}
	return strings.Join(parts, "/")
}
// isForcePush reports whether the push described by opts is a force push.
//
// Pushes for which opts.IsUpdateBranch() is false are never reported as force
// pushes. Otherwise "git rev-list --max-count=1 <old> ^<new>" is run in
// repoPath; any output means at least one commit is reachable from the old
// commit but not from the new one, which is treated as a force push. An error
// from running the command is returned with a false result.
func isForcePush(repoPath string, opts *PushUpdateOptions) (bool, error) {
	if !opts.IsUpdateBranch() {
		return false, nil
	}

	revList := git.NewCommand("rev-list", "--max-count=1", opts.OldCommitID, "^"+opts.NewCommitID)
	stdout, err := revList.RunInDir(repoPath)
	if err != nil {
		return false, err
	}

	return len(stdout) > 0, nil
}
// pushQueue represents a queue to handle update pull request tests
var pushQueue queue.Queue
@ -184,7 +209,6 @@ func pushUpdates(optsList []*PushUpdateOptions) error {
if opts.IsDelRef() {
delTags = append(delTags, tagName)
} else { // is new tag
cache.Remove(repo.GetCommitsCountCacheKey(tagName, true))
addTags = append(addTags, tagName)
}
} else if opts.IsBranch() { // If is branch reference
@ -197,8 +221,8 @@ func pushUpdates(optsList []*PushUpdateOptions) error {
branch := opts.BranchName()
if !opts.IsDelRef() {
// Clear cache for branch commit count
cache.Remove(repo.GetCommitsCountCacheKey(opts.BranchName(), true))
log.Trace("TriggerTask '%s/%s' by %s", repo.Name, branch, pusher.Name)
go pull_service.AddTestPullRequestTask(pusher, repo.ID, branch, true, opts.OldCommitID, opts.NewCommitID)
newCommit, err := gitRepo.GetCommit(opts.NewCommitID)
if err != nil {
@ -217,6 +241,20 @@ func pushUpdates(optsList []*PushUpdateOptions) error {
if err != nil {
return fmt.Errorf("newCommit.CommitsBeforeUntil: %v", err)
}
isForce, err := isForcePush(repo.RepoPath(), opts)
if err != nil {
log.Error("isForcePush %s/%s failed: %v", repo.ID, branch, err)
}
if isForce {
log.Trace("Push %s is a force push", opts.NewCommitID)
cache.Remove(repo.GetCommitsCountCacheKey(opts.RefName(), true))
} else {
// TODO: incrementally update the commit count cache instead of removing it
cache.Remove(repo.GetCommitsCountCacheKey(opts.RefName(), true))
}
}
commits = repo_module.ListToPushCommits(l)
@ -225,9 +263,10 @@ func pushUpdates(optsList []*PushUpdateOptions) error {
log.Error("models.RemoveDeletedBranch %s/%s failed: %v", repo.ID, branch, err)
}
log.Trace("TriggerTask '%s/%s' by %s", repo.Name, branch, pusher.Name)
go pull_service.AddTestPullRequestTask(pusher, repo.ID, branch, true, opts.OldCommitID, opts.NewCommitID)
// Cache for big repository
if err := repo_module.CacheRef(repo, gitRepo, opts.RefFullName); err != nil {
log.Error("repo_module.CacheRef %s/%s failed: %v", repo.ID, branch, err)
}
} else if err = pull_service.CloseBranchPulls(pusher, repo.ID, branch); err != nil {
// close all related pulls
log.Error("close related pull request failed: %v", err)