1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 10:18:38 +00:00

Add doctor command for full GC of LFS (#21978)

The recent PR adding orphaned checks to the LFS storage is not
sufficient to completely GC LFS, as it is possible for LFSMetaObjects to
remain associated with repos but still need to be garbage collected.

Imagine a situation where a branch is uploaded containing LFS files but
that branch is later completely deleted. The LFSMetaObjects will remain
associated with the Repository but the Repository will no longer contain
any pointers to the object.

This PR adds a second doctor command to perform a full GC.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath
2022-12-15 20:44:16 +00:00
committed by GitHub
parent 3243dbe1a9
commit 651fe4bb7d
5 changed files with 245 additions and 39 deletions

View File

@@ -22,8 +22,8 @@ import (
"xorm.io/builder"
)
// GitFsck calls 'git fsck' to check repository health.
func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
// GitFsckRepos calls 'git fsck' to check repository health.
func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Doing: GitFsck")
if err := db.Iterate(
@@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return db.ErrCancelledf("before fsck of %s", repo.FullName())
default:
}
log.Trace("Running health check on repository %v", repo)
repoPath := repo.RepoPath()
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
log.Warn("Failed to health check repository (%v): %v", repo, err)
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
}
return nil
return GitFsckRepo(ctx, repo, timeout, args)
},
); err != nil {
log.Trace("Error: GitFsck: %v", err)
@@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return nil
}
// GitFsckRepo runs 'git fsck' against a single repository to check its health.
//
// A failing check is not propagated to the caller: it is logged as a warning
// and recorded as a repository notice, and the function still returns nil.
// A failure to create the notice itself is only logged.
func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
	log.Trace("Running health check on repository %-v", repo)

	fsckErr := git.Fsck(ctx, repo.RepoPath(), timeout, args...)
	if fsckErr == nil {
		return nil
	}

	log.Warn("Failed to health check repository (%-v): %v", repo, fsckErr)
	noticeErr := system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), fsckErr)
	if noticeErr != nil {
		log.Error("CreateRepositoryNotice: %v", noticeErr)
	}
	// Health-check failures are reported via log and notice only.
	return nil
}
// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
log.Trace("Doing: GitGcRepos")
@@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return db.ErrCancelledf("before GC of %s", repo.FullName())
default:
}
log.Trace("Running git gc on %v", repo)
command := git.NewCommand(ctx, args...).
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
var stdout string
var err error
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
if err != nil {
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}
// Now update the size of the repository
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}
return nil
return GitGcRepo(ctx, repo, timeout, args)
},
); err != nil {
return err
@@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return nil
}
// GitGcRepo calls 'git gc' to remove unnecessary files and optimize a single
// local repository, then updates the recorded size of that repository.
//
// The git command is built from args and run in repo.RepoPath() with the given
// timeout. If either the command or the subsequent size update fails, the
// failure is logged, a repository notice is created (a failure to create the
// notice is only logged), and an error wrapping the original failure is
// returned. On success it returns nil.
func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
	log.Trace("Running git gc on %-v", repo)
	command := git.NewCommand(ctx, args...).
		SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
	var stdout string
	var err error
	stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})
	if err != nil {
		log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
		desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
		// Keep the notice error in its own variable so the git failure in err
		// is not overwritten before it is wrapped below.
		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
			log.Error("CreateRepositoryNotice: %v", noticeErr)
		}
		return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
	}

	// Now update the size of the repository
	if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
		log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
		desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
		// Same as above: do not clobber the size-update error with the notice result.
		if noticeErr := system_model.CreateRepositoryNotice(desc); noticeErr != nil {
			log.Error("CreateRepositoryNotice: %v", noticeErr)
		}
		return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
	}

	return nil
}
func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
repos := make([]*repo_model.Repository, 0, 10)
if err := db.Iterate(