mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-26 00:48:29 +00:00 
			
		
		
		
	improve performance of diffs (#32393)
This has two major changes that significantly reduce the amount of work done for large diffs: * Kill a running git process when reaching the maximum number of files in a diff, preventing it from processing the entire diff. * When loading a diff with the URL param `file-only=true`, skip loading stats. This speeds up loading both hidden files of a diff and sections of a diff when clicking the "Show More" button. A couple of minor things from profiling are also included: * Reuse existing repo in `PrepareViewPullInfo` if head and base are the same. The performance impact is going to depend heavily on the individual diff and the hardware it runs on, but when testing locally on a diff changing 100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction in time to load the result of "Show More" --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		| @@ -328,6 +328,7 @@ func Diff(ctx *context.Context) { | |||||||
| 		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters, | 		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters, | ||||||
| 		MaxFiles:           maxFiles, | 		MaxFiles:           maxFiles, | ||||||
| 		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), | 		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), | ||||||
|  | 		FileOnly:           fileOnly, | ||||||
| 	}, files...) | 	}, files...) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		ctx.NotFound("GetDiff", err) | 		ctx.NotFound("GetDiff", err) | ||||||
|   | |||||||
| @@ -611,6 +611,8 @@ func PrepareCompareDiff( | |||||||
| 		maxLines, maxFiles = -1, -1 | 		maxLines, maxFiles = -1, -1 | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	fileOnly := ctx.FormBool("file-only") | ||||||
|  |  | ||||||
| 	diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo, | 	diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo, | ||||||
| 		&gitdiff.DiffOptions{ | 		&gitdiff.DiffOptions{ | ||||||
| 			BeforeCommitID:     beforeCommitID, | 			BeforeCommitID:     beforeCommitID, | ||||||
| @@ -621,6 +623,7 @@ func PrepareCompareDiff( | |||||||
| 			MaxFiles:           maxFiles, | 			MaxFiles:           maxFiles, | ||||||
| 			WhitespaceBehavior: whitespaceBehavior, | 			WhitespaceBehavior: whitespaceBehavior, | ||||||
| 			DirectComparison:   ci.DirectComparison, | 			DirectComparison:   ci.DirectComparison, | ||||||
|  | 			FileOnly:           fileOnly, | ||||||
| 		}, ctx.FormStrings("files")...) | 		}, ctx.FormStrings("files")...) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err) | 		ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err) | ||||||
|   | |||||||
| @@ -395,12 +395,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C | |||||||
| 	var headBranchSha string | 	var headBranchSha string | ||||||
| 	// HeadRepo may be missing | 	// HeadRepo may be missing | ||||||
| 	if pull.HeadRepo != nil { | 	if pull.HeadRepo != nil { | ||||||
| 		headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo) | 		headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			ctx.ServerError("OpenRepository", err) | 			ctx.ServerError("RepositoryFromContextOrOpen", err) | ||||||
| 			return nil | 			return nil | ||||||
| 		} | 		} | ||||||
| 		defer headGitRepo.Close() | 		defer closer.Close() | ||||||
|  |  | ||||||
| 		if pull.Flow == issues_model.PullRequestFlowGithub { | 		if pull.Flow == issues_model.PullRequestFlowGithub { | ||||||
| 			headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch) | 			headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch) | ||||||
| @@ -747,6 +747,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi | |||||||
| 		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters, | 		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters, | ||||||
| 		MaxFiles:           maxFiles, | 		MaxFiles:           maxFiles, | ||||||
| 		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), | 		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)), | ||||||
|  | 		FileOnly:           fileOnly, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if !willShowSpecifiedCommit { | 	if !willShowSpecifiedCommit { | ||||||
|   | |||||||
| @@ -380,18 +380,11 @@ func (diffFile *DiffFile) GetType() int { | |||||||
| } | } | ||||||
|  |  | ||||||
| // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file | // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file | ||||||
| func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection { | func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection { | ||||||
| 	if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { | 	if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
| 	leftCommit, err := gitRepo.GetCommit(leftCommitID) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	rightCommit, err := gitRepo.GetCommit(rightCommitID) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	lastSection := diffFile.Sections[len(diffFile.Sections)-1] | 	lastSection := diffFile.Sections[len(diffFile.Sections)-1] | ||||||
| 	lastLine := lastSection.Lines[len(lastSection.Lines)-1] | 	lastLine := lastSection.Lines[len(lastSection.Lines)-1] | ||||||
| 	leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) | 	leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) | ||||||
| @@ -536,11 +529,6 @@ parsingLoop: | |||||||
| 			lastFile := createDiffFile(diff, line) | 			lastFile := createDiffFile(diff, line) | ||||||
| 			diff.End = lastFile.Name | 			diff.End = lastFile.Name | ||||||
| 			diff.IsIncomplete = true | 			diff.IsIncomplete = true | ||||||
| 			_, err := io.Copy(io.Discard, reader) |  | ||||||
| 			if err != nil { |  | ||||||
| 				// By the definition of io.Copy this never returns io.EOF |  | ||||||
| 				return diff, fmt.Errorf("error during io.Copy: %w", err) |  | ||||||
| 			} |  | ||||||
| 			break parsingLoop | 			break parsingLoop | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| @@ -1101,6 +1089,7 @@ type DiffOptions struct { | |||||||
| 	MaxFiles           int | 	MaxFiles           int | ||||||
| 	WhitespaceBehavior git.TrustedCmdArgs | 	WhitespaceBehavior git.TrustedCmdArgs | ||||||
| 	DirectComparison   bool | 	DirectComparison   bool | ||||||
|  | 	FileOnly           bool | ||||||
| } | } | ||||||
|  |  | ||||||
| // GetDiff builds a Diff between two commits of a repository. | // GetDiff builds a Diff between two commits of a repository. | ||||||
| @@ -1109,12 +1098,16 @@ type DiffOptions struct { | |||||||
| func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { | func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { | ||||||
| 	repoPath := gitRepo.Path | 	repoPath := gitRepo.Path | ||||||
|  |  | ||||||
|  | 	var beforeCommit *git.Commit | ||||||
| 	commit, err := gitRepo.GetCommit(opts.AfterCommitID) | 	commit, err := gitRepo.GetCommit(opts.AfterCommitID) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	cmdDiff := git.NewCommand(gitRepo.Ctx) | 	cmdCtx, cmdCancel := context.WithCancel(ctx) | ||||||
|  | 	defer cmdCancel() | ||||||
|  |  | ||||||
|  | 	cmdDiff := git.NewCommand(cmdCtx) | ||||||
| 	objectFormat, err := gitRepo.GetObjectFormat() | 	objectFormat, err := gitRepo.GetObjectFormat() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -1136,6 +1129,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi | |||||||
| 			AddArguments(opts.WhitespaceBehavior...). | 			AddArguments(opts.WhitespaceBehavior...). | ||||||
| 			AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID) | 			AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID) | ||||||
| 		opts.BeforeCommitID = actualBeforeCommitID | 		opts.BeforeCommitID = actualBeforeCommitID | ||||||
|  |  | ||||||
|  | 		var err error | ||||||
|  | 		beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file | 	// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file | ||||||
| @@ -1163,14 +1162,16 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi | |||||||
| 			Dir:     repoPath, | 			Dir:     repoPath, | ||||||
| 			Stdout:  writer, | 			Stdout:  writer, | ||||||
| 			Stderr:  stderr, | 			Stderr:  stderr, | ||||||
| 		}); err != nil { | 		}); err != nil && err.Error() != "signal: killed" { | ||||||
| 			log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String()) | 			log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String()) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		_ = writer.Close() | 		_ = writer.Close() | ||||||
| 	}() | 	}() | ||||||
|  |  | ||||||
| 	diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) | 	diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) | ||||||
|  | 	// Ensure the git process is killed if it didn't exit already | ||||||
|  | 	cmdCancel() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, fmt.Errorf("unable to ParsePatch: %w", err) | 		return nil, fmt.Errorf("unable to ParsePatch: %w", err) | ||||||
| 	} | 	} | ||||||
| @@ -1205,37 +1206,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi | |||||||
| 		} | 		} | ||||||
| 		diffFile.IsGenerated = isGenerated.Value() | 		diffFile.IsGenerated = isGenerated.Value() | ||||||
|  |  | ||||||
| 		tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID) | 		tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit) | ||||||
| 		if tailSection != nil { | 		if tailSection != nil { | ||||||
| 			diffFile.Sections = append(diffFile.Sections, tailSection) | 			diffFile.Sections = append(diffFile.Sections, tailSection) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	separator := "..." | 	if opts.FileOnly { | ||||||
| 	if opts.DirectComparison { | 		return diff, nil | ||||||
| 		separator = ".." |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID} | 	stats, err := GetPullDiffStats(gitRepo, opts) | ||||||
| 	if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() { |  | ||||||
| 		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} |  | ||||||
| 	} |  | ||||||
| 	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) |  | ||||||
| 	if err != nil && strings.Contains(err.Error(), "no merge base") { |  | ||||||
| 		// git >= 2.28 now returns an error if base and head have become unrelated. |  | ||||||
| 		// previously it would return the results of git diff --shortstat base head so let's try that... |  | ||||||
| 		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} |  | ||||||
| 		diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) |  | ||||||
| 	} |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion | ||||||
|  |  | ||||||
| 	return diff, nil | 	return diff, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| type PullDiffStats struct { | type PullDiffStats struct { | ||||||
| 	TotalAddition, TotalDeletion int | 	NumFiles, TotalAddition, TotalDeletion int | ||||||
| } | } | ||||||
|  |  | ||||||
| // GetPullDiffStats | // GetPullDiffStats | ||||||
| @@ -1259,12 +1251,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat | |||||||
| 		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} | 		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) | 	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) | ||||||
| 	if err != nil && strings.Contains(err.Error(), "no merge base") { | 	if err != nil && strings.Contains(err.Error(), "no merge base") { | ||||||
| 		// git >= 2.28 now returns an error if base and head have become unrelated. | 		// git >= 2.28 now returns an error if base and head have become unrelated. | ||||||
| 		// previously it would return the results of git diff --shortstat base head so let's try that... | 		// previously it would return the results of git diff --shortstat base head so let's try that... | ||||||
| 		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} | 		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} | ||||||
| 		_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) | 		diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) | ||||||
| 	} | 	} | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
|   | |||||||
| @@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) { | |||||||
| 			Stderr:  stderr, | 			Stderr:  stderr, | ||||||
| 			PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { | 			PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { | ||||||
| 				_ = stdoutWriter.Close() | 				_ = stdoutWriter.Close() | ||||||
|  | 				defer cancel() | ||||||
| 				diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "") | 				diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "") | ||||||
| 				if finalErr != nil { | 				if finalErr != nil { | ||||||
| 					log.Error("ParsePatch: %v", finalErr) | 					log.Error("ParsePatch: %v", finalErr) | ||||||
| @@ -371,11 +372,15 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) { | |||||||
| 			log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr) | 			log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr) | ||||||
| 			return nil, finalErr | 			return nil, finalErr | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		// If the process exited early, don't error | ||||||
|  | 		if err != context.Canceled { | ||||||
| 			log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s", | 			log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s", | ||||||
| 				t.repo.FullName(), t.basePath, err, stderr) | 				t.repo.FullName(), t.basePath, err, stderr) | ||||||
| 			return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s", | 			return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s", | ||||||
| 				t.repo.FullName(), err, stderr) | 				t.repo.FullName(), err, stderr) | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD") | 	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD") | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user