diff --git a/modules/setting/repository.go b/modules/setting/repository.go index 318cf41108..90c4f22ad2 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -54,6 +54,12 @@ var ( AllowForkWithoutMaximumLimit bool AllowForkIntoSameOwner bool + // StreamArchives makes Gitea stream git archive files to the client directly instead of creating an archive first. + // Ideally all users should use this streaming method. However, at the moment we don't know whether there are + // any users who still need the old behavior, so we introduce this option, intentionally not documenting it. + // After one or two releases, if no one complains, we will remove this option and always use streaming. + StreamArchives bool + // Repository editor settings Editor struct { LineWrapExtensions []string @@ -167,6 +173,7 @@ var ( DisableStars: false, DefaultBranch: "main", AllowForkWithoutMaximumLimit: true, + StreamArchives: true, // Repository editor settings Editor: struct { diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index bada1f6b76..66afede218 100644 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -1247,7 +1247,7 @@ func Routes() *web.Router { }, reqToken()) m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile) m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS) - m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive) + m.Methods("HEAD,GET", "/archive/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.GetArchive) m.Combo("/forks").Get(repo.ListForks). Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork) m.Post("/merge-upstream", reqToken(), mustNotBeArchived, reqRepoWriter(unit.TypeCode), bind(api.MergeUpstreamRequest{}), repo.MergeUpstream) @@ -1466,7 +1466,7 @@ func Routes() *web.Router { m.Delete("", repo.DeleteAvatar) }, reqAdmin(), reqToken()) - m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), repo.DownloadArchive) + m.Methods("HEAD,GET", "/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), context.ReferencesGitRepo(true), repo.DownloadArchive) }, repoAssignment(), checkTokenPublicOnly()) }, tokenRequiresScopes(auth_model.AccessTokenScopeCategoryRepository)) diff --git a/routers/api/v1/repo/download.go b/routers/api/v1/repo/download.go index acd93ecf2e..c16605d6cd 100644 --- a/routers/api/v1/repo/download.go +++ b/routers/api/v1/repo/download.go @@ -4,14 +4,29 @@ package repo import ( + "errors" "net/http" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/services/context" archiver_service "code.gitea.io/gitea/services/repository/archiver" ) +func serveRepoArchive(ctx *context.APIContext, reqFileName string) { + aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, reqFileName) + if err != nil { + if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { + ctx.APIError(http.StatusBadRequest, err) + } else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) { + ctx.APIError(http.StatusNotFound, err) + } else { + ctx.APIErrorInternal(err) + } + return + } + archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq) +} + func DownloadArchive(ctx *context.APIContext) { var tp git.ArchiveType switch ballType := ctx.PathParam("ball_type"); ballType { @@ -25,27 +40,5 @@ func DownloadArchive(ctx *context.APIContext) { ctx.APIError(http.StatusBadRequest, "Unknown archive type: "+ballType) return } - - if ctx.Repo.GitRepo == nil { - var err error - ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository) - if err != nil { - ctx.APIErrorInternal(err) - return - } - } - - r, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*")+"."+tp.String()) - if err != nil { - ctx.APIErrorInternal(err) - return - } - - archive, err := r.Await(ctx) - if err != nil { - ctx.APIErrorInternal(err) - return - } - - download(ctx, r.GetArchiveName(), archive) + serveRepoArchive(ctx, ctx.PathParam("*")+"."+tp.String()) } diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index a85dda79d0..cd787b9da3 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -15,9 +15,7 @@ import ( "time" git_model "code.gitea.io/gitea/models/git" - repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/httpcache" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/lfs" @@ -31,7 +29,6 @@ import ( "code.gitea.io/gitea/routers/common" "code.gitea.io/gitea/services/context" pull_service "code.gitea.io/gitea/services/pull" - archiver_service "code.gitea.io/gitea/services/repository/archiver" files_service "code.gitea.io/gitea/services/repository/files" ) @@ -282,74 +279,7 @@ func GetArchive(ctx *context.APIContext) { // "404": // "$ref": "#/responses/notFound" - if ctx.Repo.GitRepo == nil { - var err error - ctx.Repo.GitRepo, err = gitrepo.RepositoryFromRequestContextOrOpen(ctx, ctx.Repo.Repository) - if err != nil { - ctx.APIErrorInternal(err) - return - } - } - - archiveDownload(ctx) -} - -func archiveDownload(ctx *context.APIContext) { - aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*")) - if err != nil { - if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { - ctx.APIError(http.StatusBadRequest, err) - } else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) { - ctx.APIError(http.StatusNotFound, err) - } else { - ctx.APIErrorInternal(err) - } - return - } - - archiver, err := aReq.Await(ctx) - if err != nil { - ctx.APIErrorInternal(err) - return - } - - download(ctx, aReq.GetArchiveName(), archiver) -} - -func download(ctx *context.APIContext, archiveName string, archiver *repo_model.RepoArchiver) { - downloadName := ctx.Repo.Repository.Name + "-" + archiveName - - // Add nix format link header so tarballs lock correctly: - // https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md - ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`, - ctx.Repo.Repository.APIURL(), - archiver.CommitID, - archiver.Type.String(), - archiver.CommitID, - )) - - rPath := archiver.RelativePath() - if setting.RepoArchive.Storage.ServeDirect() { - // If we have a signed url (S3, object storage), redirect to this directly. - u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil) - if u != nil && err == nil { - ctx.Redirect(u.String()) - return - } - } - - // If we have matched and access to release or issue - fr, err := storage.RepoArchives.Open(rPath) - if err != nil { - ctx.APIErrorInternal(err) - return - } - defer fr.Close() - - ctx.ServeContent(fr, &context.ServeHeaderOptions{ - Filename: downloadName, - LastModified: archiver.CreatedUnix.AsLocalTime(), - }) + serveRepoArchive(ctx, ctx.PathParam("*")) } // GetEditorconfig get editor config of a repository diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index 1b700aa6da..2c137b64ab 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -24,7 +24,6 @@ import ( "code.gitea.io/gitea/modules/optional" repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/storage" api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/templates" "code.gitea.io/gitea/modules/util" @@ -376,53 +375,19 @@ func Download(ctx *context.Context) { } return } - - archiver, err := aReq.Await(ctx) - if err != nil { - ctx.ServerError("archiver.Await", err) - return - } - - download(ctx, aReq.GetArchiveName(), archiver) -} - -func download(ctx *context.Context, archiveName string, archiver *repo_model.RepoArchiver) { - downloadName := ctx.Repo.Repository.Name + "-" + archiveName - - // Add nix format link header so tarballs lock correctly: - // https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md - ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.tar.gz?rev=%s>; rel="immutable"`, - ctx.Repo.Repository.APIURL(), - archiver.CommitID, archiver.CommitID)) - - rPath := archiver.RelativePath() - if setting.RepoArchive.Storage.ServeDirect() { - // If we have a signed url (S3, object storage), redirect to this directly. - u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil) - if u != nil && err == nil { - ctx.Redirect(u.String()) - return - } - } - - // If we have matched and access to release or issue - fr, err := storage.RepoArchives.Open(rPath) - if err != nil { - ctx.ServerError("Open", err) - return - } - defer fr.Close() - - ctx.ServeContent(fr, &context.ServeHeaderOptions{ - Filename: downloadName, - LastModified: archiver.CreatedUnix.AsLocalTime(), - }) + archiver_service.ServeRepoArchive(ctx.Base, ctx.Repo.Repository, ctx.Repo.GitRepo, aReq) } // InitiateDownload will enqueue an archival request, as needed. It may submit // a request that's already in-progress, but the archiver service will just // kind of drop it on the floor if this is the case. func InitiateDownload(ctx *context.Context) { + if setting.Repository.StreamArchives { + ctx.JSON(http.StatusOK, map[string]any{ + "complete": true, + }) + return + } aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.PathParam("*")) if err != nil { ctx.HTTPError(http.StatusBadRequest, "invalid archive request") diff --git a/services/repository/archiver/archiver.go b/services/repository/archiver/archiver.go index a657e3884c..0aa8fd596b 100644 --- a/services/repository/archiver/archiver.go +++ b/services/repository/archiver/archiver.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "io" + "net/http" "os" "strings" "time" @@ -17,11 +18,13 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/httplib" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" + gitea_context "code.gitea.io/gitea/services/context" ) // ArchiveRequest defines the parameters of an archive request, which notably @@ -138,6 +141,25 @@ func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver } } +// Stream satisfies the ArchiveRequest being passed in. Processing +// will occur directly in this routine. +func (aReq *ArchiveRequest) Stream(ctx context.Context, gitRepo *git.Repository, w io.Writer) error { + if aReq.Type == git.ArchiveBundle { + return gitRepo.CreateBundle( + ctx, + aReq.CommitID, + w, + ) + } + return gitRepo.CreateArchive( + ctx, + aReq.Type, + w, + setting.Repository.PrefixArchiveFiles, + aReq.CommitID, + ) +} + // doArchive satisfies the ArchiveRequest being passed in. Processing // will occur in a separate goroutine, as this phase may take a while to // complete. If the archive already exists, doArchive will not do @@ -204,31 +226,17 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver } defer gitRepo.Close() - go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) { + go func(done chan error, w *io.PipeWriter, archiveReq *ArchiveRequest, gitRepo *git.Repository) { defer func() { if r := recover(); r != nil { done <- fmt.Errorf("%v", r) } }() - if archiver.Type == git.ArchiveBundle { - err = gitRepo.CreateBundle( - ctx, - archiver.CommitID, - w, - ) - } else { - err = gitRepo.CreateArchive( - ctx, - archiver.Type, - w, - setting.Repository.PrefixArchiveFiles, - archiver.CommitID, - ) - } + err := archiveReq.Stream(ctx, gitRepo, w) _ = w.CloseWithError(err) done <- err - }(done, w, archiver, gitRepo) + }(done, w, r, gitRepo) // TODO: add lfs data to zip // TODO: add submodule data to zip @@ -338,3 +346,54 @@ func DeleteRepositoryArchives(ctx context.Context) error { } return storage.Clean(storage.RepoArchives) } + +func ServeRepoArchive(ctx *gitea_context.Base, repo *repo_model.Repository, gitRepo *git.Repository, archiveReq *ArchiveRequest) { + // Add nix format link header so tarballs lock correctly: + // https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md + ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`, + repo.APIURL(), + archiveReq.CommitID, + archiveReq.Type.String(), + archiveReq.CommitID, + )) + downloadName := repo.Name + "-" + archiveReq.GetArchiveName() + + if setting.Repository.StreamArchives { + httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{Filename: downloadName}) + if err := archiveReq.Stream(ctx, gitRepo, ctx.Resp); err != nil && !ctx.Written() { + log.Error("Archive %v streaming failed: %v", archiveReq, err) + ctx.HTTPError(http.StatusInternalServerError) + } + return + } + + archiver, err := archiveReq.Await(ctx) + if err != nil { + log.Error("Archive %v await failed: %v", archiveReq, err) + ctx.HTTPError(http.StatusInternalServerError) + return + } + + rPath := archiver.RelativePath() + if setting.RepoArchive.Storage.ServeDirect() { + // If we have a signed url (S3, object storage), redirect to this directly. + u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil) + if u != nil && err == nil { + ctx.Redirect(u.String()) + return + } + } + + fr, err := storage.RepoArchives.Open(rPath) + if err != nil { + log.Error("Archive %v open file failed: %v", archiveReq, err) + ctx.HTTPError(http.StatusInternalServerError) + return + } + defer fr.Close() + + ctx.ServeContent(fr, &gitea_context.ServeHeaderOptions{ + Filename: downloadName, + LastModified: archiver.CreatedUnix.AsLocalTime(), + }) +}