From 188e0ee8e40ad0b32f9db33a0a217043cfdf3610 Mon Sep 17 00:00:00 2001 From: Kemal Zebari <60799661+kemzeb@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:09:47 -0800 Subject: [PATCH] Use `Project-URL` metadata field to get a PyPI package's homepage URL (#33089) Resolves #33085. --- routers/api/packages/pypi/pypi.go | 47 ++++++++++++-- routers/api/packages/pypi/pypi_test.go | 10 +++ tests/integration/api_packages_pypi_test.go | 69 +++++++++++++++++++-- 3 files changed, 116 insertions(+), 10 deletions(-) diff --git a/routers/api/packages/pypi/pypi.go b/routers/api/packages/pypi/pypi.go index 5ea86071a9..19aa54628d 100644 --- a/routers/api/packages/pypi/pypi.go +++ b/routers/api/packages/pypi/pypi.go @@ -10,6 +10,7 @@ import ( "regexp" "sort" "strings" + "unicode" packages_model "code.gitea.io/gitea/models/packages" packages_module "code.gitea.io/gitea/modules/packages" @@ -139,9 +140,30 @@ func UploadPackageFile(ctx *context.Context) { return } - projectURL := ctx.Req.FormValue("home_page") - if !validation.IsValidURL(projectURL) { - projectURL = "" + // Ensure ctx.Req.Form exists. + _ = ctx.Req.ParseForm() + + var homepageURL string + projectURLs := ctx.Req.Form["project_urls"] + for _, purl := range projectURLs { + label, url, found := strings.Cut(purl, ",") + if !found { + continue + } + if normalizeLabel(label) != "homepage" { + continue + } + homepageURL = strings.TrimSpace(url) + break + } + + if len(homepageURL) == 0 { + // TODO: Home-page is a deprecated metadata field. Remove this branch once it's no longer apart of the spec. + homepageURL = ctx.Req.FormValue("home_page") + } + + if !validation.IsValidURL(homepageURL) { + homepageURL = "" } _, _, err = packages_service.CreatePackageOrAddFileToExisting( @@ -160,7 +182,7 @@ func UploadPackageFile(ctx *context.Context) { Description: ctx.Req.FormValue("description"), LongDescription: ctx.Req.FormValue("long_description"), Summary: ctx.Req.FormValue("summary"), - ProjectURL: projectURL, + ProjectURL: homepageURL, License: ctx.Req.FormValue("license"), RequiresPython: ctx.Req.FormValue("requires_python"), }, @@ -189,6 +211,23 @@ func UploadPackageFile(ctx *context.Context) { ctx.Status(http.StatusCreated) } +// Normalizes a Project-URL label. +// See https://packaging.python.org/en/latest/specifications/well-known-project-urls/#label-normalization. +func normalizeLabel(label string) string { + var builder strings.Builder + + // "A label is normalized by deleting all ASCII punctuation and whitespace, and then converting the result + // to lowercase." + for _, r := range label { + if unicode.IsPunct(r) || unicode.IsSpace(r) { + continue + } + builder.WriteRune(unicode.ToLower(r)) + } + + return builder.String() +} + func isValidNameAndVersion(packageName, packageVersion string) bool { return nameMatcher.MatchString(packageName) && versionMatcher.MatchString(packageVersion) } diff --git a/routers/api/packages/pypi/pypi_test.go b/routers/api/packages/pypi/pypi_test.go index 3023692177..786105693f 100644 --- a/routers/api/packages/pypi/pypi_test.go +++ b/routers/api/packages/pypi/pypi_test.go @@ -36,3 +36,13 @@ func TestIsValidNameAndVersion(t *testing.T) { assert.False(t, isValidNameAndVersion("test-name", "1.0.1aa")) assert.False(t, isValidNameAndVersion("test-name", "1.0.0-alpha.beta")) } + +func TestNormalizeLabel(t *testing.T) { + // Cases fetched from https://packaging.python.org/en/latest/specifications/well-known-project-urls/#label-normalization. + assert.Equal(t, "homepage", normalizeLabel("Homepage")) + assert.Equal(t, "homepage", normalizeLabel("Home-page")) + assert.Equal(t, "homepage", normalizeLabel("Home page")) + assert.Equal(t, "changelog", normalizeLabel("Change_Log")) + assert.Equal(t, "whatsnew", normalizeLabel("What's New?")) + assert.Equal(t, "github", normalizeLabel("github")) +} diff --git a/tests/integration/api_packages_pypi_test.go b/tests/integration/api_packages_pypi_test.go index e973f6a52a..2dabb5005b 100644 --- a/tests/integration/api_packages_pypi_test.go +++ b/tests/integration/api_packages_pypi_test.go @@ -32,15 +32,16 @@ func TestPackagePyPI(t *testing.T) { packageVersion := "1!1.0.1+r1234" packageAuthor := "KN4CK3R" packageDescription := "Test Description" + projectURL := "https://example.com" content := "test" hashSHA256 := "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" root := fmt.Sprintf("/api/packages/%s/pypi", user.Name) - uploadFile := func(t *testing.T, filename, content string, expectedStatus int) { - body := &bytes.Buffer{} - writer := multipart.NewWriter(body) + createBasicMultipartFile := func(filename, packageName, content string) (body *bytes.Buffer, writer *multipart.Writer, closer func() error) { + body = &bytes.Buffer{} + writer = multipart.NewWriter(body) part, _ := writer.CreateFormFile("content", filename) _, _ = io.Copy(part, strings.NewReader(content)) @@ -52,14 +53,27 @@ func TestPackagePyPI(t *testing.T) { writer.WriteField("sha256_digest", hashSHA256) writer.WriteField("requires_python", "3.6") - _ = writer.Close() + return body, writer, writer.Close + } + uploadHelper := func(t *testing.T, body *bytes.Buffer, contentType string, expectedStatus int) { req := NewRequestWithBody(t, "POST", root, body). - SetHeader("Content-Type", writer.FormDataContentType()). + SetHeader("Content-Type", contentType). AddBasicAuth(user.Name) MakeRequest(t, req, expectedStatus) } + uploadFile := func(t *testing.T, filename, content string, expectedStatus int) { + body, writer, closeFunc := createBasicMultipartFile(filename, packageName, content) + + writer.WriteField("project_urls", "DOCUMENTATION , https://readthedocs.org") + writer.WriteField("project_urls", fmt.Sprintf("Home-page, %s", projectURL)) + + _ = closeFunc() + + uploadHelper(t, body, writer.FormDataContentType(), expectedStatus) + } + t.Run("Upload", func(t *testing.T) { defer tests.PrintCurrentTest(t)() @@ -74,6 +88,7 @@ func TestPackagePyPI(t *testing.T) { assert.NoError(t, err) assert.Nil(t, pd.SemVer) assert.IsType(t, &pypi.Metadata{}, pd.Metadata) + assert.Equal(t, projectURL, pd.Metadata.(*pypi.Metadata).ProjectURL) assert.Equal(t, packageName, pd.Package.Name) assert.Equal(t, packageVersion, pd.Version.Version) @@ -133,6 +148,48 @@ func TestPackagePyPI(t *testing.T) { uploadFile(t, "test.tar.gz", content, http.StatusConflict) }) + t.Run("UploadUsingDeprecatedHomepageMetadata", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + pkgName := "homepage-package" + body, writer, closeFunc := createBasicMultipartFile("test.whl", pkgName, content) + + writer.WriteField("home_page", projectURL) + + _ = closeFunc() + + uploadHelper(t, body, writer.FormDataContentType(), http.StatusCreated) + + pvs, err := packages.GetVersionsByPackageName(db.DefaultContext, user.ID, packages.TypePyPI, pkgName) + assert.NoError(t, err) + assert.Len(t, pvs, 1) + + pd, err := packages.GetPackageDescriptor(db.DefaultContext, pvs[0]) + assert.NoError(t, err) + assert.IsType(t, &pypi.Metadata{}, pd.Metadata) + assert.Equal(t, projectURL, pd.Metadata.(*pypi.Metadata).ProjectURL) + }) + + t.Run("UploadWithoutAnyHomepageURLMetadata", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + pkgName := "no-project-url-or-homepage-package" + body, writer, closeFunc := createBasicMultipartFile("test.whl", pkgName, content) + + _ = closeFunc() + + uploadHelper(t, body, writer.FormDataContentType(), http.StatusCreated) + + pvs, err := packages.GetVersionsByPackageName(db.DefaultContext, user.ID, packages.TypePyPI, pkgName) + assert.NoError(t, err) + assert.Len(t, pvs, 1) + + pd, err := packages.GetPackageDescriptor(db.DefaultContext, pvs[0]) + assert.NoError(t, err) + assert.IsType(t, &pypi.Metadata{}, pd.Metadata) + assert.Empty(t, pd.Metadata.(*pypi.Metadata).ProjectURL) + }) + t.Run("Download", func(t *testing.T) { defer tests.PrintCurrentTest(t)() @@ -147,7 +204,7 @@ func TestPackagePyPI(t *testing.T) { downloadFile("test.whl") downloadFile("test.tar.gz") - pvs, err := packages.GetVersionsByPackageType(db.DefaultContext, user.ID, packages.TypePyPI) + pvs, err := packages.GetVersionsByPackageName(db.DefaultContext, user.ID, packages.TypePyPI, packageName) assert.NoError(t, err) assert.Len(t, pvs, 1) assert.Equal(t, int64(2), pvs[0].DownloadCount)