mirror of
https://github.com/go-gitea/gitea
synced 2025-07-22 18:28:37 +00:00
Support repo license (#24872)
Close #278 Close #24076 ## Solutions: - Use [google/licenseclassifier](https://github.com/google/licenseclassifier/) Test result between [google/licensecheck](https://github.com/google/licensecheck) and [go-license-detector](https://github.com/go-enry/go-license-detector): https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167 Test result between [google/licensecheck](https://github.com/google/licensecheck) and [google/licenseclassifier](https://github.com/google/licenseclassifier/): https://github.com/go-gitea/gitea/pull/24872#issuecomment-1576092178 - Generate a License Convert Name List to avoid importing license templates with the same contents. Gitea automatically gets the latest license data from [spdx/license-list-data](https://github.com/spdx/license-list-data). But unfortunately, some license templates have the same contents. #20915 [click here to see the list](https://github.com/go-gitea/gitea/pull/24872#issuecomment-1584141684) So we will generate a list of these license templates with the same contents and create a new file to save the result when using `make generate-license`. (Need to decide the save path) - Save license info into a new table `repo_license`. This makes it easy to support searching repos by license in the future. ## Screenshots Single License:  Multiple Licenses:  Triggers: - [x] Push commit to default branch - [x] Create repo - [x] Mirror repo - [x] When Default Branch is changed, licenses should be updated Todo: - [x] Save license info into the DB when there's a change to the license file in the commit - [x] DB Migration - [x] A nominal test? 
- [x] Select which library to use(https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167) - [x] API Support - [x] Add repo license table - ~Select license in settings if there are several licenses(Not recommended)~ - License board(later, not in this PR)  --------- Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com> Co-authored-by: Denys Konovalov <kontakt@denyskon.de> Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: KN4CK3R <admin@oldschoolhack.me> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: 6543 <m.huber@kithara.com> Co-authored-by: a1012112796 <1012112796@qq.com> Co-authored-by: techknowlogick <techknowlogick@gitea.com>
This commit is contained in:
@@ -404,6 +404,13 @@ func repoAssignment(ctx *Context, repo *repo_model.Repository) {
|
||||
ctx.Data["PushMirrors"] = pushMirrors
|
||||
ctx.Data["RepoName"] = ctx.Repo.Repository.Name
|
||||
ctx.Data["IsEmptyRepo"] = ctx.Repo.Repository.IsEmpty
|
||||
|
||||
repoLicenses, err := repo_model.GetRepoLicenses(ctx, ctx.Repo.Repository)
|
||||
if err != nil {
|
||||
ctx.ServerError("GetRepoLicenses", err)
|
||||
return
|
||||
}
|
||||
ctx.Data["DetectedRepoLicenses"] = repoLicenses.StringList()
|
||||
}
|
||||
|
||||
// RepoAssignment returns a middleware to handle repository assignment
|
||||
|
@@ -175,6 +175,11 @@ func innerToRepo(ctx context.Context, repo *repo_model.Repository, permissionInR
|
||||
language = repo.PrimaryLanguage.Language
|
||||
}
|
||||
|
||||
repoLicenses, err := repo_model.GetRepoLicenses(ctx, repo)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
repoAPIURL := repo.APIURL()
|
||||
|
||||
return &api.Repository{
|
||||
@@ -238,6 +243,7 @@ func innerToRepo(ctx context.Context, repo *repo_model.Repository, permissionInR
|
||||
RepoTransfer: transfer,
|
||||
Topics: repo.Topics,
|
||||
ObjectFormatName: repo.ObjectFormatName,
|
||||
Licenses: repoLicenses.StringList(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -156,6 +156,16 @@ func registerCleanupPackages() {
|
||||
})
|
||||
}
|
||||
|
||||
func registerSyncRepoLicenses() {
|
||||
RegisterTaskFatal("sync_repo_licenses", &BaseConfig{
|
||||
Enabled: false,
|
||||
RunAtStart: false,
|
||||
Schedule: "@annually",
|
||||
}, func(ctx context.Context, _ *user_model.User, config Config) error {
|
||||
return repo_service.SyncRepoLicenses(ctx)
|
||||
})
|
||||
}
|
||||
|
||||
func initBasicTasks() {
|
||||
if setting.Mirror.Enabled {
|
||||
registerUpdateMirrorTask()
|
||||
@@ -172,4 +182,5 @@ func initBasicTasks() {
|
||||
if setting.Packages.Enabled {
|
||||
registerCleanupPackages()
|
||||
}
|
||||
registerSyncRepoLicenses()
|
||||
}
|
||||
|
@@ -26,6 +26,7 @@ import (
|
||||
"code.gitea.io/gitea/modules/optional"
|
||||
"code.gitea.io/gitea/modules/structs"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
repo_service "code.gitea.io/gitea/services/repository"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@@ -302,6 +303,8 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) {
|
||||
toRepoName := "migrated"
|
||||
uploader := NewGiteaLocalUploader(context.Background(), fromRepoOwner, fromRepoOwner.Name, toRepoName)
|
||||
uploader.gitServiceType = structs.GiteaService
|
||||
|
||||
assert.NoError(t, repo_service.Init(context.Background()))
|
||||
assert.NoError(t, uploader.CreateRepo(&base.Repository{
|
||||
Description: "description",
|
||||
OriginalURL: fromRepo.RepoPath(),
|
||||
|
@@ -24,6 +24,7 @@ import (
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
notify_service "code.gitea.io/gitea/services/notify"
|
||||
repo_service "code.gitea.io/gitea/services/repository"
|
||||
)
|
||||
|
||||
// gitShortEmptySha Git short empty SHA
|
||||
@@ -559,6 +560,14 @@ func SyncPullMirror(ctx context.Context, repoID int64) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// Update License
|
||||
if err = repo_service.AddRepoToLicenseUpdaterQueue(&repo_service.LicenseUpdaterOptions{
|
||||
RepoID: m.Repo.ID,
|
||||
}); err != nil {
|
||||
log.Error("SyncMirrors [repo: %-v]: unable to add repo to license updater queue: %v", m.Repo, err)
|
||||
return false
|
||||
}
|
||||
|
||||
log.Trace("SyncMirrors [repo: %-v]: Successfully updated", m.Repo)
|
||||
|
||||
return true
|
||||
|
@@ -612,6 +612,14 @@ func SetRepoDefaultBranch(ctx context.Context, repo *repo_model.Repository, gitR
|
||||
return err
|
||||
}
|
||||
|
||||
if !repo.IsEmpty {
|
||||
if err := AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{
|
||||
RepoID: repo.ID,
|
||||
}); err != nil {
|
||||
log.Error("AddRepoToLicenseUpdaterQueue: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
notify_service.ChangeDefaultBranch(ctx, repo)
|
||||
|
||||
return nil
|
||||
|
@@ -303,6 +303,25 @@ func CreateRepositoryDirectly(ctx context.Context, doer, u *user_model.User, opt
|
||||
rollbackRepo.OwnerID = u.ID
|
||||
return fmt.Errorf("CreateRepository(git update-server-info): %w", err)
|
||||
}
|
||||
|
||||
// update licenses
|
||||
var licenses []string
|
||||
if len(opts.License) > 0 {
|
||||
licenses = append(licenses, ConvertLicenseName(opts.License))
|
||||
|
||||
stdout, _, err := git.NewCommand(ctx, "rev-parse", "HEAD").
|
||||
SetDescription(fmt.Sprintf("CreateRepository(git rev-parse HEAD): %s", repoPath)).
|
||||
RunStdString(&git.RunOpts{Dir: repoPath})
|
||||
if err != nil {
|
||||
log.Error("CreateRepository(git rev-parse HEAD) in %v: Stdout: %s\nError: %v", repo, stdout, err)
|
||||
rollbackRepo = repo
|
||||
rollbackRepo.OwnerID = u.ID
|
||||
return fmt.Errorf("CreateRepository(git rev-parse HEAD): %w", err)
|
||||
}
|
||||
if err := repo_model.UpdateRepoLicenses(ctx, repo, stdout, licenses); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
if rollbackRepo != nil {
|
||||
|
@@ -140,6 +140,7 @@ func DeleteRepositoryDirectly(ctx context.Context, doer *user_model.User, repoID
|
||||
&git_model.Branch{RepoID: repoID},
|
||||
&git_model.LFSLock{RepoID: repoID},
|
||||
&repo_model.LanguageStat{RepoID: repoID},
|
||||
&repo_model.RepoLicense{RepoID: repoID},
|
||||
&issues_model.Milestone{RepoID: repoID},
|
||||
&repo_model.Mirror{RepoID: repoID},
|
||||
&activities_model.Notification{RepoID: repoID},
|
||||
|
@@ -198,6 +198,9 @@ func ForkRepository(ctx context.Context, doer, owner *user_model.User, opts Fork
|
||||
if err := repo_model.CopyLanguageStat(ctx, opts.BaseRepo, repo); err != nil {
|
||||
log.Error("Copy language stat from oldRepo failed: %v", err)
|
||||
}
|
||||
if err := repo_model.CopyLicense(ctx, opts.BaseRepo, repo); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gitRepo, err := gitrepo.OpenRepository(ctx, repo)
|
||||
if err != nil {
|
||||
|
205
services/repository/license.go
Normal file
205
services/repository/license.go
Normal file
@@ -0,0 +1,205 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
repo_model "code.gitea.io/gitea/models/repo"
|
||||
"code.gitea.io/gitea/modules/container"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/gitrepo"
|
||||
"code.gitea.io/gitea/modules/graceful"
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/options"
|
||||
"code.gitea.io/gitea/modules/queue"
|
||||
|
||||
licenseclassifier "github.com/google/licenseclassifier/v2"
|
||||
)
|
||||
|
||||
// Package-level state shared by the license detection helpers in this file.
var (
|
||||
// classifier is assigned by InitLicenseClassifier and queried by detectLicense.
classifier *licenseclassifier.Classifier
|
||||
// LicenseFileName is the path UpdateRepoLicenses looks up in a commit.
LicenseFileName = "LICENSE"
|
||||
// licenseAliases is filled by loadLicenseAliases from the license-aliases.json
// asset and consulted by ConvertLicenseName.
licenseAliases map[string]string
|
||||
|
||||
// licenseUpdaterQueue represents a queue to handle update repo licenses
|
||||
licenseUpdaterQueue *queue.WorkerPoolQueue[*LicenseUpdaterOptions]
|
||||
)
|
||||
|
||||
func AddRepoToLicenseUpdaterQueue(opts *LicenseUpdaterOptions) error {
|
||||
if opts == nil {
|
||||
return nil
|
||||
}
|
||||
return licenseUpdaterQueue.Push(opts)
|
||||
}
|
||||
|
||||
func loadLicenseAliases() error {
|
||||
if licenseAliases != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := options.AssetFS().ReadFile("license", "etc", "license-aliases.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = json.Unmarshal(data, &licenseAliases)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ConvertLicenseName(name string) string {
|
||||
if err := loadLicenseAliases(); err != nil {
|
||||
return name
|
||||
}
|
||||
|
||||
v, ok := licenseAliases[name]
|
||||
if ok {
|
||||
return v
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
func InitLicenseClassifier() error {
|
||||
// threshold should be 0.84~0.86 or the test will be failed
|
||||
classifier = licenseclassifier.NewClassifier(.85)
|
||||
licenseFiles, err := options.AssetFS().ListFiles("license", true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
existLicense := make(container.Set[string])
|
||||
if len(licenseFiles) > 0 {
|
||||
for _, licenseFile := range licenseFiles {
|
||||
licenseName := ConvertLicenseName(licenseFile)
|
||||
if existLicense.Contains(licenseName) {
|
||||
continue
|
||||
}
|
||||
existLicense.Add(licenseName)
|
||||
data, err := options.License(licenseFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
classifier.AddContent("License", licenseFile, licenseName, data)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// LicenseUpdaterOptions is the item type carried by licenseUpdaterQueue.
type LicenseUpdaterOptions struct {
|
||||
// RepoID is the repository ID that repoLicenseUpdater loads and refreshes.
RepoID int64
|
||||
}
|
||||
|
||||
func repoLicenseUpdater(items ...*LicenseUpdaterOptions) []*LicenseUpdaterOptions {
|
||||
ctx := graceful.GetManager().ShutdownContext()
|
||||
|
||||
for _, opts := range items {
|
||||
repo, err := repo_model.GetRepositoryByID(ctx, opts.RepoID)
|
||||
if err != nil {
|
||||
log.Error("repoLicenseUpdater [%d] failed: GetRepositoryByID: %v", opts.RepoID, err)
|
||||
continue
|
||||
}
|
||||
if repo.IsEmpty {
|
||||
continue
|
||||
}
|
||||
|
||||
gitRepo, err := gitrepo.OpenRepository(ctx, repo)
|
||||
if err != nil {
|
||||
log.Error("repoLicenseUpdater [%d] failed: OpenRepository: %v", opts.RepoID, err)
|
||||
continue
|
||||
}
|
||||
defer gitRepo.Close()
|
||||
|
||||
commit, err := gitRepo.GetBranchCommit(repo.DefaultBranch)
|
||||
if err != nil {
|
||||
log.Error("repoLicenseUpdater [%d] failed: GetBranchCommit: %v", opts.RepoID, err)
|
||||
continue
|
||||
}
|
||||
if err = UpdateRepoLicenses(ctx, repo, commit); err != nil {
|
||||
log.Error("repoLicenseUpdater [%d] failed: updateRepoLicenses: %v", opts.RepoID, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func SyncRepoLicenses(ctx context.Context) error {
|
||||
log.Trace("Doing: SyncRepoLicenses")
|
||||
|
||||
if err := db.Iterate(
|
||||
ctx,
|
||||
nil,
|
||||
func(ctx context.Context, repo *repo_model.Repository) error {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return db.ErrCancelledf("before sync repo licenses for %s", repo.FullName())
|
||||
default:
|
||||
}
|
||||
return AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID})
|
||||
},
|
||||
); err != nil {
|
||||
log.Trace("Error: SyncRepoLicenses: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Trace("Finished: SyncReposLicenses")
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateRepoLicenses will update repository licenses col if license file exists
|
||||
func UpdateRepoLicenses(ctx context.Context, repo *repo_model.Repository, commit *git.Commit) error {
|
||||
if commit == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
b, err := commit.GetBlobByPath(LicenseFileName)
|
||||
if err != nil && !git.IsErrNotExist(err) {
|
||||
return fmt.Errorf("GetBlobByPath: %w", err)
|
||||
}
|
||||
|
||||
if git.IsErrNotExist(err) {
|
||||
return repo_model.CleanRepoLicenses(ctx, repo)
|
||||
}
|
||||
|
||||
licenses := make([]string, 0)
|
||||
if b != nil {
|
||||
r, err := b.DataAsync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
licenses, err = detectLicense(r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("detectLicense: %w", err)
|
||||
}
|
||||
}
|
||||
return repo_model.UpdateRepoLicenses(ctx, repo, commit.ID.String(), licenses)
|
||||
}
|
||||
|
||||
// detectLicense returns the licenses detected by the given content buff
|
||||
func detectLicense(r io.Reader) ([]string, error) {
|
||||
if r == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
matches, err := classifier.MatchFrom(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(matches.Matches) > 0 {
|
||||
results := make(container.Set[string], len(matches.Matches))
|
||||
for _, r := range matches.Matches {
|
||||
if r.MatchType == "License" && !results.Contains(r.Variant) {
|
||||
results.Add(r.Variant)
|
||||
}
|
||||
}
|
||||
return results.Values(), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
73
services/repository/license_test.go
Normal file
73
services/repository/license_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
repo_module "code.gitea.io/gitea/modules/repository"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_detectLicense(t *testing.T) {
|
||||
type DetectLicenseTest struct {
|
||||
name string
|
||||
arg string
|
||||
want []string
|
||||
}
|
||||
|
||||
tests := []DetectLicenseTest{
|
||||
{
|
||||
name: "empty",
|
||||
arg: "",
|
||||
want: nil,
|
||||
},
|
||||
{
|
||||
name: "no detected license",
|
||||
arg: "Copyright (c) 2023 Gitea",
|
||||
want: nil,
|
||||
},
|
||||
}
|
||||
|
||||
repo_module.LoadRepoConfig()
|
||||
err := loadLicenseAliases()
|
||||
assert.NoError(t, err)
|
||||
for _, licenseName := range repo_module.Licenses {
|
||||
license, err := repo_module.GetLicense(licenseName, &repo_module.LicenseValues{
|
||||
Owner: "Gitea",
|
||||
Email: "teabot@gitea.io",
|
||||
Repo: "gitea",
|
||||
Year: "2024",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
tests = append(tests, DetectLicenseTest{
|
||||
name: fmt.Sprintf("single license test: %s", licenseName),
|
||||
arg: string(license),
|
||||
want: []string{ConvertLicenseName(licenseName)},
|
||||
})
|
||||
}
|
||||
|
||||
err = InitLicenseClassifier()
|
||||
assert.NoError(t, err)
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
license, err := detectLicense(strings.NewReader(tt.arg))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.want, license)
|
||||
})
|
||||
}
|
||||
|
||||
result, err := detectLicense(strings.NewReader(tests[2].arg + tests[3].arg + tests[4].arg))
|
||||
assert.NoError(t, err)
|
||||
t.Run("multiple licenses test", func(t *testing.T) {
|
||||
assert.Equal(t, 3, len(result))
|
||||
assert.Contains(t, result, tests[2].want[0])
|
||||
assert.Contains(t, result, tests[3].want[0])
|
||||
assert.Contains(t, result, tests[4].want[0])
|
||||
})
|
||||
}
|
@@ -172,6 +172,11 @@ func MigrateRepositoryGitData(ctx context.Context, u *user_model.User,
|
||||
return repo, fmt.Errorf("StoreMissingLfsObjectsInRepository: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Update repo license
|
||||
if err := AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID}); err != nil {
|
||||
log.Error("Failed to add repo to license updater queue: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
ctx, committer, err := db.TxContext(ctx)
|
||||
|
@@ -18,6 +18,7 @@ import (
|
||||
user_model "code.gitea.io/gitea/models/user"
|
||||
"code.gitea.io/gitea/modules/graceful"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/queue"
|
||||
repo_module "code.gitea.io/gitea/modules/repository"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/structs"
|
||||
@@ -96,6 +97,12 @@ func PushCreateRepo(ctx context.Context, authUser, owner *user_model.User, repoN
|
||||
|
||||
// Init start repository service
|
||||
func Init(ctx context.Context) error {
|
||||
licenseUpdaterQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo_license_updater", repoLicenseUpdater)
|
||||
if licenseUpdaterQueue == nil {
|
||||
return fmt.Errorf("unable to create repo_license_updater queue")
|
||||
}
|
||||
go graceful.GetManager().RunWithCancel(licenseUpdaterQueue)
|
||||
|
||||
if err := repo_module.LoadRepoConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
Reference in New Issue
Block a user