Refactor parseSignatureFromCommitLine (#29054)

Replace #28849. Thanks to @yp05327 for the looking into the problem.
Fix #28840

The old behavior of newSignatureFromCommitline is not right. The new
parseSignatureFromCommitLine:

1. never fails
2. only accept one format (if there is any other, it could be easily added)

And add some tests.
This commit is contained in:
wxiaoguang 2024-02-09 11:02:53 +08:00 committed by GitHub
parent da2f03750f
commit a24e1da7e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 104 additions and 149 deletions

View File

@ -271,7 +271,7 @@ func GetLatestCommitTime(ctx context.Context, repoPath string) (time.Time, error
return time.Time{}, err
}
commitTime := strings.TrimSpace(stdout)
return time.Parse(GitTimeLayout, commitTime)
return time.Parse("Mon Jan _2 15:04:05 2006 -0700", commitTime)
}
// DivergeObject represents commit count diverging commits

View File

@ -183,11 +183,7 @@ func parseTagRef(objectFormat ObjectFormat, ref map[string]string) (tag *Tag, er
}
}
tag.Tagger, err = newSignatureFromCommitline([]byte(ref["creator"]))
if err != nil {
return nil, fmt.Errorf("parse tagger: %w", err)
}
tag.Tagger = parseSignatureFromCommitLine(ref["creator"])
tag.Message = ref["contents"]
// strip PGP signature if present in contents field
pgpStart := strings.Index(tag.Message, beginpgp)

View File

@ -227,7 +227,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Type: "commit",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
},
@ -256,7 +256,7 @@ func TestRepository_parseTagRef(t *testing.T) {
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
},
@ -314,7 +314,7 @@ qbHDASXl
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseAuthorLine(t, "Foo Bar <foo@bar.com> 1565789218 +0300"),
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md",
Signature: &CommitGPGSignature{
Signature: `-----BEGIN PGP SIGNATURE-----
@ -363,14 +363,3 @@ Add changelog of v1.9.1 (#7859)
})
}
}
func parseAuthorLine(t *testing.T, committer string) *Signature {
t.Helper()
sig, err := newSignatureFromCommitline([]byte(committer))
if err != nil {
t.Fatalf("parse author line '%s': %v", committer, err)
}
return sig
}

View File

@ -4,7 +4,46 @@
package git
const (
// GitTimeLayout is the (default) time layout used by git.
GitTimeLayout = "Mon Jan _2 15:04:05 2006 -0700"
import (
"strconv"
"strings"
"time"
"code.gitea.io/gitea/modules/log"
)
// Helper to get a signature from the commit line, which looks like:
//
// full name <user@example.com> 1378823654 +0200
//
// Haven't found the official reference for the standard format yet.
// This function never fails, if the "line" can't be parsed, it returns a default Signature with "zero" time.
func parseSignatureFromCommitLine(line string) *Signature {
sig := &Signature{}
s1, sx, ok1 := strings.Cut(line, " <")
s2, s3, ok2 := strings.Cut(sx, "> ")
if !ok1 || !ok2 {
sig.Name = line
return sig
}
sig.Name, sig.Email = s1, s2
if strings.Count(s3, " ") == 1 {
ts, tz, _ := strings.Cut(s3, " ")
seconds, _ := strconv.ParseInt(ts, 10, 64)
if tzTime, err := time.Parse("-0700", tz); err == nil {
sig.When = time.Unix(seconds, 0).In(tzTime.Location())
}
} else {
// the old gitea code tried to parse the date in a few different formats, but it's not clear why.
// according to public document, only the standard format "timestamp timezone" could be found, so drop other formats.
log.Error("suspicious commit line format: %q", line)
for _, fmt := range []string{ /*"Mon Jan _2 15:04:05 2006 -0700"*/ } {
if t, err := time.Parse(fmt, s3); err == nil {
sig.When = t
break
}
}
}
return sig
}

View File

@ -7,52 +7,8 @@
package git
import (
"bytes"
"strconv"
"strings"
"time"
"github.com/go-git/go-git/v5/plumbing/object"
)
// Signature represents the Author or Committer information.
type Signature = object.Signature
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
//
// FIXME: include timezone for timestamp!
func newSignatureFromCommitline(line []byte) (_ *Signature, err error) {
sig := new(Signature)
emailStart := bytes.IndexByte(line, '<')
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
emailEnd := bytes.IndexByte(line, '>')
sig.Email = string(line[emailStart+1 : emailEnd])
// Check date format.
if len(line) > emailEnd+2 {
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
timestop := bytes.IndexByte(line[emailEnd+2:], ' ')
timestring := string(line[emailEnd+2 : emailEnd+2+timestop])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return nil, err
}
}
} else {
// Fall back to unix 0 time
sig.When = time.Unix(0, 0)
}
return sig, nil
}

View File

@ -7,21 +7,17 @@
package git
import (
"bytes"
"fmt"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/modules/util"
)
// Signature represents the Author or Committer information.
// Signature represents the Author, Committer or Tagger information.
type Signature struct {
// Name represents a person name. It is an arbitrary string.
Name string
// Email is an email, but it cannot be assumed to be well-formed.
Email string
// When is the timestamp of the signature.
When time.Time
Name string // the committer name, it can be anything
Email string // the committer email, it can be anything
When time.Time // the timestamp of the signature
}
func (s *Signature) String() string {
@ -30,71 +26,5 @@ func (s *Signature) String() string {
// Decode decodes a byte array representing a signature to signature
func (s *Signature) Decode(b []byte) {
sig, _ := newSignatureFromCommitline(b)
s.Email = sig.Email
s.Name = sig.Name
s.When = sig.When
}
// Helper to get a signature from the commit line, which looks like these:
//
// author Patrick Gundlach <gundlach@speedata.de> 1378823654 +0200
// author Patrick Gundlach <gundlach@speedata.de> Thu, 07 Apr 2005 22:13:13 +0200
//
// but without the "author " at the beginning (this method should)
// be used for author and committer.
// FIXME: there are a lot of "return sig, err" (but the err is also nil), that's the old behavior, to avoid breaking
func newSignatureFromCommitline(line []byte) (sig *Signature, err error) {
sig = new(Signature)
emailStart := bytes.LastIndexByte(line, '<')
emailEnd := bytes.LastIndexByte(line, '>')
if emailStart == -1 || emailEnd == -1 || emailEnd < emailStart {
return sig, err
}
if emailStart > 0 { // Empty name has already occurred, even if it shouldn't
sig.Name = strings.TrimSpace(string(line[:emailStart-1]))
}
sig.Email = string(line[emailStart+1 : emailEnd])
hasTime := emailEnd+2 < len(line)
if !hasTime {
return sig, err
}
// Check date format.
firstChar := line[emailEnd+2]
if firstChar >= 48 && firstChar <= 57 {
idx := bytes.IndexByte(line[emailEnd+2:], ' ')
if idx < 0 {
return sig, err
}
timestring := string(line[emailEnd+2 : emailEnd+2+idx])
seconds, _ := strconv.ParseInt(timestring, 10, 64)
sig.When = time.Unix(seconds, 0)
idx += emailEnd + 3
if idx >= len(line) || idx+5 > len(line) {
return sig, err
}
timezone := string(line[idx : idx+5])
tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64)
tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64)
if err1 != nil || err2 != nil {
return sig, err
}
if tzhours < 0 {
tzmins *= -1
}
tz := time.FixedZone("", int(tzhours*60*60+tzmins*60))
sig.When = sig.When.In(tz)
} else {
sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:]))
if err != nil {
return sig, err
}
}
return sig, err
*s = *parseSignatureFromCommitLine(util.UnsafeBytesToString(b))
}

View File

@ -0,0 +1,47 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestParseSignatureFromCommitLine(t *testing.T) {
tests := []struct {
line string
want *Signature
}{
{
line: "a b <c@d.com> 12345 +0100",
want: &Signature{
Name: "a b",
Email: "c@d.com",
When: time.Unix(12345, 0).In(time.FixedZone("", 3600)),
},
},
{
line: "bad line",
want: &Signature{Name: "bad line"},
},
{
line: "bad < line",
want: &Signature{Name: "bad < line"},
},
{
line: "bad > line",
want: &Signature{Name: "bad > line"},
},
{
line: "bad-line <name@example.com>",
want: &Signature{Name: "bad-line <name@example.com>"},
},
}
for _, test := range tests {
got := parseSignatureFromCommitLine(test.line)
assert.EqualValues(t, test.want, got)
}
}

View File

@ -7,6 +7,8 @@ import (
"bytes"
"sort"
"strings"
"code.gitea.io/gitea/modules/util"
)
const (
@ -59,11 +61,7 @@ l:
// A commit can have one or more parents
tag.Type = string(line[spacepos+1:])
case "tagger":
sig, err := newSignatureFromCommitline(line[spacepos+1:])
if err != nil {
return nil, err
}
tag.Tagger = sig
tag.Tagger = parseSignatureFromCommitLine(util.UnsafeBytesToString(line[spacepos+1:]))
}
nextline += eol + 1
case eol == 0: