2018-11-27 21:52:20 +00:00
|
|
|
package packfile
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"compress/zlib"
|
|
|
|
"fmt"
|
|
|
|
"hash"
|
|
|
|
"hash/crc32"
|
|
|
|
"io"
|
|
|
|
stdioutil "io/ioutil"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"gopkg.in/src-d/go-git.v4/plumbing"
|
|
|
|
"gopkg.in/src-d/go-git.v4/utils/binary"
|
|
|
|
"gopkg.in/src-d/go-git.v4/utils/ioutil"
|
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
// ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile
|
|
|
|
ErrEmptyPackfile = NewError("empty packfile")
|
|
|
|
// ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
|
|
|
|
ErrBadSignature = NewError("malformed pack file signature")
|
|
|
|
// ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
|
|
|
|
// different than VersionSupported.
|
|
|
|
ErrUnsupportedVersion = NewError("unsupported packfile version")
|
|
|
|
// ErrSeekNotSupported returned if seek is not support
|
|
|
|
ErrSeekNotSupported = NewError("not seek support")
|
|
|
|
)
|
|
|
|
|
|
|
|
// ObjectHeader contains the information related to the object, this information
|
|
|
|
// is collected from the previous bytes to the content of the object.
|
|
|
|
type ObjectHeader struct {
|
|
|
|
Type plumbing.ObjectType
|
|
|
|
Offset int64
|
|
|
|
Length int64
|
|
|
|
Reference plumbing.Hash
|
|
|
|
OffsetReference int64
|
|
|
|
}
|
|
|
|
|
|
|
|
type Scanner struct {
|
2019-06-19 02:14:15 +00:00
|
|
|
r *scannerReader
|
2018-11-27 21:52:20 +00:00
|
|
|
crc hash.Hash32
|
|
|
|
|
|
|
|
// pendingObject is used to detect if an object has been read, or still
|
|
|
|
// is waiting to be read
|
|
|
|
pendingObject *ObjectHeader
|
|
|
|
version, objects uint32
|
|
|
|
|
|
|
|
// lsSeekable says if this scanner can do Seek or not, to have a Scanner
|
|
|
|
// seekable a r implementing io.Seeker is required
|
|
|
|
IsSeekable bool
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewScanner returns a new Scanner based on a reader, if the given reader
|
|
|
|
// implements io.ReadSeeker the Scanner will be also Seekable
|
|
|
|
func NewScanner(r io.Reader) *Scanner {
|
2019-06-19 02:14:15 +00:00
|
|
|
_, ok := r.(io.ReadSeeker)
|
2018-11-27 21:52:20 +00:00
|
|
|
|
|
|
|
crc := crc32.NewIEEE()
|
|
|
|
return &Scanner{
|
2019-06-19 02:14:15 +00:00
|
|
|
r: newScannerReader(r, crc),
|
2018-11-27 21:52:20 +00:00
|
|
|
crc: crc,
|
|
|
|
IsSeekable: ok,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func (s *Scanner) Reset(r io.Reader) {
|
|
|
|
_, ok := r.(io.ReadSeeker)
|
|
|
|
|
|
|
|
s.r.Reset(r)
|
|
|
|
s.crc.Reset()
|
|
|
|
s.IsSeekable = ok
|
|
|
|
s.pendingObject = nil
|
|
|
|
s.version = 0
|
|
|
|
s.objects = 0
|
|
|
|
}
|
|
|
|
|
2018-11-27 21:52:20 +00:00
|
|
|
// Header reads the whole packfile header (signature, version and object count).
|
|
|
|
// It returns the version and the object count and performs checks on the
|
|
|
|
// validity of the signature and the version fields.
|
|
|
|
func (s *Scanner) Header() (version, objects uint32, err error) {
|
|
|
|
if s.version != 0 {
|
|
|
|
return s.version, s.objects, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
sig, err := s.readSignature()
|
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
err = ErrEmptyPackfile
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if !s.isValidSignature(sig) {
|
|
|
|
err = ErrBadSignature
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
version, err = s.readVersion()
|
|
|
|
s.version = version
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if !s.isSupportedVersion(version) {
|
|
|
|
err = ErrUnsupportedVersion.AddDetails("%d", version)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
objects, err = s.readCount()
|
|
|
|
s.objects = objects
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// readSignature reads an returns the signature field in the packfile.
|
|
|
|
func (s *Scanner) readSignature() ([]byte, error) {
|
|
|
|
var sig = make([]byte, 4)
|
|
|
|
if _, err := io.ReadFull(s.r, sig); err != nil {
|
|
|
|
return []byte{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return sig, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// isValidSignature returns if sig is a valid packfile signature.
|
|
|
|
func (s *Scanner) isValidSignature(sig []byte) bool {
|
|
|
|
return bytes.Equal(sig, signature)
|
|
|
|
}
|
|
|
|
|
|
|
|
// readVersion reads and returns the version field of a packfile.
|
|
|
|
func (s *Scanner) readVersion() (uint32, error) {
|
|
|
|
return binary.ReadUint32(s.r)
|
|
|
|
}
|
|
|
|
|
|
|
|
// isSupportedVersion returns whether version v is supported by the parser.
|
|
|
|
// The current supported version is VersionSupported, defined above.
|
|
|
|
func (s *Scanner) isSupportedVersion(v uint32) bool {
|
|
|
|
return v == VersionSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// readCount reads and returns the count of objects field of a packfile.
|
|
|
|
func (s *Scanner) readCount() (uint32, error) {
|
|
|
|
return binary.ReadUint32(s.r)
|
|
|
|
}
|
|
|
|
|
2019-04-17 02:04:23 +00:00
|
|
|
// SeekObjectHeader seeks to specified offset and returns the ObjectHeader
|
|
|
|
// for the next object in the reader
|
|
|
|
func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
|
|
|
|
// if seeking we assume that you are not interested in the header
|
|
|
|
if s.version == 0 {
|
|
|
|
s.version = VersionSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h, err := s.nextObjectHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h.Offset = offset
|
|
|
|
return h, nil
|
|
|
|
}
|
|
|
|
|
2018-11-27 21:52:20 +00:00
|
|
|
// NextObjectHeader returns the ObjectHeader for the next object in the reader
|
|
|
|
func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
|
|
|
|
if err := s.doPending(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2019-04-17 02:04:23 +00:00
|
|
|
offset, err := s.r.Seek(0, io.SeekCurrent)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h, err := s.nextObjectHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h.Offset = offset
|
|
|
|
return h, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// nextObjectHeader returns the ObjectHeader for the next object in the reader
|
|
|
|
// without the Offset field
|
|
|
|
func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
|
2019-06-19 02:14:15 +00:00
|
|
|
s.r.Flush()
|
2018-11-27 21:52:20 +00:00
|
|
|
s.crc.Reset()
|
|
|
|
|
|
|
|
h := &ObjectHeader{}
|
|
|
|
s.pendingObject = h
|
|
|
|
|
|
|
|
var err error
|
|
|
|
h.Offset, err = s.r.Seek(0, io.SeekCurrent)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h.Type, h.Length, err = s.readObjectTypeAndLength()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
switch h.Type {
|
|
|
|
case plumbing.OFSDeltaObject:
|
|
|
|
no, err := binary.ReadVariableWidthInt(s.r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
h.OffsetReference = h.Offset - no
|
|
|
|
case plumbing.REFDeltaObject:
|
|
|
|
var err error
|
|
|
|
h.Reference, err = binary.ReadHash(s.r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return h, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scanner) doPending() error {
|
|
|
|
if s.version == 0 {
|
|
|
|
var err error
|
|
|
|
s.version, s.objects, err = s.Header()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return s.discardObjectIfNeeded()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scanner) discardObjectIfNeeded() error {
|
|
|
|
if s.pendingObject == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
h := s.pendingObject
|
|
|
|
n, _, err := s.NextObject(stdioutil.Discard)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if n != h.Length {
|
|
|
|
return fmt.Errorf(
|
|
|
|
"error discarding object, discarded %d, expected %d",
|
|
|
|
n, h.Length,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadObjectTypeAndLength reads and returns the object type and the
|
|
|
|
// length field from an object entry in a packfile.
|
|
|
|
func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
|
|
|
|
t, c, err := s.readType()
|
|
|
|
if err != nil {
|
|
|
|
return t, 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
l, err := s.readLength(c)
|
|
|
|
|
|
|
|
return t, l, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
|
|
|
|
var c byte
|
|
|
|
var err error
|
|
|
|
if c, err = s.r.ReadByte(); err != nil {
|
|
|
|
return plumbing.ObjectType(0), 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
typ := parseType(c)
|
|
|
|
|
|
|
|
return typ, c, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseType(b byte) plumbing.ObjectType {
|
|
|
|
return plumbing.ObjectType((b & maskType) >> firstLengthBits)
|
|
|
|
}
|
|
|
|
|
|
|
|
// the length is codified in the last 4 bits of the first byte and in
|
|
|
|
// the last 7 bits of subsequent bytes. Last byte has a 0 MSB.
|
|
|
|
func (s *Scanner) readLength(first byte) (int64, error) {
|
|
|
|
length := int64(first & maskFirstLength)
|
|
|
|
|
|
|
|
c := first
|
|
|
|
shift := firstLengthBits
|
|
|
|
var err error
|
|
|
|
for c&maskContinue > 0 {
|
|
|
|
if c, err = s.r.ReadByte(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
length += int64(c&maskLength) << shift
|
|
|
|
shift += lengthBits
|
|
|
|
}
|
|
|
|
|
|
|
|
return length, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NextObject writes the content of the next object into the reader, returns
|
|
|
|
// the number of bytes written, the CRC32 of the content and an error, if any
|
|
|
|
func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
|
|
|
|
s.pendingObject = nil
|
|
|
|
written, err = s.copyObject(w)
|
2019-06-19 02:14:15 +00:00
|
|
|
|
|
|
|
s.r.Flush()
|
2018-11-27 21:52:20 +00:00
|
|
|
crc32 = s.crc.Sum32()
|
2019-06-19 02:14:15 +00:00
|
|
|
s.crc.Reset()
|
|
|
|
|
2018-11-27 21:52:20 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadRegularObject reads and write a non-deltified object
|
|
|
|
// from it zlib stream in an object entry in the packfile.
|
|
|
|
func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
|
2019-06-19 02:14:15 +00:00
|
|
|
zr := zlibReaderPool.Get().(io.ReadCloser)
|
|
|
|
defer zlibReaderPool.Put(zr)
|
2018-11-27 21:52:20 +00:00
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
if err = zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
|
|
|
|
return 0, fmt.Errorf("zlib reset error: %s", err)
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
defer ioutil.CheckClose(zr, &err)
|
2018-11-27 21:52:20 +00:00
|
|
|
buf := byteSlicePool.Get().([]byte)
|
2019-06-19 02:14:15 +00:00
|
|
|
n, err = io.CopyBuffer(w, zr, buf)
|
2018-11-27 21:52:20 +00:00
|
|
|
byteSlicePool.Put(buf)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// byteSlicePool hands out 32 KiB byte slices that are used as scratch buffers
// for io.CopyBuffer.
var byteSlicePool = sync.Pool{
	New: func() interface{} {
		const size = 32 * 1024
		buf := make([]byte, size)
		return buf
	},
}
|
|
|
|
|
|
|
|
// SeekFromStart sets a new offset from start, returns the old position before
|
|
|
|
// the change.
|
|
|
|
func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
|
2019-04-17 02:04:23 +00:00
|
|
|
// if seeking we assume that you are not interested in the header
|
2018-11-27 21:52:20 +00:00
|
|
|
if s.version == 0 {
|
|
|
|
s.version = VersionSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
previous, err = s.r.Seek(0, io.SeekCurrent)
|
|
|
|
if err != nil {
|
|
|
|
return -1, err
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = s.r.Seek(offset, io.SeekStart)
|
|
|
|
return previous, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Checksum returns the checksum of the packfile
|
|
|
|
func (s *Scanner) Checksum() (plumbing.Hash, error) {
|
|
|
|
err := s.discardObjectIfNeeded()
|
|
|
|
if err != nil {
|
|
|
|
return plumbing.ZeroHash, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return binary.ReadHash(s.r)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close reads the reader until io.EOF
|
|
|
|
func (s *Scanner) Close() error {
|
|
|
|
buf := byteSlicePool.Get().([]byte)
|
|
|
|
_, err := io.CopyBuffer(stdioutil.Discard, s.r, buf)
|
|
|
|
byteSlicePool.Put(buf)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
// Flush is a no-op (deprecated)
|
2018-11-27 21:52:20 +00:00
|
|
|
func (s *Scanner) Flush() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
// scannerReader has the following characteristics:
//   - Provides an io.Seeker implementation on top of a bufio.Reader, when the
//     underlying reader supports seeking.
//   - Keeps track of the current read position, so that the offset is known
//     even when the underlying reader cannot seek.
//   - Writes what it reads to the hash writer, through a small buffer. The
//     buffer helps avoid a performance penalty for performing small writes to
//     the crc32 hash writer.
type scannerReader struct {
	// reader is the underlying source of data.
	reader io.Reader

	// crc is the hash writer that receives a copy of everything read.
	crc io.Writer

	// rbuf buffers the reads from reader.
	rbuf *bufio.Reader

	// wbuf buffers the writes to crc.
	wbuf *bufio.Writer

	// offset is the current read position.
	offset int64
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func newScannerReader(r io.Reader, h io.Writer) *scannerReader {
|
|
|
|
sr := &scannerReader{
|
|
|
|
rbuf: bufio.NewReader(nil),
|
|
|
|
wbuf: bufio.NewWriterSize(nil, 64),
|
|
|
|
crc: h,
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
2019-06-19 02:14:15 +00:00
|
|
|
sr.Reset(r)
|
2018-11-27 21:52:20 +00:00
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
return sr
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func (r *scannerReader) Reset(reader io.Reader) {
|
|
|
|
r.reader = reader
|
|
|
|
r.rbuf.Reset(r.reader)
|
|
|
|
r.wbuf.Reset(r.crc)
|
2018-11-27 21:52:20 +00:00
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
r.offset = 0
|
|
|
|
if seeker, ok := r.reader.(io.ReadSeeker); ok {
|
|
|
|
r.offset, _ = seeker.Seek(0, io.SeekCurrent)
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func (r *scannerReader) Read(p []byte) (n int, err error) {
|
|
|
|
n, err = r.rbuf.Read(p)
|
2018-11-27 21:52:20 +00:00
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
r.offset += int64(n)
|
|
|
|
if _, err := r.wbuf.Write(p[:n]); err != nil {
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
return
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func (r *scannerReader) ReadByte() (b byte, err error) {
|
|
|
|
b, err = r.rbuf.ReadByte()
|
|
|
|
if err == nil {
|
|
|
|
r.offset++
|
|
|
|
return b, r.wbuf.WriteByte(b)
|
|
|
|
}
|
|
|
|
return
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
func (r *scannerReader) Flush() error {
|
|
|
|
return r.wbuf.Flush()
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
// Seek seeks to a location. If the underlying reader is not an io.ReadSeeker,
|
|
|
|
// then only whence=io.SeekCurrent is supported, any other operation fails.
|
|
|
|
func (r *scannerReader) Seek(offset int64, whence int) (int64, error) {
|
|
|
|
var err error
|
2018-11-27 21:52:20 +00:00
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
if seeker, ok := r.reader.(io.ReadSeeker); !ok {
|
|
|
|
if whence != io.SeekCurrent || offset != 0 {
|
|
|
|
return -1, ErrSeekNotSupported
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if whence == io.SeekCurrent && offset == 0 {
|
|
|
|
return r.offset, nil
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
r.offset, err = seeker.Seek(offset, whence)
|
|
|
|
r.rbuf.Reset(r.reader)
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|
|
|
|
|
2019-06-19 02:14:15 +00:00
|
|
|
return r.offset, err
|
2018-11-27 21:52:20 +00:00
|
|
|
}
|