Skip to content

Commit

Permalink
Use git log name-status in get last commit (go-gitea#16059)
Browse files Browse the repository at this point in the history
* Improve get last commit using git log --name-status

git log --name-status -c provides information about the diff between a
commit and its parents. Using this and adjusting the algorithm to use
the first change to a path allows for a much faster generation of commit
info.

There is a subtle change in the results generated but this will cause
the results to more closely match those from elsewhere.

Signed-off-by: Andrew Thornton <[email protected]>

Co-authored-by: 6543 <[email protected]>
Co-authored-by: techknowlogick <[email protected]>
Co-authored-by: Lauris BH <[email protected]>
  • Loading branch information
4 people authored Jun 20, 2021
1 parent 8fa3bbc commit 23358bc
Show file tree
Hide file tree
Showing 40 changed files with 2,538 additions and 295 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ require (
github.com/couchbase/goutils v0.0.0-20210118111533-e33d3ffb5401 // indirect
github.com/denisenkom/go-mssqldb v0.10.0
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/djherbis/buffer v1.2.0
github.com/djherbis/nio/v3 v3.0.1
github.com/dustin/go-humanize v1.0.0
github.com/editorconfig/editorconfig-core-go/v2 v2.4.2
github.com/emirpasic/gods v1.12.0
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o=
github.com/djherbis/buffer v1.2.0 h1:PH5Dd2ss0C7CRRhQCZ2u7MssF+No9ide8Ye71nPHcrQ=
github.com/djherbis/buffer v1.2.0/go.mod h1:fjnebbZjCUpPinBRD+TDwXSOeNQ7fPQWLfGQqiAiUyE=
github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4=
github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg=
github.com/dlclark/regexp2 v1.1.6/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
Expand Down
111 changes: 40 additions & 71 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (
"math"
"strconv"
"strings"

"github.com/djherbis/buffer"
"github.com/djherbis/nio/v3"
)

// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
Expand Down Expand Up @@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
}
}()

// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
batchReader := bufio.NewReader(batchStdoutReader)

return batchStdinWriter, batchReader, cancel
Expand All @@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
cancel := func() {
_ = batchStdinReader.Close()
_ = batchStdinWriter.Close()
Expand All @@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
}()

// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch
batchReader := bufio.NewReader(batchStdoutReader)
batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)

return batchStdinWriter, batchReader, cancel
}
Expand All @@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// <sha> SP <type> SP <size> LF
// Note: the sha here is in its 40-byte form, not the 20-byte form
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
sha, err = rd.ReadBytes(' ')
typ, err = rd.ReadString('\n')
if err != nil {
return
}
sha = sha[:len(sha)-1]

typ, err = rd.ReadString('\n')
if err != nil {
if len(typ) == 1 {
typ, err = rd.ReadString('\n')
if err != nil {
return
}
}
idx := strings.IndexByte(typ, ' ')
if idx < 0 {
log("missing space typ: %s", typ)
err = ErrNotExist{ID: string(sha)}
return
}
sha = []byte(typ[:idx])
typ = typ[idx+1:]

idx := strings.Index(typ, " ")
idx = strings.IndexByte(typ, ' ')
if idx < 0 {
err = ErrNotExist{ID: string(sha)}
return
}

sizeStr := typ[idx+1 : len(typ)-1]
typ = typ[:idx]

Expand Down Expand Up @@ -130,7 +142,7 @@ headerLoop:
}

// Discard the rest of the tag
discard := size - n
discard := size - n + 1
for discard > math.MaxInt32 {
_, err := rd.Discard(math.MaxInt32)
if err != nil {
Expand Down Expand Up @@ -200,85 +212,42 @@ func To40ByteSHA(sha, out []byte) []byte {
return out
}

// ParseTreeLineSkipMode reads an entry from a tree in a cat-file --batch stream
// This simply skips the mode - saving a substantial amount of time and carefully avoids allocations - except where fnameBuf is too small.
// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
//
// To save a lot of time, we don't attempt to convert the 20-byte SHA to a 40-byte SHA
func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
var readBytes []byte
// Skip the Mode
readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
if err != nil {
return
}
n += len(readBytes)

// Deal with the fname
// Read the Mode & fname
readBytes, err = rd.ReadSlice('\x00')
copy(fnameBuf, readBytes)
if len(fnameBuf) > len(readBytes) {
fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
} else {
fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) // extend the buf and copy in the missing bits
}
for err == bufio.ErrBufferFull { // Then we need to read more
readBytes, err = rd.ReadSlice('\x00')
fnameBuf = append(fnameBuf, readBytes...) // there is little point attempting to avoid allocations here so just extend
}
n += len(fnameBuf)
if err != nil {
return
}
fnameBuf = fnameBuf[:len(fnameBuf)-1] // Drop the terminal NUL
fname = fnameBuf // set the returnable fname to the slice

// Now deal with the 20-byte SHA
idx := 0
for idx < 20 {
read := 0
read, err = rd.Read(shaBuf[idx:20])
n += read
if err != nil {
return
}
idx += read
}
sha = shaBuf
return
}

// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
//
// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
var readBytes []byte
idx := bytes.IndexByte(readBytes, ' ')
if idx < 0 {
log("missing space in readBytes ParseTreeLine: %s", readBytes)

// Read the Mode
readBytes, err = rd.ReadSlice(' ')
if err != nil {
err = &ErrNotExist{}
return
}
n += len(readBytes)
copy(modeBuf, readBytes)
if len(modeBuf) > len(readBytes) {
modeBuf = modeBuf[:len(readBytes)]
} else {
modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)

n += idx + 1
copy(modeBuf, readBytes[:idx])
if len(modeBuf) >= idx {
modeBuf = modeBuf[:idx]
} else {
modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
}
mode = modeBuf[:len(modeBuf)-1] // Drop the SP
mode = modeBuf

readBytes = readBytes[idx+1:]

// Deal with the fname
readBytes, err = rd.ReadSlice('\x00')
copy(fnameBuf, readBytes)
if len(fnameBuf) > len(readBytes) {
fnameBuf = fnameBuf[:len(readBytes)]
Expand All @@ -297,7 +266,7 @@ func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fn
fname = fnameBuf

// Deal with the 20-byte SHA
idx := 0
idx = 0
for idx < 20 {
read := 0
read, err = rd.Read(shaBuf[idx:20])
Expand Down
Loading

0 comments on commit 23358bc

Please sign in to comment.