githaven/modules/git/pipeline/lfs_nogogit.go
Lunny Xiao 4eb2a29910
Improve ObjectFormat interface (#28496)
The 4 functions are duplicated, especially as interface methods. I think
we just need to keep `MustID` the only one and remove other 3.

```
MustID(b []byte) ObjectID
MustIDFromString(s string) ObjectID
NewID(b []byte) (ObjectID, error)
NewIDFromString(s string) (ObjectID, error)
```

Introduced the new interfrace method `ComputeHash` which will replace
the interface `HasherInterface`. Now we don't need to keep two
interfaces.

Reintroduced `git.NewIDFromString` and `git.MustIDFromString`. The new
function will detect the hash length to decide which objectformat of it.
If it's 40, then it's SHA1. If it's 64, then it's SHA256. This will be
right if the commitID is a full one. So the parameter should be always a
full commit id.

@AdamMajer Please review.
2023-12-19 07:20:47 +00:00

247 lines
6.2 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
//go:build !gogit
package pipeline
import (
"bufio"
"bytes"
"fmt"
"io"
"sort"
"strings"
"sync"
"time"
"code.gitea.io/gitea/modules/git"
)
// LFSResult represents commits found using a provided pointer file hash
type LFSResult struct {
Name string
SHA string
Summary string
When time.Time
ParentIDs []git.ObjectID
BranchName string
FullCommitName string
}
type lfsResultSlice []*LFSResult
func (a lfsResultSlice) Len() int { return len(a) }
func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) }
// FindLFSFile finds commits that contain a provided pointer file hash
func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) {
resultsMap := map[string]*LFSResult{}
results := make([]*LFSResult, 0)
basePath := repo.Path
// Use rev-list to provide us with all commits in order
revListReader, revListWriter := io.Pipe()
defer func() {
_ = revListWriter.Close()
_ = revListReader.Close()
}()
go func() {
stderr := strings.Builder{}
err := git.NewCommand(repo.Ctx, "rev-list", "--all").Run(&git.RunOpts{
Dir: repo.Path,
Stdout: revListWriter,
Stderr: &stderr,
})
if err != nil {
_ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String()))
} else {
_ = revListWriter.Close()
}
}()
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx)
defer cancel()
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
scan := bufio.NewScanner(revListReader)
trees := [][]byte{}
paths := []string{}
fnameBuf := make([]byte, 4096)
modeBuf := make([]byte, 40)
workingShaBuf := make([]byte, objectID.Type().FullLength()/2)
for scan.Scan() {
// Get the next commit ID
commitID := scan.Bytes()
// push the commit to the cat-file --batch process
_, err := batchStdinWriter.Write(commitID)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte{'\n'})
if err != nil {
return nil, err
}
var curCommit *git.Commit
curPath := ""
commitReadingLoop:
for {
_, typ, size, err := git.ReadBatchLine(batchReader)
if err != nil {
return nil, err
}
switch typ {
case "tag":
// This shouldn't happen but if it does well just get the commit and try again
id, err := git.ReadTagObjectID(batchReader, size)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte(id + "\n"))
if err != nil {
return nil, err
}
continue
case "commit":
// Read in the commit to get its tree and in case this is one of the last used commits
curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size))
if err != nil {
return nil, err
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil {
return nil, err
}
curPath = ""
case "tree":
var n int64
for n < size {
mode, fname, binObjectID, count, err := git.ParseTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf)
if err != nil {
return nil, err
}
n += int64(count)
if bytes.Equal(binObjectID, objectID.RawValue()) {
result := LFSResult{
Name: curPath + string(fname),
SHA: curCommit.ID.String(),
Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0],
When: curCommit.Author.When,
ParentIDs: curCommit.Parents,
}
resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
} else if string(mode) == git.EntryModeTree.String() {
hexObjectID := make([]byte, objectID.Type().FullLength())
git.BinToHex(objectID.Type(), binObjectID, hexObjectID)
trees = append(trees, hexObjectID)
paths = append(paths, curPath+string(fname)+"/")
}
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
if len(trees) > 0 {
_, err := batchStdinWriter.Write(trees[len(trees)-1])
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte("\n"))
if err != nil {
return nil, err
}
curPath = paths[len(paths)-1]
trees = trees[:len(trees)-1]
paths = paths[:len(paths)-1]
} else {
break commitReadingLoop
}
}
}
}
if err := scan.Err(); err != nil {
return nil, err
}
for _, result := range resultsMap {
hasParent := false
for _, parentID := range result.ParentIDs {
if _, hasParent = resultsMap[parentID.String()+":"+result.Name]; hasParent {
break
}
}
if !hasParent {
results = append(results, result)
}
}
sort.Sort(lfsResultSlice(results))
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
shasToNameReader, shasToNameWriter := io.Pipe()
nameRevStdinReader, nameRevStdinWriter := io.Pipe()
errChan := make(chan error, 1)
wg := sync.WaitGroup{}
wg.Add(3)
go func() {
defer wg.Done()
scanner := bufio.NewScanner(nameRevStdinReader)
i := 0
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
result := results[i]
result.FullCommitName = line
result.BranchName = strings.Split(line, "~")[0]
i++
}
}()
go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
go func() {
defer wg.Done()
defer shasToNameWriter.Close()
for _, result := range results {
_, err := shasToNameWriter.Write([]byte(result.SHA))
if err != nil {
errChan <- err
break
}
_, err = shasToNameWriter.Write([]byte{'\n'})
if err != nil {
errChan <- err
break
}
}
}()
wg.Wait()
select {
case err, has := <-errChan:
if has {
return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err)
}
default:
}
return results, nil
}