From f74c869221624092999097af38b6f7fae4701420 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Wed, 21 Feb 2024 19:54:17 +0100 Subject: [PATCH] Prevent double use of `git cat-file` session. (#29298) Fixes the reason why #29101 is hard to replicate. Related #29297 Create a repo with a file with minimum size 4097 bytes (I use 10000) and execute the following code: ```go gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, ) assert.NoError(t, err) commit, err := gitRepo.GetCommit() assert.NoError(t, err) entry, err := commit.GetTreeEntryByPath() assert.NoError(t, err) b := entry.Blob() // Create a reader r, err := b.DataAsync() assert.NoError(t, err) defer r.Close() // Create a second reader r2, err := b.DataAsync() assert.NoError(t, err) // Should be no error but is ErrNotExist defer r2.Close() ``` The problem is the check in `CatFileBatch`: https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87 `Buffered() > 0` is used to check if there is a "operation" in progress at the moment. This is a problem because we can't control the internal buffer in the `bufio.Reader`. The code above demonstrates a sequence which initiates an operation for which the code thinks there is no active processing. The second call to `DataAsync()` therefore reuses the existing instances instead of creating a new batch reader. --- modules/git/repo_base_nogogit.go | 21 ++++++++++----- tests/integration/git_test.go | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index d5a350a92..8c6eae589 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -27,10 +27,12 @@ type Repository struct { gpgSettings *GPGSettings + batchInUse bool batchCancel context.CancelFunc batchReader *bufio.Reader batchWriter WriteCloserError + checkInUse bool checkCancel context.CancelFunc checkReader *bufio.Reader checkWriter WriteCloserError @@ -79,23 +81,28 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { // CatFileBatch obtains a CatFileBatch for this repository func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { - if repo.batchCancel == nil || repo.batchReader.Buffered() > 0 { + if repo.batchCancel == nil || repo.batchInUse { log.Debug("Opening temporary cat file batch for: %s", repo.Path) return CatFileBatch(ctx, repo.Path) } - return repo.batchWriter, repo.batchReader, func() {} + repo.batchInUse = true + return repo.batchWriter, repo.batchReader, func() { + repo.batchInUse = false + } } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { - if repo.checkCancel == nil || repo.checkReader.Buffered() > 0 { - log.Debug("Opening temporary cat file batch-check: %s", repo.Path) + if repo.checkCancel == nil || repo.checkInUse { + log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) return CatFileBatchCheck(ctx, repo.Path) } - return repo.checkWriter, repo.checkReader, func() {} + repo.checkInUse = true + return repo.checkWriter, repo.checkReader, func() { + repo.checkInUse = false + } } -// Close this repository, in particular close the underlying gogitStorage if this is not nil func (repo *Repository) Close() (err error) { if repo == nil { return nil @@ -105,12 +112,14 @@ func (repo *Repository) Close() (err error) { repo.batchReader = nil repo.batchWriter = nil repo.batchCancel = nil + repo.batchInUse = false } if repo.checkCancel != nil { repo.checkCancel() repo.checkCancel = nil repo.checkReader = nil repo.checkWriter = nil + repo.checkInUse = false } repo.LastCommitCache = nil repo.tagCache = nil diff --git a/tests/integration/git_test.go b/tests/integration/git_test.go index 0c3a8616f..95350d79c 100644 --- a/tests/integration/git_test.go +++ b/tests/integration/git_test.go @@ -4,6 +4,7 @@ package integration import ( + "bytes" "encoding/hex" "fmt" "math/rand" @@ -25,9 +26,11 @@ import ( user_model "code.gitea.io/gitea/models/user" gitea_context "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" + files_service "code.gitea.io/gitea/services/repository/files" "code.gitea.io/gitea/tests" "github.com/stretchr/testify/assert" @@ -848,3 +851,44 @@ func doCreateAgitFlowPull(dstPath string, ctx *APITestContext, baseBranch, headB t.Run("CheckoutMasterAgain", doGitCheckoutBranch(dstPath, "master")) } } + +func TestDataAsync_Issue29101(t *testing.T) { + onGiteaRun(t, func(t *testing.T, u *url.URL) { + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) + repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1}) + + resp, err := files_service.ChangeRepoFiles(db.DefaultContext, repo, user, &files_service.ChangeRepoFilesOptions{ + Files: []*files_service.ChangeRepoFile{ + { + Operation: "create", + TreePath: "test.txt", + ContentReader: bytes.NewReader(make([]byte, 10000)), + }, + }, + OldBranch: repo.DefaultBranch, + NewBranch: repo.DefaultBranch, + }) + assert.NoError(t, err) + + sha := resp.Commit.SHA + + gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, repo) + assert.NoError(t, err) + + commit, err := gitRepo.GetCommit(sha) + assert.NoError(t, err) + + entry, err := commit.GetTreeEntryByPath("test.txt") + assert.NoError(t, err) + + b := entry.Blob() + + r, err := b.DataAsync() + assert.NoError(t, err) + defer r.Close() + + r2, err := b.DataAsync() + assert.NoError(t, err) + defer r2.Close() + }) +}