Refactor "Content" for file uploading (#25851)

Before this change, a plain "Content string" field was used everywhere,
which caused two problems:

1. Sometimes it means "base64 encoded content", sometimes it means "raw
binary content"
2. It doesn't work with large files, e.g. uploading a 1G LFS file would
make the Gitea process run out of memory (OOM)

This PR does the refactoring: use "ContentReader" / "ContentBase64"
instead of "Content"

This PR is not a breaking change because the JSON key in the API is still "content":
`` ContentBase64 string `json:"content"` ``
This commit is contained in:
wxiaoguang 2023-07-19 02:14:47 +08:00 committed by GitHub
parent 265a28802a
commit 236c645bf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 103 additions and 80 deletions

View File

@ -26,7 +26,7 @@ type CreateFileOptions struct {
FileOptions
// content must be base64 encoded
// required: true
Content string `json:"content"`
ContentBase64 string `json:"content"`
}
// Branch returns branch name
@ -54,7 +54,7 @@ type UpdateFileOptions struct {
DeleteFileOptions
// content must be base64 encoded
// required: true
Content string `json:"content"`
ContentBase64 string `json:"content"`
// from_path (optional) is the path of the original file which will be moved/renamed to the path in the URL
FromPath string `json:"from_path" binding:"MaxSize(500)"`
}
@ -74,7 +74,7 @@ type ChangeFileOperation struct {
// required: true
Path string `json:"path" binding:"Required;MaxSize(500)"`
// new or updated file content, must be base64 encoded
Content string `json:"content"`
ContentBase64 string `json:"content"`
// sha is the SHA for the file that already exists, required for update or delete
SHA string `json:"sha"`
// old path of the file to move

View File

@ -408,6 +408,14 @@ func canReadFiles(r *context.Repository) bool {
return r.Permission.CanRead(unit.TypeCode)
}
// base64Reader decodes the standard-base64 string s and returns a reader
// positioned at the start of the decoded bytes.
//
// Decoding happens eagerly, so malformed input is reported through the
// returned error (with a nil reader) instead of surfacing on the first Read.
func base64Reader(s string) (io.Reader, error) {
	decoded, err := base64.StdEncoding.DecodeString(s)
	if err == nil {
		return bytes.NewReader(decoded), nil
	}
	return nil, err
}
// ChangeFiles handles API call for modifying multiple files
func ChangeFiles(ctx *context.APIContext) {
// swagger:operation POST /repos/{owner}/{repo}/contents repository repoChangeFiles
@ -449,13 +457,18 @@ func ChangeFiles(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
}
files := []*files_service.ChangeRepoFile{}
var files []*files_service.ChangeRepoFile
for _, file := range apiOpts.Files {
contentReader, err := base64Reader(file.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
changeRepoFile := &files_service.ChangeRepoFile{
Operation: file.Operation,
TreePath: file.Path,
FromTreePath: file.FromPath,
Content: file.Content,
ContentReader: contentReader,
SHA: file.SHA,
}
files = append(files, changeRepoFile)
@ -544,12 +557,18 @@ func CreateFile(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
}
contentReader, err := base64Reader(apiOpts.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
opts := &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{
{
Operation: "create",
TreePath: ctx.Params("*"),
Content: apiOpts.Content,
ContentReader: contentReader,
},
},
Message: apiOpts.Message,
@ -636,11 +655,17 @@ func UpdateFile(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
}
contentReader, err := base64Reader(apiOpts.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
opts := &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{
{
Operation: "update",
Content: apiOpts.Content,
ContentReader: contentReader,
SHA: apiOpts.SHA,
FromTreePath: apiOpts.FromPath,
TreePath: ctx.Params("*"),
@ -709,14 +734,6 @@ func createOrUpdateFiles(ctx *context.APIContext, opts *files_service.ChangeRepo
}
}
for _, file := range opts.Files {
content, err := base64.StdEncoding.DecodeString(file.Content)
if err != nil {
return nil, err
}
file.Content = string(content)
}
return files_service.ChangeRepoFiles(ctx, ctx.Repo.Repository, ctx.Doer, opts)
}

View File

@ -287,7 +287,7 @@ func editFilePost(ctx *context.Context, form forms.EditRepoFileForm, isNewFile b
Operation: operation,
FromTreePath: ctx.Repo.TreePath,
TreePath: form.TreePath,
Content: strings.ReplaceAll(form.Content, "\r", ""),
ContentReader: strings.NewReader(strings.ReplaceAll(form.Content, "\r", "")),
},
},
Signoff: form.Signoff,

View File

@ -6,6 +6,7 @@ package files
import (
"context"
"fmt"
"io"
"path"
"strings"
"time"
@ -38,7 +39,7 @@ type ChangeRepoFile struct {
Operation string
TreePath string
FromTreePath string
Content string
ContentReader io.Reader
SHA string
Options *RepoFileOptions
}
@ -387,7 +388,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
}
}
treeObjectContent := file.Content
treeObjectContentReader := file.ContentReader
var lfsMetaObject *git_model.LFSMetaObject
if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos
@ -402,17 +403,17 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" {
// OK so we are supposed to LFS this data!
pointer, err := lfs.GeneratePointer(strings.NewReader(file.Content))
pointer, err := lfs.GeneratePointer(treeObjectContentReader)
if err != nil {
return err
}
lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
treeObjectContent = pointer.StringContent()
treeObjectContentReader = strings.NewReader(pointer.StringContent())
}
}
// Add the object to the database
objectHash, err := t.HashObject(strings.NewReader(treeObjectContent))
objectHash, err := t.HashObject(treeObjectContentReader)
if err != nil {
return err
}
@ -439,7 +440,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
return err
}
if !exist {
if err := contentStore.Put(lfsMetaObject.Pointer, strings.NewReader(file.Content)); err != nil {
if err := contentStore.Put(lfsMetaObject.Pointer, file.ContentReader); err != nil {
if _, err2 := git_model.RemoveLFSMetaObjectByOid(ctx, repoID, lfsMetaObject.Oid); err2 != nil {
return fmt.Errorf("unable to remove failed inserted LFS object %s: %v (Prev Error: %w)", lfsMetaObject.Oid, err2, err)
}

View File

@ -16125,7 +16125,7 @@
"content": {
"description": "new or updated file content, must be base64 encoded",
"type": "string",
"x-go-name": "Content"
"x-go-name": "ContentBase64"
},
"from_path": {
"description": "old path of the file to move",
@ -16810,7 +16810,7 @@
"content": {
"description": "content must be base64 encoded",
"type": "string",
"x-go-name": "Content"
"x-go-name": "ContentBase64"
},
"dates": {
"$ref": "#/definitions/CommitDateOptions"
@ -21687,7 +21687,7 @@
"content": {
"description": "content must be base64 encoded",
"type": "string",
"x-go-name": "Content"
"x-go-name": "ContentBase64"
},
"dates": {
"$ref": "#/definitions/CommitDateOptions"

View File

@ -5,6 +5,7 @@ package integration
import (
"net/url"
"strings"
"testing"
"time"
@ -66,7 +67,7 @@ func TestPullRequestTargetEvent(t *testing.T) {
{
Operation: "create",
TreePath: ".gitea/workflows/pr.yml",
Content: "name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n",
ContentReader: strings.NewReader("name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n"),
},
},
Message: "add workflow",
@ -94,7 +95,7 @@ func TestPullRequestTargetEvent(t *testing.T) {
{
Operation: "create",
TreePath: "file_1.txt",
Content: "file1",
ContentReader: strings.NewReader("file1"),
},
},
Message: "add file1",

View File

@ -46,7 +46,7 @@ func getCreateFileOptions() api.CreateFileOptions {
Committer: time.Unix(978307190, 0),
},
},
Content: contentEncoded,
ContentBase64: contentEncoded,
}
}

View File

@ -4,6 +4,8 @@
package integration
import (
"strings"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
@ -17,7 +19,7 @@ func createFileInBranch(user *user_model.User, repo *repo_model.Repository, tree
{
Operation: "create",
TreePath: treePath,
Content: content,
ContentReader: strings.NewReader(content),
},
},
OldBranch: branchName,

View File

@ -44,7 +44,7 @@ func getUpdateFileOptions() *api.UpdateFileOptions {
},
SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885",
},
Content: contentEncoded,
ContentBase64: contentEncoded,
}
}

View File

@ -45,11 +45,11 @@ func getChangeFilesOptions() *api.ChangeFilesOptions {
Files: []*api.ChangeFileOperation{
{
Operation: "create",
Content: newContentEncoded,
ContentBase64: newContentEncoded,
},
{
Operation: "update",
Content: updateContentEncoded,
ContentBase64: updateContentEncoded,
SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885",
},
{

View File

@ -125,7 +125,7 @@ func TestEmptyRepoAddFileByAPI(t *testing.T) {
NewBranchName: "new_branch",
Message: "init",
},
Content: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")),
ContentBase64: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")),
})
resp := MakeRequest(t, req, http.StatusCreated)

View File

@ -337,7 +337,7 @@ func crudActionCreateFile(t *testing.T, ctx APITestContext, user *user_model.Use
Email: user.Email,
},
},
Content: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))),
ContentBase64: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))),
}, callback...)
}

View File

@ -372,7 +372,7 @@ func TestConflictChecking(t *testing.T) {
{
Operation: "create",
TreePath: "important_file",
Content: "Just a non-important file",
ContentReader: strings.NewReader("Just a non-important file"),
},
},
Message: "Add a important file",
@ -387,7 +387,7 @@ func TestConflictChecking(t *testing.T) {
{
Operation: "create",
TreePath: "important_file",
Content: "Not the same content :P",
ContentReader: strings.NewReader("Not the same content :P"),
},
},
Message: "Add a important file",

View File

@ -6,6 +6,7 @@ package integration
import (
"net/http"
"net/url"
"strings"
"testing"
"time"
@ -106,7 +107,7 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod
{
Operation: "create",
TreePath: "File_A",
Content: "File A",
ContentReader: strings.NewReader("File A"),
},
},
Message: "Add File A",
@ -133,7 +134,7 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod
{
Operation: "create",
TreePath: "File_B",
Content: "File B",
ContentReader: strings.NewReader("File B"),
},
},
Message: "Add File on PR branch",

View File

@ -6,6 +6,7 @@ package integration
import (
"net/url"
"path/filepath"
"strings"
"testing"
"time"
@ -26,7 +27,7 @@ func getCreateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang
{
Operation: "create",
TreePath: "new/file.txt",
Content: "This is a NEW file",
ContentReader: strings.NewReader("This is a NEW file"),
},
},
OldBranch: repo.DefaultBranch,
@ -44,7 +45,7 @@ func getUpdateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang
Operation: "update",
TreePath: "README.md",
SHA: "4b4851ad51df6a7d9f25c979345979eaeb5b349f",
Content: "This is UPDATED content for the README file",
ContentReader: strings.NewReader("This is UPDATED content for the README file"),
},
},
OldBranch: repo.DefaultBranch,