githaven/services/archiver/archiver.go
Lunny Xiao b223d36195
Rework repository archive (#14723)
* Use storage to store archive files

* Fix backend lint

* Add archiver table on database

* Finish archive download

* Fix test

* Add database migrations

* Add status for archiver

* Fix lint

* Add queue

* Add doctor to check and delete old archives

* Improve archive queue

* Fix tests

* improve archive storage

* Delete repo archives

* Add missing fixture

* fix fixture

* Fix fixture

* Fix test

* Fix archiver cleaning

* Fix bug

* Add docs for repository archive storage

* remove repo-archive configuration

* Fix test

* Fix test

* Fix lint

Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
2021-06-23 17:12:38 -04:00

250 lines
6.4 KiB
Go

// Copyright 2020 The Gitea Authors.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"errors"
"fmt"
"io"
"os"
"regexp"
"strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
)
// ArchiveRequest defines the parameters of an archive request, which notably
// includes the specific repository being archived as well as the commit, the
// name by which it was requested, and the kind of archive being requested.
// This is entirely opaque to external entities, though, and mostly used as a
// handle elsewhere.
type ArchiveRequest struct {
// RepoID is the ID of the repository to archive, as passed to NewRequest.
RepoID int64
// refName is the request URI with its archive extension trimmed off; it
// is what GetArchiveName builds the archive file name from.
// NOTE(review): this field is unexported while *ArchiveRequest is used as
// queue data in Init; if the queue serialises requests with an encoder
// that only sees exported fields, refName would be lost - confirm.
refName string
// Type is the archive format; NewRequest sets it to git.ZIP or git.TARGZ
// according to the URI suffix.
Type git.ArchiveType
// CommitID is the commit that refName was resolved to by NewRequest.
CommitID string
}
// shaRegex matches a string made only of lowercase hexadecimal digits that is
// between 4 and 64 characters long.
// SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all
// the way to 64. The lower bound of 4 presumably admits abbreviated commit
// IDs - confirm against callers.
var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`)
// NewRequest creates an archival request, based on the URI. The
// resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
// if it's determined that the request still needs to be satisfied.
//
// uri must end in ".zip" or ".tar.gz"; the part before that extension is
// resolved against repo, in order, as a branch name, a tag name, or a
// (possibly abbreviated) lowercase hexadecimal commit ID.
//
// An error is returned when the extension is not recognised, when looking up
// the branch or tag commit fails, when a commit ID does not exist in repo
// (git.ErrNotExist), or when the ref matches none of the three forms.
func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
	r := &ArchiveRequest{
		RepoID: repoID,
	}

	// Derive the archive type from the URI suffix.
	var ext string
	switch {
	case strings.HasSuffix(uri, ".zip"):
		ext = ".zip"
		r.Type = git.ZIP
	case strings.HasSuffix(uri, ".tar.gz"):
		ext = ".tar.gz"
		r.Type = git.TARGZ
	default:
		return nil, fmt.Errorf("unknown format: %s", uri)
	}

	r.refName = strings.TrimSuffix(uri, ext)

	// Get corresponding commit. Cases are tried top to bottom, so a branch
	// wins over a tag of the same name, and both win over a commit ID.
	var err error
	switch {
	case repo.IsBranchExist(r.refName):
		r.CommitID, err = repo.GetBranchCommitID(r.refName)
		if err != nil {
			return nil, err
		}
	case repo.IsTagExist(r.refName):
		r.CommitID, err = repo.GetTagCommitID(r.refName)
		if err != nil {
			return nil, err
		}
	case shaRegex.MatchString(r.refName):
		if !repo.IsCommitExist(r.refName) {
			return nil, git.ErrNotExist{
				ID: r.refName,
			}
		}
		// The ID is stored exactly as requested; an abbreviated ID is not
		// expanded here.
		r.CommitID = r.refName
	default:
		return nil, fmt.Errorf("unknown ref %s type", r.refName)
	}

	return r, nil
}
// GetArchiveName returns the file name for the requested archive. It is built
// from the ref the caller used to create this request, with every "/"
// replaced by "-", followed by "." and the string form of the archive type.
func (aReq *ArchiveRequest) GetArchiveName() string {
	flatRef := strings.ReplaceAll(aReq.refName, "/", "-")
	extension := aReq.Type.String()
	return flatRef + "." + extension
}
// doArchive makes sure the archive described by r is present in
// storage.RepoArchives and that its RepoArchiver database record is marked
// ready, generating the archive with git when it is not stored yet.
//
// It returns (nil, nil) when an existing record is still in the
// RepoArchiverGenerating state. Otherwise it returns the record, or an error
// if a database, storage or git step failed.
func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) {
	ctx, committer, err := models.TxDBContext()
	if err != nil {
		return nil, err
	}
	defer committer.Close()

	archiver, err := models.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
	if err != nil {
		return nil, err
	}

	if archiver != nil {
		// FIXME: If another process is generating it, we think it's not ready and just return.
		// Or we should wait until the archive is generated.
		if archiver.Status == models.RepoArchiverGenerating {
			return nil, nil
		}
	} else {
		archiver = &models.RepoArchiver{
			RepoID:   r.RepoID,
			Type:     r.Type,
			CommitID: r.CommitID,
			Status:   models.RepoArchiverGenerating,
		}
		if err := models.AddRepoArchiver(ctx, archiver); err != nil {
			return nil, err
		}
	}

	rPath, err := archiver.RelativePath()
	if err != nil {
		return nil, err
	}

	_, err = storage.RepoArchives.Stat(rPath)
	if err == nil {
		// The archive is already stored; only the record may need updating.
		if archiver.Status == models.RepoArchiverGenerating {
			archiver.Status = models.RepoArchiverReady
			if err = models.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
				return nil, err
			}
		}
		// Commit explicitly, exactly as the generation path below does, so
		// the record added or updated above is not left uncommitted when the
		// deferred Close runs (assumes Close discards uncommitted work -
		// confirm against models.TxDBContext).
		return archiver, committer.Commit()
	}

	if !errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("unable to stat archive: %w", err)
	}

	// git writes the archive into w while the storage layer reads it from rd.
	rd, w := io.Pipe()
	defer func() {
		w.Close()
		rd.Close()
	}()

	// Buffered with room for the single result, so the producer goroutine can
	// always deliver it and exit even if this function has already returned
	// (for example because Save failed) and nobody receives any more.
	done := make(chan error, 1)

	repo, err := archiver.LoadRepo()
	if err != nil {
		return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err)
	}

	gitRepo, err := git.OpenRepository(repo.RepoPath())
	if err != nil {
		return nil, err
	}
	defer gitRepo.Close()

	go func(done chan error, w *io.PipeWriter, archiver *models.RepoArchiver, gitRepo *git.Repository) {
		defer func() {
			if r := recover(); r != nil {
				panicErr := fmt.Errorf("%v", r)
				// Fail the pipe as well; otherwise the reader inside Save
				// would keep waiting for data that will never arrive.
				_ = w.CloseWithError(panicErr)
				done <- panicErr
			}
		}()

		// Local err: the goroutine must not write to the variable owned by
		// the enclosing function.
		err := gitRepo.CreateArchive(
			graceful.GetManager().ShutdownContext(),
			archiver.Type,
			w,
			setting.Repository.PrefixArchiveFiles,
			archiver.CommitID,
		)
		// A nil err closes the pipe normally (the reader sees EOF); a non-nil
		// err is surfaced to the reader.
		_ = w.CloseWithError(err)
		done <- err
	}(done, w, archiver, gitRepo)

	// TODO: add lfs data to zip
	// TODO: add submodule data to zip

	if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil {
		return nil, fmt.Errorf("unable to write archive: %w", err)
	}

	if err = <-done; err != nil {
		return nil, err
	}

	if archiver.Status == models.RepoArchiverGenerating {
		archiver.Status = models.RepoArchiverReady
		if err = models.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
			return nil, err
		}
	}

	return archiver, committer.Commit()
}
// ArchiveRepository satisfies the ArchiveRequest being passed in. It simply
// calls doArchive, so the work is performed in the calling goroutine and is
// finished by the time this function returns. The caller should examine the
// returned *models.RepoArchiver and error: doArchive returns a nil archiver
// with a nil error when an existing record is still marked as generating.
func ArchiveRepository(request *ArchiveRequest) (*models.RepoArchiver, error) {
return doArchive(request)
}
// archiverQueue is the queue of pending archive requests. It is assigned by
// Init, and StartArchive checks it and pushes requests onto it.
var archiverQueue queue.UniqueQueue
// Init initializes the archiver service: it creates the "repo-archive" unique
// queue with a handler that runs doArchive for every queued *ArchiveRequest,
// stores the queue in archiverQueue and starts it under the graceful manager.
// It returns an error if the queue could not be created.
func Init() error {
	handler := func(data ...queue.Data) {
		for _, datum := range data {
			archiveReq, ok := datum.(*ArchiveRequest)
			if !ok {
				log.Error("Unable to process provided datum: %v - not possible to cast to ArchiveRequest", datum)
				continue
			}
			log.Trace("ArchiverData Process: %#v", archiveReq)
			// A failure is logged rather than returned, so the remaining
			// data in this batch is still processed.
			if _, err := doArchive(archiveReq); err != nil {
				log.Error("Archive %v failed: %v", datum, err)
			}
		}
	}

	archiverQueue = queue.CreateUniqueQueue("repo-archive", handler, new(ArchiveRequest))
	if archiverQueue == nil {
		return errors.New("unable to create repo-archive queue")
	}

	go graceful.GetManager().RunWithShutdownFns(archiverQueue.Run)
	return nil
}
// StartArchive pushes the archive request onto the archiver queue unless the
// queue reports that it already has the request, in which case nothing is
// done. An error from the queue's Has or Push call is returned.
func StartArchive(request *ArchiveRequest) error {
	alreadyQueued, err := archiverQueue.Has(request)
	switch {
	case err != nil:
		return err
	case alreadyQueued:
		return nil
	}
	return archiverQueue.Push(request)
}