|
- // Copyright 2020 The Gitea Authors.
- // All rights reserved.
- // Use of this source code is governed by a MIT-style
- // license that can be found in the LICENSE file.
-
- package archiver
-
- import (
- "io"
- "io/ioutil"
- "os"
- "path"
- "regexp"
- "strings"
- "sync"
- "time"
-
- "code.gitea.io/gitea/modules/base"
- "code.gitea.io/gitea/modules/context"
- "code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/graceful"
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/setting"
- "code.gitea.io/gitea/modules/util"
- )
-
- // ArchiveRequest defines the parameters of an archive request, which notably
- // includes the specific repository being archived as well as the commit, the
- // name by which it was requested, and the kind of archive being requested.
- // This is entirely opaque to external entities, though, and mostly used as a
- // handle elsewhere.
- type ArchiveRequest struct {
- uri string
- repo *git.Repository
- refName string
- ext string
- archivePath string
- archiveType git.ArchiveType
- archiveComplete bool
- commit *git.Commit
- cchan chan struct{}
- }
-
- var archiveInProgress []*ArchiveRequest
- var archiveMutex sync.Mutex
-
// shaRegex matches a full or abbreviated commit ID written in lowercase
// hexadecimal: between 4 and 64 digits. A SHA1 needs at most 40 of them; the
// upper bound of 64 leaves room for SHA256.
var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`)
-
// Hooks for the unit tests, which use them to control (to some extent) when
// the goroutine processing a queued request starts and when it is released.
// They are declared without initial values, and ArchiveRepository skips the
// corresponding wait while a Cond is nil.
var (
	archiveQueueMutex       *sync.Mutex
	archiveQueueStartCond   *sync.Cond
	archiveQueueReleaseCond *sync.Cond
)
-
- // GetArchivePath returns the path from which we can serve this archive.
- func (aReq *ArchiveRequest) GetArchivePath() string {
- return aReq.archivePath
- }
-
- // GetArchiveName returns the name of the caller, based on the ref used by the
- // caller to create this request.
- func (aReq *ArchiveRequest) GetArchiveName() string {
- return aReq.refName + aReq.ext
- }
-
- // IsComplete returns the completion status of this request.
- func (aReq *ArchiveRequest) IsComplete() bool {
- return aReq.archiveComplete
- }
-
- // WaitForCompletion will wait for this request to complete, with no timeout.
- // It returns whether the archive was actually completed, as the channel could
- // have also been closed due to an error.
- func (aReq *ArchiveRequest) WaitForCompletion(ctx *context.Context) bool {
- select {
- case <-aReq.cchan:
- case <-ctx.Req.Context().Done():
- }
-
- return aReq.IsComplete()
- }
-
- // TimedWaitForCompletion will wait for this request to complete, with timeout
- // happening after the specified Duration. It returns whether the archive is
- // now complete and whether we hit the timeout or not. The latter may not be
- // useful if the request is complete or we started to shutdown.
- func (aReq *ArchiveRequest) TimedWaitForCompletion(ctx *context.Context, dur time.Duration) (bool, bool) {
- timeout := false
- select {
- case <-time.After(dur):
- timeout = true
- case <-aReq.cchan:
- case <-ctx.Req.Context().Done():
- }
-
- return aReq.IsComplete(), timeout
- }
-
- // The caller must hold the archiveMutex across calls to getArchiveRequest.
- func getArchiveRequest(repo *git.Repository, commit *git.Commit, archiveType git.ArchiveType) *ArchiveRequest {
- for _, r := range archiveInProgress {
- // Need to be referring to the same repository.
- if r.repo.Path == repo.Path && r.commit.ID == commit.ID && r.archiveType == archiveType {
- return r
- }
- }
- return nil
- }
-
- // DeriveRequestFrom creates an archival request, based on the URI. The
- // resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
- // if it's determined that the request still needs to be satisfied.
- func DeriveRequestFrom(ctx *context.Context, uri string) *ArchiveRequest {
- if ctx.Repo == nil || ctx.Repo.GitRepo == nil {
- log.Trace("Repo not initialized")
- return nil
- }
- r := &ArchiveRequest{
- uri: uri,
- repo: ctx.Repo.GitRepo,
- }
-
- switch {
- case strings.HasSuffix(uri, ".zip"):
- r.ext = ".zip"
- r.archivePath = path.Join(r.repo.Path, "archives/zip")
- r.archiveType = git.ZIP
- case strings.HasSuffix(uri, ".tar.gz"):
- r.ext = ".tar.gz"
- r.archivePath = path.Join(r.repo.Path, "archives/targz")
- r.archiveType = git.TARGZ
- default:
- log.Trace("Unknown format: %s", uri)
- return nil
- }
-
- r.refName = strings.TrimSuffix(r.uri, r.ext)
- isDir, err := util.IsDir(r.archivePath)
- if err != nil {
- ctx.ServerError("Download -> util.IsDir(archivePath)", err)
- return nil
- }
- if !isDir {
- if err := os.MkdirAll(r.archivePath, os.ModePerm); err != nil {
- ctx.ServerError("Download -> os.MkdirAll(archivePath)", err)
- return nil
- }
- }
-
- // Get corresponding commit.
- if r.repo.IsBranchExist(r.refName) {
- r.commit, err = r.repo.GetBranchCommit(r.refName)
- if err != nil {
- ctx.ServerError("GetBranchCommit", err)
- return nil
- }
- } else if r.repo.IsTagExist(r.refName) {
- r.commit, err = r.repo.GetTagCommit(r.refName)
- if err != nil {
- ctx.ServerError("GetTagCommit", err)
- return nil
- }
- } else if shaRegex.MatchString(r.refName) {
- r.commit, err = r.repo.GetCommit(r.refName)
- if err != nil {
- ctx.NotFound("GetCommit", nil)
- return nil
- }
- } else {
- ctx.NotFound("DeriveRequestFrom", nil)
- return nil
- }
-
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
- if rExisting := getArchiveRequest(r.repo, r.commit, r.archiveType); rExisting != nil {
- return rExisting
- }
-
- r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext)
- r.archiveComplete, err = util.IsFile(r.archivePath)
- if err != nil {
- ctx.ServerError("util.IsFile", err)
- return nil
- }
- return r
- }
-
- func doArchive(r *ArchiveRequest) {
- var (
- err error
- tmpArchive *os.File
- destArchive *os.File
- )
-
- // Close the channel to indicate to potential waiters that this request
- // has finished.
- defer close(r.cchan)
-
- // It could have happened that we enqueued two archival requests, due to
- // race conditions and difficulties in locking. Do one last check that
- // the archive we're referring to doesn't already exist. If it does exist,
- // then just mark the request as complete and move on.
- isFile, err := util.IsFile(r.archivePath)
- if err != nil {
- log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err)
- }
- if isFile {
- r.archiveComplete = true
- return
- }
-
- // Create a temporary file to use while the archive is being built. We
- // will then copy it into place (r.archivePath) once it's fully
- // constructed.
- tmpArchive, err = ioutil.TempFile("", "archive")
- if err != nil {
- log.Error("Unable to create a temporary archive file! Error: %v", err)
- return
- }
- defer func() {
- tmpArchive.Close()
- os.Remove(tmpArchive.Name())
- }()
-
- if err = r.commit.CreateArchive(graceful.GetManager().ShutdownContext(), tmpArchive.Name(), git.CreateArchiveOpts{
- Format: r.archiveType,
- Prefix: setting.Repository.PrefixArchiveFiles,
- }); err != nil {
- log.Error("Download -> CreateArchive "+tmpArchive.Name(), err)
- return
- }
-
- // Now we copy it into place
- if destArchive, err = os.Create(r.archivePath); err != nil {
- log.Error("Unable to open archive " + r.archivePath)
- return
- }
- _, err = io.Copy(destArchive, tmpArchive)
- destArchive.Close()
- if err != nil {
- log.Error("Unable to write archive " + r.archivePath)
- return
- }
-
- // Block any attempt to finalize creating a new request if we're marking
- r.archiveComplete = true
- }
-
- // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing
- // will occur in a separate goroutine, as this phase may take a while to
- // complete. If the archive already exists, ArchiveRepository will not do
- // anything. In all cases, the caller should be examining the *ArchiveRequest
- // being returned for completion, as it may be different than the one they passed
- // in.
- func ArchiveRepository(request *ArchiveRequest) *ArchiveRequest {
- // We'll return the request that's already been enqueued if it has been
- // enqueued, or we'll immediately enqueue it if it has not been enqueued
- // and it is not marked complete.
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
- if rExisting := getArchiveRequest(request.repo, request.commit, request.archiveType); rExisting != nil {
- return rExisting
- }
- if request.archiveComplete {
- return request
- }
-
- request.cchan = make(chan struct{})
- archiveInProgress = append(archiveInProgress, request)
- go func() {
- // Wait to start, if we have the Cond for it. This is currently only
- // useful for testing, so that the start and release of queued entries
- // can be controlled to examine the queue.
- if archiveQueueStartCond != nil {
- archiveQueueMutex.Lock()
- archiveQueueStartCond.Wait()
- archiveQueueMutex.Unlock()
- }
-
- // Drop the mutex while we process the request. This may take a long
- // time, and it's not necessary now that we've added the reequest to
- // archiveInProgress.
- doArchive(request)
-
- if archiveQueueReleaseCond != nil {
- archiveQueueMutex.Lock()
- archiveQueueReleaseCond.Wait()
- archiveQueueMutex.Unlock()
- }
-
- // Purge this request from the list. To do so, we'll just take the
- // index at which we ended up at and swap the final element into that
- // position, then chop off the now-redundant final element. The slice
- // may have change in between these two segments and we may have moved,
- // so we search for it here. We could perhaps avoid this search
- // entirely if len(archiveInProgress) == 1, but we should verify
- // correctness.
- archiveMutex.Lock()
- defer archiveMutex.Unlock()
-
- idx := -1
- for _idx, req := range archiveInProgress {
- if req == request {
- idx = _idx
- break
- }
- }
- if idx == -1 {
- log.Error("ArchiveRepository: Failed to find request for removal.")
- return
- }
- archiveInProgress = append(archiveInProgress[:idx], archiveInProgress[idx+1:]...)
- }()
-
- return request
- }
|