| @@ -5,9 +5,7 @@ | |||||
| package models | package models | ||||
| import ( | import ( | ||||
| "io/ioutil" | |||||
| "os" | |||||
| "path" | |||||
| "fmt" | |||||
| "strconv" | "strconv" | ||||
| "strings" | "strings" | ||||
| @@ -16,8 +14,6 @@ import ( | |||||
| "code.gitea.io/gitea/modules/indexer" | "code.gitea.io/gitea/modules/indexer" | ||||
| "code.gitea.io/gitea/modules/log" | "code.gitea.io/gitea/modules/log" | ||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "github.com/Unknwon/com" | |||||
| ) | ) | ||||
| // RepoIndexerStatus status of a repo's entry in the repo indexer | // RepoIndexerStatus status of a repo's entry in the repo indexer | ||||
| @@ -132,7 +128,11 @@ func populateRepoIndexer(maxRepoID int64) { | |||||
| } | } | ||||
| func updateRepoIndexer(repo *Repository) error { | func updateRepoIndexer(repo *Repository) error { | ||||
| changes, err := getRepoChanges(repo) | |||||
| sha, err := getDefaultBranchSha(repo) | |||||
| if err != nil { | |||||
| return err | |||||
| } | |||||
| changes, err := getRepoChanges(repo, sha) | |||||
| if err != nil { | if err != nil { | ||||
| return err | return err | ||||
| } else if changes == nil { | } else if changes == nil { | ||||
| @@ -140,12 +140,12 @@ func updateRepoIndexer(repo *Repository) error { | |||||
| } | } | ||||
| batch := indexer.RepoIndexerBatch() | batch := indexer.RepoIndexerBatch() | ||||
| for _, filename := range changes.UpdatedFiles { | |||||
| if err := addUpdate(filename, repo, batch); err != nil { | |||||
| for _, update := range changes.Updates { | |||||
| if err := addUpdate(update, repo, batch); err != nil { | |||||
| return err | return err | ||||
| } | } | ||||
| } | } | ||||
| for _, filename := range changes.RemovedFiles { | |||||
| for _, filename := range changes.RemovedFilenames { | |||||
| if err := addDelete(filename, repo, batch); err != nil { | if err := addDelete(filename, repo, batch); err != nil { | ||||
| return err | return err | ||||
| } | } | ||||
| @@ -153,56 +153,61 @@ func updateRepoIndexer(repo *Repository) error { | |||||
| if err = batch.Flush(); err != nil { | if err = batch.Flush(); err != nil { | ||||
| return err | return err | ||||
| } | } | ||||
| return updateLastIndexSync(repo) | |||||
| return repo.updateIndexerStatus(sha) | |||||
| } | } | ||||
| // repoChanges changes (file additions/updates/removals) to a repo | // repoChanges changes (file additions/updates/removals) to a repo | ||||
| type repoChanges struct { | type repoChanges struct { | ||||
| UpdatedFiles []string | |||||
| RemovedFiles []string | |||||
| Updates []fileUpdate | |||||
| RemovedFilenames []string | |||||
| } | } | ||||
| // getRepoChanges returns changes to repo since last indexer update | |||||
| func getRepoChanges(repo *Repository) (*repoChanges, error) { | |||||
| repoWorkingPool.CheckIn(com.ToStr(repo.ID)) | |||||
| defer repoWorkingPool.CheckOut(com.ToStr(repo.ID)) | |||||
| type fileUpdate struct { | |||||
| Filename string | |||||
| BlobSha string | |||||
| } | |||||
| if err := repo.UpdateLocalCopyBranch(""); err != nil { | |||||
| return nil, err | |||||
| } else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) { | |||||
| // repo does not have any commits yet, so nothing to update | |||||
| return nil, nil | |||||
| } else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil { | |||||
| return nil, err | |||||
| } else if err = repo.getIndexerStatus(); err != nil { | |||||
| func getDefaultBranchSha(repo *Repository) (string, error) { | |||||
| stdout, err := git.NewCommand("show-ref", "-s", repo.DefaultBranch).RunInDir(repo.RepoPath()) | |||||
| if err != nil { | |||||
| return "", err | |||||
| } | |||||
| return strings.TrimSpace(stdout), nil | |||||
| } | |||||
| // getRepoChanges returns changes to repo since last indexer update | |||||
| func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) { | |||||
| if err := repo.getIndexerStatus(); err != nil { | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| if len(repo.IndexerStatus.CommitSha) == 0 { | if len(repo.IndexerStatus.CommitSha) == 0 { | ||||
| return genesisChanges(repo) | |||||
| return genesisChanges(repo, revision) | |||||
| } | } | ||||
| return nonGenesisChanges(repo) | |||||
| return nonGenesisChanges(repo, revision) | |||||
| } | } | ||||
| func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error { | |||||
| filepath := path.Join(repo.LocalCopyPath(), filename) | |||||
| if stat, err := os.Stat(filepath); err != nil { | |||||
| func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error { | |||||
| stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | |||||
| RunInDir(repo.RepoPath()) | |||||
| if err != nil { | |||||
| return err | return err | ||||
| } else if stat.Size() > setting.Indexer.MaxIndexerFileSize { | |||||
| return nil | |||||
| } else if stat.IsDir() { | |||||
| // file could actually be a directory, if it is the root of a submodule. | |||||
| // We do not index submodule contents, so don't do anything. | |||||
| } | |||||
| if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | |||||
| return fmt.Errorf("Misformatted git cat-file output: %v", err) | |||||
| } else if int64(size) > setting.Indexer.MaxIndexerFileSize { | |||||
| return nil | return nil | ||||
| } | } | ||||
| fileContents, err := ioutil.ReadFile(filepath) | |||||
| fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). | |||||
| RunInDirBytes(repo.RepoPath()) | |||||
| if err != nil { | if err != nil { | ||||
| return err | return err | ||||
| } else if !base.IsTextFile(fileContents) { | } else if !base.IsTextFile(fileContents) { | ||||
| return nil | return nil | ||||
| } | } | ||||
| return batch.Add(indexer.RepoIndexerUpdate{ | return batch.Add(indexer.RepoIndexerUpdate{ | ||||
| Filepath: filename, | |||||
| Filepath: update.Filename, | |||||
| Op: indexer.RepoIndexerOpUpdate, | Op: indexer.RepoIndexerOpUpdate, | ||||
| Data: &indexer.RepoIndexerData{ | Data: &indexer.RepoIndexerData{ | ||||
| RepoID: repo.ID, | RepoID: repo.ID, | ||||
| @@ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error { | |||||
| }) | }) | ||||
| } | } | ||||
| // genesisChanges get changes to add repo to the indexer for the first time | |||||
| func genesisChanges(repo *Repository) (*repoChanges, error) { | |||||
| var changes repoChanges | |||||
| stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath()) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| for _, line := range strings.Split(stdout, "\n") { | |||||
| filename := strings.TrimSpace(line) | |||||
| if len(filename) == 0 { | |||||
| // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command | |||||
| func parseGitLsTreeOutput(stdout string) ([]fileUpdate, error) { | |||||
| lines := strings.Split(stdout, "\n") | |||||
| updates := make([]fileUpdate, 0, len(lines)) | |||||
| for _, line := range lines { | |||||
| // expect line to be "<mode> <object-type> <object-sha>\t<filename>" | |||||
| line = strings.TrimSpace(line) | |||||
| if len(line) == 0 { | |||||
| continue | continue | ||||
| } else if filename[0] == '"' { | |||||
| } | |||||
| firstSpaceIndex := strings.IndexByte(line, ' ') | |||||
| if firstSpaceIndex < 0 { | |||||
| log.Error(4, "Misformatted git ls-tree output: %s", line) | |||||
| continue | |||||
| } | |||||
| tabIndex := strings.IndexByte(line, '\t') | |||||
| if tabIndex < 42+firstSpaceIndex || tabIndex == len(line)-1 { | |||||
| log.Error(4, "Misformatted git ls-tree output: %s", line) | |||||
| continue | |||||
| } | |||||
| if objectType := line[firstSpaceIndex+1 : tabIndex-41]; objectType != "blob" { | |||||
| // submodules appear as commit objects, we do not index submodules | |||||
| continue | |||||
| } | |||||
| blobSha := line[tabIndex-40 : tabIndex] | |||||
| filename := line[tabIndex+1:] | |||||
| if filename[0] == '"' { | |||||
| var err error | |||||
| filename, err = strconv.Unquote(filename) | filename, err = strconv.Unquote(filename) | ||||
| if err != nil { | if err != nil { | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| } | } | ||||
| changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | |||||
| updates = append(updates, fileUpdate{ | |||||
| Filename: filename, | |||||
| BlobSha: blobSha, | |||||
| }) | |||||
| } | |||||
| return updates, nil | |||||
| } | |||||
| // genesisChanges get changes to add repo to the indexer for the first time | |||||
| func genesisChanges(repo *Repository, revision string) (*repoChanges, error) { | |||||
| var changes repoChanges | |||||
| stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). | |||||
| RunInDir(repo.RepoPath()) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | } | ||||
| return &changes, nil | |||||
| changes.Updates, err = parseGitLsTreeOutput(stdout) | |||||
| return &changes, err | |||||
| } | } | ||||
| // nonGenesisChanges get changes since the previous indexer update | // nonGenesisChanges get changes since the previous indexer update | ||||
| func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | |||||
| func nonGenesisChanges(repo *Repository, revision string) (*repoChanges, error) { | |||||
| diffCmd := git.NewCommand("diff", "--name-status", | diffCmd := git.NewCommand("diff", "--name-status", | ||||
| repo.IndexerStatus.CommitSha, "HEAD") | |||||
| stdout, err := diffCmd.RunInDir(repo.LocalCopyPath()) | |||||
| repo.IndexerStatus.CommitSha, revision) | |||||
| stdout, err := diffCmd.RunInDir(repo.RepoPath()) | |||||
| if err != nil { | if err != nil { | ||||
| // previous commit sha may have been removed by a force push, so | // previous commit sha may have been removed by a force push, so | ||||
| // try rebuilding from scratch | // try rebuilding from scratch | ||||
| log.Warn("git diff: %v", err) | |||||
| if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| return genesisChanges(repo) | |||||
| return genesisChanges(repo, revision) | |||||
| } | } | ||||
| var changes repoChanges | var changes repoChanges | ||||
| updatedFilenames := make([]string, 0, 10) | |||||
| for _, line := range strings.Split(stdout, "\n") { | for _, line := range strings.Split(stdout, "\n") { | ||||
| line = strings.TrimSpace(line) | line = strings.TrimSpace(line) | ||||
| if len(line) == 0 { | if len(line) == 0 { | ||||
| @@ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | |||||
| switch status := line[0]; status { | switch status := line[0]; status { | ||||
| case 'M', 'A': | case 'M', 'A': | ||||
| changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | |||||
| updatedFilenames = append(updatedFilenames, filename) | |||||
| case 'D': | case 'D': | ||||
| changes.RemovedFiles = append(changes.RemovedFiles, filename) | |||||
| changes.RemovedFilenames = append(changes.RemovedFilenames, filename) | |||||
| default: | default: | ||||
| log.Warn("Unrecognized status: %c (line=%s)", status, line) | log.Warn("Unrecognized status: %c (line=%s)", status, line) | ||||
| } | } | ||||
| } | } | ||||
| return &changes, nil | |||||
| } | |||||
| func updateLastIndexSync(repo *Repository) error { | |||||
| stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath()) | |||||
| cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") | |||||
| cmd.AddArguments(updatedFilenames...) | |||||
| stdout, err = cmd.RunInDir(repo.RepoPath()) | |||||
| if err != nil { | if err != nil { | ||||
| return err | |||||
| return nil, err | |||||
| } | } | ||||
| sha := strings.TrimSpace(stdout) | |||||
| return repo.updateIndexerStatus(sha) | |||||
| changes.Updates, err = parseGitLsTreeOutput(stdout) | |||||
| return &changes, err | |||||
| } | } | ||||
| func processRepoIndexerOperationQueue() { | func processRepoIndexerOperationQueue() { | ||||