Indexed search of repository contents (for default branch only)
tags/v1.21.12.1
| @@ -192,7 +192,11 @@ ITERATE_BUFFER_SIZE = 50 | |||
| [indexer] | |||
| ISSUE_INDEXER_PATH = indexers/issues.bleve | |||
| ; repo indexer by default disabled, since it uses a lot of disk space | |||
| REPO_INDEXER_ENABLED = false | |||
| REPO_INDEXER_PATH = indexers/repos.bleve | |||
| UPDATE_BUFFER_LEN = 20 | |||
| MAX_FILE_SIZE = 1048576 | |||
| [admin] | |||
| ; Prevent regular (non-admin) users from creating organizations | |||
| @@ -63,6 +63,10 @@ func TestMain(m *testing.M) { | |||
| fmt.Printf("os.RemoveAll: %v\n", err) | |||
| os.Exit(1) | |||
| } | |||
| if err = os.RemoveAll(setting.Indexer.RepoPath); err != nil { | |||
| fmt.Printf("Unable to remove repo indexer: %v\n", err) | |||
| os.Exit(1) | |||
| } | |||
| os.Exit(exitCode) | |||
| } | |||
| @@ -12,6 +12,8 @@ PATH = data/gitea.db | |||
| [indexer] | |||
| ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve | |||
| REPO_INDEXER_ENABLED = true | |||
| REPO_INDEXER_PATH = integrations/indexers-mysql/repos.bleve | |||
| [repository] | |||
| ROOT = integrations/gitea-integration-mysql/gitea-repositories | |||
| @@ -12,6 +12,8 @@ PATH = data/gitea.db | |||
| [indexer] | |||
| ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve | |||
| REPO_INDEXER_ENABLED = true | |||
| REPO_INDEXER_PATH = integrations/indexers-pgsql/repos.bleve | |||
| [repository] | |||
| ROOT = integrations/gitea-integration-pgsql/gitea-repositories | |||
| @@ -0,0 +1,35 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package integrations | |||
| import ( | |||
| "net/http" | |||
| "testing" | |||
| "github.com/PuerkitoBio/goquery" | |||
| "github.com/stretchr/testify/assert" | |||
| ) | |||
| func resultFilenames(t testing.TB, doc *HTMLDoc) []string { | |||
| resultsSelection := doc.doc.Find(".repository.search") | |||
| assert.EqualValues(t, 1, resultsSelection.Length(), | |||
| "Invalid template (repo search template has changed?)") | |||
| filenameSelections := resultsSelection.Find(".repo-search-result").Find(".header").Find("span.file") | |||
| result := make([]string, filenameSelections.Length()) | |||
| filenameSelections.Each(func(i int, selection *goquery.Selection) { | |||
| result[i] = selection.Text() | |||
| }) | |||
| return result | |||
| } | |||
| func TestSearchRepo(t *testing.T) { | |||
| prepareTestEnv(t) | |||
| req := NewRequestf(t, "GET", "/user2/repo1/search?q=Description&page=1") | |||
| resp := MakeRequest(t, req, http.StatusOK) | |||
| filenames := resultFilenames(t, NewHTMLParser(t, resp.Body)) | |||
| assert.EqualValues(t, []string{"README.md"}, filenames) | |||
| } | |||
| @@ -7,6 +7,8 @@ PATH = :memory: | |||
| [indexer] | |||
| ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve | |||
| REPO_INDEXER_ENABLED = true | |||
| REPO_INDEXER_PATH = integrations/indexers-sqlite/repos.bleve | |||
| [repository] | |||
| ROOT = integrations/gitea-integration-sqlite/gitea-repositories | |||
| @@ -0,0 +1 @@ | |||
| [] # empty | |||
| @@ -144,6 +144,8 @@ var migrations = []Migration{ | |||
| NewMigration("remove organization watch repositories", removeOrganizationWatchRepo), | |||
| // v47 -> v48 | |||
| NewMigration("add deleted branches", addDeletedBranch), | |||
| // v48 -> v49 | |||
| NewMigration("add repo indexer status", addRepoIndexerStatus), | |||
| } | |||
| // Migrate database to current version | |||
| @@ -0,0 +1,25 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package migrations | |||
| import ( | |||
| "fmt" | |||
| "github.com/go-xorm/xorm" | |||
| ) | |||
| func addRepoIndexerStatus(x *xorm.Engine) error { | |||
| // RepoIndexerStatus see models/repo_indexer.go | |||
| type RepoIndexerStatus struct { | |||
| ID int64 `xorm:"pk autoincr"` | |||
| RepoID int64 `xorm:"INDEX NOT NULL"` | |||
| CommitSha string `xorm:"VARCHAR(40)"` | |||
| } | |||
| if err := x.Sync2(new(RepoIndexerStatus)); err != nil { | |||
| return fmt.Errorf("Sync2: %v", err) | |||
| } | |||
| return nil | |||
| } | |||
| @@ -13,6 +13,10 @@ import ( | |||
| "path" | |||
| "strings" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| "code.gitea.io/gitea/modules/util" | |||
| // Needed for the MySQL driver | |||
| _ "github.com/go-sql-driver/mysql" | |||
| "github.com/go-xorm/core" | |||
| @@ -23,9 +27,6 @@ import ( | |||
| // Needed for the MSSQL driver | |||
| _ "github.com/denisenkom/go-mssqldb" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| ) | |||
| // Engine represents a xorm engine or session. | |||
| @@ -115,6 +116,7 @@ func init() { | |||
| new(Stopwatch), | |||
| new(TrackedTime), | |||
| new(DeletedBranch), | |||
| new(RepoIndexerStatus), | |||
| ) | |||
| gonicNames := []string{"SSL", "UID"} | |||
| @@ -150,8 +152,13 @@ func LoadConfigs() { | |||
| DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500) | |||
| sec = setting.Cfg.Section("indexer") | |||
| setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve") | |||
| setting.Indexer.IssuePath = absolutePath( | |||
| sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")) | |||
| setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false) | |||
| setting.Indexer.RepoPath = absolutePath( | |||
| sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve")) | |||
| setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) | |||
| setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024) | |||
| } | |||
| // parsePostgreSQLHostPort parses given input in various forms defined in | |||
| @@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error { | |||
| } | |||
| return x.DumpTablesToFile(tbs, filePath) | |||
| } | |||
| // absolutePath make path absolute if it is relative | |||
| func absolutePath(path string) string { | |||
| workDir, err := setting.WorkDir() | |||
| if err != nil { | |||
| log.Fatal(4, "Failed to get work directory: %v", err) | |||
| } | |||
| return util.EnsureAbsolutePath(path, workDir) | |||
| } | |||
| @@ -205,10 +205,11 @@ type Repository struct { | |||
| ExternalMetas map[string]string `xorm:"-"` | |||
| Units []*RepoUnit `xorm:"-"` | |||
| IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"` | |||
| ForkID int64 `xorm:"INDEX"` | |||
| BaseRepo *Repository `xorm:"-"` | |||
| Size int64 `xorm:"NOT NULL DEFAULT 0"` | |||
| IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"` | |||
| ForkID int64 `xorm:"INDEX"` | |||
| BaseRepo *Repository `xorm:"-"` | |||
| Size int64 `xorm:"NOT NULL DEFAULT 0"` | |||
| IndexerStatus *RepoIndexerStatus `xorm:"-"` | |||
| Created time.Time `xorm:"-"` | |||
| CreatedUnix int64 `xorm:"INDEX created"` | |||
| @@ -782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error { | |||
| if err != nil { | |||
| return fmt.Errorf("git fetch origin: %v", err) | |||
| } | |||
| if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil { | |||
| return fmt.Errorf("git reset --hard origin/%s: %v", branch, err) | |||
| if len(branch) > 0 { | |||
| if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil { | |||
| return fmt.Errorf("git reset --hard origin/%s: %v", branch, err) | |||
| } | |||
| } | |||
| } | |||
| return nil | |||
| @@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err | |||
| if err = SyncReleasesWithTags(repo, gitRepo); err != nil { | |||
| log.Error(4, "Failed to synchronize tags to releases for repository: %v", err) | |||
| } | |||
| UpdateRepoIndexer(repo) | |||
| } | |||
| if err = repo.UpdateSize(); err != nil { | |||
| @@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error { | |||
| go HookQueue.Add(repo.ID) | |||
| } | |||
| DeleteRepoFromIndexer(repo) | |||
| return nil | |||
| } | |||
| @@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) ( | |||
| if err != nil { | |||
| return fmt.Errorf("PushUpdate: %v", err) | |||
| } | |||
| UpdateRepoIndexer(repo) | |||
| return nil | |||
| } | |||
| @@ -0,0 +1,302 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package models | |||
| import ( | |||
| "io/ioutil" | |||
| "os" | |||
| "path" | |||
| "strconv" | |||
| "strings" | |||
| "code.gitea.io/git" | |||
| "code.gitea.io/gitea/modules/base" | |||
| "code.gitea.io/gitea/modules/indexer" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| "github.com/Unknwon/com" | |||
| ) | |||
// RepoIndexerStatus records how far the repo indexer has processed a
// repository. For now it implicitly refers to the default branch.
type RepoIndexerStatus struct {
	// ID is the auto-incremented primary key.
	ID int64 `xorm:"pk autoincr"`
	// RepoID is the ID of the repository this status belongs to.
	RepoID int64 `xorm:"INDEX"`
	// CommitSha is the commit the index was last synced to; an empty value
	// is treated as "never indexed" by the indexing code in this file.
	CommitSha string `xorm:"VARCHAR(40)"`
}
| func (repo *Repository) getIndexerStatus() error { | |||
| if repo.IndexerStatus != nil { | |||
| return nil | |||
| } | |||
| status := &RepoIndexerStatus{RepoID: repo.ID} | |||
| has, err := x.Get(status) | |||
| if err != nil { | |||
| return err | |||
| } else if !has { | |||
| status.CommitSha = "" | |||
| } | |||
| repo.IndexerStatus = status | |||
| return nil | |||
| } | |||
| func (repo *Repository) updateIndexerStatus(sha string) error { | |||
| if err := repo.getIndexerStatus(); err != nil { | |||
| return err | |||
| } | |||
| if len(repo.IndexerStatus.CommitSha) == 0 { | |||
| repo.IndexerStatus.CommitSha = sha | |||
| _, err := x.Insert(repo.IndexerStatus) | |||
| return err | |||
| } | |||
| repo.IndexerStatus.CommitSha = sha | |||
| _, err := x.ID(repo.IndexerStatus.ID).Cols("commit_sha"). | |||
| Update(repo.IndexerStatus) | |||
| return err | |||
| } | |||
| type repoIndexerOperation struct { | |||
| repo *Repository | |||
| deleted bool | |||
| } | |||
| var repoIndexerOperationQueue chan repoIndexerOperation | |||
| // InitRepoIndexer initialize the repo indexer | |||
| func InitRepoIndexer() { | |||
| if !setting.Indexer.RepoIndexerEnabled { | |||
| return | |||
| } | |||
| indexer.InitRepoIndexer(populateRepoIndexer) | |||
| repoIndexerOperationQueue = make(chan repoIndexerOperation, setting.Indexer.UpdateQueueLength) | |||
| go processRepoIndexerOperationQueue() | |||
| } | |||
| // populateRepoIndexer populate the repo indexer with data | |||
| func populateRepoIndexer() error { | |||
| log.Info("Populating repository indexer (this may take a while)") | |||
| for page := 1; ; page++ { | |||
| repos, _, err := SearchRepositoryByName(&SearchRepoOptions{ | |||
| Page: page, | |||
| PageSize: 10, | |||
| OrderBy: SearchOrderByID, | |||
| Private: true, | |||
| }) | |||
| if err != nil { | |||
| return err | |||
| } else if len(repos) == 0 { | |||
| return nil | |||
| } | |||
| for _, repo := range repos { | |||
| if err = updateRepoIndexer(repo); err != nil { | |||
| // only log error, since this should not prevent | |||
| // gitea from starting up | |||
| log.Error(4, "updateRepoIndexer: repoID=%d, %v", repo.ID, err) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| type updateBatch struct { | |||
| updates []indexer.RepoIndexerUpdate | |||
| } | |||
| func updateRepoIndexer(repo *Repository) error { | |||
| changes, err := getRepoChanges(repo) | |||
| if err != nil { | |||
| return err | |||
| } else if changes == nil { | |||
| return nil | |||
| } | |||
| batch := indexer.RepoIndexerBatch() | |||
| for _, filename := range changes.UpdatedFiles { | |||
| if err := addUpdate(filename, repo, batch); err != nil { | |||
| return err | |||
| } | |||
| } | |||
| for _, filename := range changes.RemovedFiles { | |||
| if err := addDelete(filename, repo, batch); err != nil { | |||
| return err | |||
| } | |||
| } | |||
| if err = batch.Flush(); err != nil { | |||
| return err | |||
| } | |||
| return updateLastIndexSync(repo) | |||
| } | |||
// repoChanges lists the file-level changes of a repository that have to be
// applied to the index.
type repoChanges struct {
	// UpdatedFiles are the paths of added or modified files.
	UpdatedFiles []string
	// RemovedFiles are the paths of deleted files.
	RemovedFiles []string
}
| // getRepoChanges returns changes to repo since last indexer update | |||
| func getRepoChanges(repo *Repository) (*repoChanges, error) { | |||
| repoWorkingPool.CheckIn(com.ToStr(repo.ID)) | |||
| defer repoWorkingPool.CheckOut(com.ToStr(repo.ID)) | |||
| if err := repo.UpdateLocalCopyBranch(""); err != nil { | |||
| return nil, err | |||
| } else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) { | |||
| // repo does not have any commits yet, so nothing to update | |||
| return nil, nil | |||
| } else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil { | |||
| return nil, err | |||
| } else if err = repo.getIndexerStatus(); err != nil { | |||
| return nil, err | |||
| } | |||
| if len(repo.IndexerStatus.CommitSha) == 0 { | |||
| return genesisChanges(repo) | |||
| } | |||
| return nonGenesisChanges(repo) | |||
| } | |||
| func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error { | |||
| filepath := path.Join(repo.LocalCopyPath(), filename) | |||
| if stat, err := os.Stat(filepath); err != nil { | |||
| return err | |||
| } else if stat.Size() > setting.Indexer.MaxIndexerFileSize { | |||
| return nil | |||
| } | |||
| fileContents, err := ioutil.ReadFile(filepath) | |||
| if err != nil { | |||
| return err | |||
| } else if !base.IsTextFile(fileContents) { | |||
| return nil | |||
| } | |||
| return batch.Add(indexer.RepoIndexerUpdate{ | |||
| Filepath: filename, | |||
| Op: indexer.RepoIndexerOpUpdate, | |||
| Data: &indexer.RepoIndexerData{ | |||
| RepoID: repo.ID, | |||
| Content: string(fileContents), | |||
| }, | |||
| }) | |||
| } | |||
| func addDelete(filename string, repo *Repository, batch *indexer.Batch) error { | |||
| return batch.Add(indexer.RepoIndexerUpdate{ | |||
| Filepath: filename, | |||
| Op: indexer.RepoIndexerOpDelete, | |||
| Data: &indexer.RepoIndexerData{ | |||
| RepoID: repo.ID, | |||
| }, | |||
| }) | |||
| } | |||
| // genesisChanges get changes to add repo to the indexer for the first time | |||
| func genesisChanges(repo *Repository) (*repoChanges, error) { | |||
| var changes repoChanges | |||
| stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath()) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| for _, line := range strings.Split(stdout, "\n") { | |||
| filename := strings.TrimSpace(line) | |||
| if len(filename) == 0 { | |||
| continue | |||
| } else if filename[0] == '"' { | |||
| filename, err = strconv.Unquote(filename) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | |||
| } | |||
| return &changes, nil | |||
| } | |||
| // nonGenesisChanges get changes since the previous indexer update | |||
| func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | |||
| diffCmd := git.NewCommand("diff", "--name-status", | |||
| repo.IndexerStatus.CommitSha, "HEAD") | |||
| stdout, err := diffCmd.RunInDir(repo.LocalCopyPath()) | |||
| if err != nil { | |||
| // previous commit sha may have been removed by a force push, so | |||
| // try rebuilding from scratch | |||
| if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | |||
| return nil, err | |||
| } | |||
| return genesisChanges(repo) | |||
| } | |||
| var changes repoChanges | |||
| for _, line := range strings.Split(stdout, "\n") { | |||
| line = strings.TrimSpace(line) | |||
| if len(line) == 0 { | |||
| continue | |||
| } | |||
| filename := strings.TrimSpace(line[1:]) | |||
| if len(filename) == 0 { | |||
| continue | |||
| } else if filename[0] == '"' { | |||
| filename, err = strconv.Unquote(filename) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| switch status := line[0]; status { | |||
| case 'M', 'A': | |||
| changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | |||
| case 'D': | |||
| changes.RemovedFiles = append(changes.RemovedFiles, filename) | |||
| default: | |||
| log.Warn("Unrecognized status: %c (line=%s)", status, line) | |||
| } | |||
| } | |||
| return &changes, nil | |||
| } | |||
| func updateLastIndexSync(repo *Repository) error { | |||
| stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath()) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| sha := strings.TrimSpace(stdout) | |||
| return repo.updateIndexerStatus(sha) | |||
| } | |||
| func processRepoIndexerOperationQueue() { | |||
| for { | |||
| op := <-repoIndexerOperationQueue | |||
| if op.deleted { | |||
| if err := indexer.DeleteRepoFromIndexer(op.repo.ID); err != nil { | |||
| log.Error(4, "DeleteRepoFromIndexer: %v", err) | |||
| } | |||
| } else { | |||
| if err := updateRepoIndexer(op.repo); err != nil { | |||
| log.Error(4, "updateRepoIndexer: %v", err) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // DeleteRepoFromIndexer remove all of a repository's entries from the indexer | |||
| func DeleteRepoFromIndexer(repo *Repository) { | |||
| addOperationToQueue(repoIndexerOperation{repo: repo, deleted: true}) | |||
| } | |||
| // UpdateRepoIndexer update a repository's entries in the indexer | |||
| func UpdateRepoIndexer(repo *Repository) { | |||
| addOperationToQueue(repoIndexerOperation{repo: repo, deleted: false}) | |||
| } | |||
| func addOperationToQueue(op repoIndexerOperation) { | |||
| if !setting.Indexer.RepoIndexerEnabled { | |||
| return | |||
| } | |||
| select { | |||
| case repoIndexerOperationQueue <- op: | |||
| break | |||
| default: | |||
| go func() { | |||
| repoIndexerOperationQueue <- op | |||
| }() | |||
| } | |||
| } | |||
| @@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) { | |||
| commits = ListToPushCommits(l) | |||
| } | |||
| if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch { | |||
| UpdateRepoIndexer(repo) | |||
| } | |||
| if err := CommitRepoAction(CommitRepoActionOptions{ | |||
| PusherName: opts.PusherName, | |||
| RepoOwnerID: owner.ID, | |||
| @@ -367,6 +367,7 @@ func RepoAssignment() macaron.Handler { | |||
| ctx.Data["DisableSSH"] = setting.SSH.Disabled | |||
| ctx.Data["ExposeAnonSSH"] = setting.SSH.ExposeAnonymous | |||
| ctx.Data["DisableHTTP"] = setting.Repository.DisableHTTPGit | |||
| ctx.Data["RepoSearchEnabled"] = setting.Indexer.RepoIndexerEnabled | |||
| ctx.Data["CloneLink"] = repo.CloneLink() | |||
| ctx.Data["WikiCloneLink"] = repo.WikiCloneLink() | |||
| @@ -0,0 +1,199 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package indexer | |||
| import ( | |||
| "os" | |||
| "strings" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| "github.com/blevesearch/bleve" | |||
| "github.com/blevesearch/bleve/analysis/analyzer/custom" | |||
| "github.com/blevesearch/bleve/analysis/token/camelcase" | |||
| "github.com/blevesearch/bleve/analysis/token/lowercase" | |||
| "github.com/blevesearch/bleve/analysis/tokenizer/unicode" | |||
| ) | |||
| const repoIndexerAnalyzer = "repoIndexerAnalyzer" | |||
| // repoIndexer (thread-safe) index for repository contents | |||
| var repoIndexer bleve.Index | |||
// RepoIndexerOp type of operation to perform on repo indexer
type RepoIndexerOp int

// The constants are declared with the RepoIndexerOp type explicitly, so they
// are typed values rather than untyped integers.
const (
	// RepoIndexerOpUpdate add/update a file's contents
	RepoIndexerOpUpdate RepoIndexerOp = iota

	// RepoIndexerOpDelete delete a file
	RepoIndexerOpDelete
)
// RepoIndexerData is the document stored in the repo indexer for one file.
type RepoIndexerData struct {
	// RepoID is the ID of the repository the file belongs to.
	RepoID int64
	// Content is the file's text.
	Content string
}
| // RepoIndexerUpdate an update to the repo indexer | |||
| type RepoIndexerUpdate struct { | |||
| Filepath string | |||
| Op RepoIndexerOp | |||
| Data *RepoIndexerData | |||
| } | |||
| func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error { | |||
| id := filenameIndexerID(update.Data.RepoID, update.Filepath) | |||
| switch update.Op { | |||
| case RepoIndexerOpUpdate: | |||
| return batch.Index(id, update.Data) | |||
| case RepoIndexerOpDelete: | |||
| batch.Delete(id) | |||
| default: | |||
| log.Error(4, "Unrecognized repo indexer op: %d", update.Op) | |||
| } | |||
| return nil | |||
| } | |||
| // InitRepoIndexer initialize repo indexer | |||
| func InitRepoIndexer(populateIndexer func() error) { | |||
| _, err := os.Stat(setting.Indexer.RepoPath) | |||
| if err != nil { | |||
| if os.IsNotExist(err) { | |||
| if err = createRepoIndexer(); err != nil { | |||
| log.Fatal(4, "CreateRepoIndexer: %v", err) | |||
| } | |||
| if err = populateIndexer(); err != nil { | |||
| log.Fatal(4, "PopulateRepoIndex: %v", err) | |||
| } | |||
| } else { | |||
| log.Fatal(4, "InitRepoIndexer: %v", err) | |||
| } | |||
| } else { | |||
| repoIndexer, err = bleve.Open(setting.Indexer.RepoPath) | |||
| if err != nil { | |||
| log.Fatal(4, "InitRepoIndexer, open index: %v", err) | |||
| } | |||
| } | |||
| } | |||
| // createRepoIndexer create a repo indexer if one does not already exist | |||
| func createRepoIndexer() error { | |||
| docMapping := bleve.NewDocumentMapping() | |||
| docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) | |||
| textFieldMapping := bleve.NewTextFieldMapping() | |||
| docMapping.AddFieldMappingsAt("Content", textFieldMapping) | |||
| mapping := bleve.NewIndexMapping() | |||
| if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { | |||
| return err | |||
| } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{ | |||
| "type": custom.Name, | |||
| "char_filters": []string{}, | |||
| "tokenizer": unicode.Name, | |||
| "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, | |||
| }); err != nil { | |||
| return err | |||
| } | |||
| mapping.DefaultAnalyzer = repoIndexerAnalyzer | |||
| mapping.AddDocumentMapping("repo", docMapping) | |||
| var err error | |||
| repoIndexer, err = bleve.New(setting.Indexer.RepoPath, mapping) | |||
| return err | |||
| } | |||
| func filenameIndexerID(repoID int64, filename string) string { | |||
| return indexerID(repoID) + "_" + filename | |||
| } | |||
| func filenameOfIndexerID(indexerID string) string { | |||
| index := strings.IndexByte(indexerID, '_') | |||
| if index == -1 { | |||
| log.Error(4, "Unexpected ID in repo indexer: %s", indexerID) | |||
| } | |||
| return indexerID[index+1:] | |||
| } | |||
| // RepoIndexerBatch batch to add updates to | |||
| func RepoIndexerBatch() *Batch { | |||
| return &Batch{ | |||
| batch: repoIndexer.NewBatch(), | |||
| index: repoIndexer, | |||
| } | |||
| } | |||
| // DeleteRepoFromIndexer delete all of a repo's files from indexer | |||
| func DeleteRepoFromIndexer(repoID int64) error { | |||
| query := numericEqualityQuery(repoID, "RepoID") | |||
| searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false) | |||
| result, err := repoIndexer.Search(searchRequest) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| batch := RepoIndexerBatch() | |||
| for _, hit := range result.Hits { | |||
| batch.batch.Delete(hit.ID) | |||
| if err = batch.flushIfFull(); err != nil { | |||
| return err | |||
| } | |||
| } | |||
| return batch.Flush() | |||
| } | |||
// RepoSearchResult is a single file hit returned by a repository search.
type RepoSearchResult struct {
	// StartIndex and EndIndex are the offsets into Content that bound the
	// matched terms.
	StartIndex int
	EndIndex   int
	// Filename is the path of the matching file.
	Filename string
	// Content is the indexed text of the file.
	Content string
}
| // SearchRepoByKeyword searches for files in the specified repo. | |||
| // Returns the matching file-paths | |||
| func SearchRepoByKeyword(repoID int64, keyword string, page, pageSize int) (int64, []*RepoSearchResult, error) { | |||
| phraseQuery := bleve.NewMatchPhraseQuery(keyword) | |||
| phraseQuery.FieldVal = "Content" | |||
| phraseQuery.Analyzer = repoIndexerAnalyzer | |||
| indexerQuery := bleve.NewConjunctionQuery( | |||
| numericEqualityQuery(repoID, "RepoID"), | |||
| phraseQuery, | |||
| ) | |||
| from := (page - 1) * pageSize | |||
| searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) | |||
| searchRequest.Fields = []string{"Content"} | |||
| searchRequest.IncludeLocations = true | |||
| result, err := repoIndexer.Search(searchRequest) | |||
| if err != nil { | |||
| return 0, nil, err | |||
| } | |||
| searchResults := make([]*RepoSearchResult, len(result.Hits)) | |||
| for i, hit := range result.Hits { | |||
| var startIndex, endIndex int = -1, -1 | |||
| for _, locations := range hit.Locations["Content"] { | |||
| location := locations[0] | |||
| locationStart := int(location.Start) | |||
| locationEnd := int(location.End) | |||
| if startIndex < 0 || locationStart < startIndex { | |||
| startIndex = locationStart | |||
| } | |||
| if endIndex < 0 || locationEnd > endIndex { | |||
| endIndex = locationEnd | |||
| } | |||
| } | |||
| searchResults[i] = &RepoSearchResult{ | |||
| StartIndex: startIndex, | |||
| EndIndex: endIndex, | |||
| Filename: filenameOfIndexerID(hit.ID), | |||
| Content: hit.Fields["Content"].(string), | |||
| } | |||
| } | |||
| return int64(result.Total), searchResults, nil | |||
| } | |||
| @@ -0,0 +1,128 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package search | |||
| import ( | |||
| "bytes" | |||
| gotemplate "html/template" | |||
| "strings" | |||
| "code.gitea.io/gitea/modules/highlight" | |||
| "code.gitea.io/gitea/modules/indexer" | |||
| "code.gitea.io/gitea/modules/util" | |||
| ) | |||
// Result is a search hit prepared for display.
type Result struct {
	// Filename is the path of the matching file.
	Filename string
	// HighlightClass is the highlighting class derived from Filename.
	HighlightClass string
	// LineNumbers holds the 1-based number of each displayed line.
	LineNumbers []int
	// FormattedLines is the rendered excerpt, one <li> element per line.
	FormattedLines gotemplate.HTML
}
// indices widens the byte range [selectionStartIndex, selectionEndIndex) of
// content so that it also covers one line of context before and one line
// after the selection.
//
// The returned start is the beginning of the line preceding the one that
// contains the selection start (or 0). The returned end is the position of
// the newline that terminates the line following the selection end (or
// len(content)).
//
// Out-of-range arguments are clamped to [0, len(content)] instead of
// causing an index panic, and the returned end is never smaller than the
// returned start, so content[start:end] is always a valid slice.
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
	startIndex := selectionStartIndex
	if startIndex < 0 {
		startIndex = 0
	} else if startIndex > len(content) {
		startIndex = len(content)
	}
	numLinesBefore := 0
	for ; startIndex > 0; startIndex-- {
		if content[startIndex-1] == '\n' {
			if numLinesBefore == 1 {
				break
			}
			numLinesBefore++
		}
	}

	endIndex := selectionEndIndex
	if endIndex < 0 {
		endIndex = 0
	} else if endIndex > len(content) {
		endIndex = len(content)
	}
	numLinesAfter := 0
	for ; endIndex < len(content); endIndex++ {
		if content[endIndex] == '\n' {
			if numLinesAfter == 1 {
				break
			}
			numLinesAfter++
		}
	}

	if endIndex < startIndex {
		// inverted selection: return an empty but valid range
		endIndex = startIndex
	}
	return startIndex, endIndex
}
| func writeStrings(buf *bytes.Buffer, strs ...string) error { | |||
| for _, s := range strs { | |||
| _, err := buf.WriteString(s) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| } | |||
| return nil | |||
| } | |||
| func searchResult(result *indexer.RepoSearchResult, startIndex, endIndex int) (*Result, error) { | |||
| startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") | |||
| var formattedLinesBuffer bytes.Buffer | |||
| contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n") | |||
| lineNumbers := make([]int, len(contentLines)) | |||
| index := startIndex | |||
| for i, line := range contentLines { | |||
| var err error | |||
| if index < result.EndIndex && | |||
| result.StartIndex < index+len(line) && | |||
| result.StartIndex < result.EndIndex { | |||
| openActiveIndex := util.Max(result.StartIndex-index, 0) | |||
| closeActiveIndex := util.Min(result.EndIndex-index, len(line)) | |||
| err = writeStrings(&formattedLinesBuffer, | |||
| `<li>`, | |||
| line[:openActiveIndex], | |||
| `<span class='active'>`, | |||
| line[openActiveIndex:closeActiveIndex], | |||
| `</span>`, | |||
| line[closeActiveIndex:], | |||
| `</li>`, | |||
| ) | |||
| } else { | |||
| err = writeStrings(&formattedLinesBuffer, | |||
| `<li>`, | |||
| line, | |||
| `</li>`, | |||
| ) | |||
| } | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| lineNumbers[i] = startLineNum + i | |||
| index += len(line) | |||
| } | |||
| return &Result{ | |||
| Filename: result.Filename, | |||
| HighlightClass: highlight.FileNameToHighlightClass(result.Filename), | |||
| LineNumbers: lineNumbers, | |||
| FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()), | |||
| }, nil | |||
| } | |||
| // PerformSearch perform a search on a repository | |||
| func PerformSearch(repoID int64, keyword string, page, pageSize int) (int, []*Result, error) { | |||
| if len(keyword) == 0 { | |||
| return 0, nil, nil | |||
| } | |||
| total, results, err := indexer.SearchRepoByKeyword(repoID, keyword, page, pageSize) | |||
| if err != nil { | |||
| return 0, nil, err | |||
| } | |||
| displayResults := make([]*Result, len(results)) | |||
| for i, result := range results { | |||
| startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex) | |||
| displayResults[i], err = searchResult(result, startIndex, endIndex) | |||
| if err != nil { | |||
| return 0, nil, err | |||
| } | |||
| } | |||
| return int(total), displayResults, nil | |||
| } | |||
| @@ -140,8 +140,11 @@ var ( | |||
| // Indexer settings | |||
| Indexer struct { | |||
| IssuePath string | |||
| UpdateQueueLength int | |||
| IssuePath string | |||
| RepoIndexerEnabled bool | |||
| RepoPath string | |||
| UpdateQueueLength int | |||
| MaxIndexerFileSize int64 | |||
| } | |||
| // Webhook settings | |||
| @@ -234,12 +237,13 @@ var ( | |||
| // UI settings | |||
| UI = struct { | |||
| ExplorePagingNum int | |||
| IssuePagingNum int | |||
| FeedMaxCommitNum int | |||
| ThemeColorMetaTag string | |||
| MaxDisplayFileSize int64 | |||
| ShowUserEmail bool | |||
| ExplorePagingNum int | |||
| IssuePagingNum int | |||
| RepoSearchPagingNum int | |||
| FeedMaxCommitNum int | |||
| ThemeColorMetaTag string | |||
| MaxDisplayFileSize int64 | |||
| ShowUserEmail bool | |||
| Admin struct { | |||
| UserPagingNum int | |||
| @@ -256,11 +260,12 @@ var ( | |||
| Keywords string | |||
| } `ini:"ui.meta"` | |||
| }{ | |||
| ExplorePagingNum: 20, | |||
| IssuePagingNum: 10, | |||
| FeedMaxCommitNum: 5, | |||
| ThemeColorMetaTag: `#6cc644`, | |||
| MaxDisplayFileSize: 8388608, | |||
| ExplorePagingNum: 20, | |||
| IssuePagingNum: 10, | |||
| RepoSearchPagingNum: 10, | |||
| FeedMaxCommitNum: 5, | |||
| ThemeColorMetaTag: `#6cc644`, | |||
| MaxDisplayFileSize: 8388608, | |||
| Admin: struct { | |||
| UserPagingNum int | |||
| RepoPagingNum int | |||
| @@ -0,0 +1,16 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package util | |||
| import "path/filepath" | |||
// EnsureAbsolutePath returns path unchanged when it is already absolute;
// otherwise it returns path joined onto absoluteBase.
func EnsureAbsolutePath(path string, absoluteBase string) string {
	if !filepath.IsAbs(path) {
		path = filepath.Join(absoluteBase, path)
	}
	return path
}
| @@ -38,3 +38,19 @@ func OptionalBoolOf(b bool) OptionalBool { | |||
| } | |||
| return OptionalBoolFalse | |||
| } | |||
// Max returns the larger of the two ints a and b.
func Max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// Min returns the smaller of the two ints a and b.
func Min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
| @@ -848,6 +848,10 @@ activity.title.releases_n = %d Releases | |||
| activity.title.releases_published_by = %s published by %s | |||
| activity.published_release_label = Published | |||
| search = Search | |||
| search.search_repo = Search repository | |||
| search.results = Search results for "%s" in <a href="%s">%s</a> | |||
| settings = Settings | |||
| settings.desc = Settings is where you can manage the settings for the repository | |||
| settings.options = Options | |||
| @@ -158,6 +158,11 @@ | |||
| } | |||
| &.file.list { | |||
| .repo-description { | |||
| display: flex; | |||
| justify-content: space-between; | |||
| align-items: center; | |||
| } | |||
| #repo-desc { | |||
| font-size: 1.2em; | |||
| } | |||
| @@ -226,7 +231,7 @@ | |||
| } | |||
| } | |||
| #file-content { | |||
| .non-diff-file-content { | |||
| .header { | |||
| .icon { | |||
| font-size: 1em; | |||
| @@ -244,7 +249,7 @@ | |||
| background: transparent; | |||
| border: 0; | |||
| outline: none; | |||
| } | |||
| } | |||
| .btn-octicon:hover { | |||
| color: #4078c0; | |||
| } | |||
| @@ -323,6 +328,9 @@ | |||
| } | |||
| } | |||
| } | |||
| .active { | |||
| background: #ffffdd; | |||
| } | |||
| } | |||
| } | |||
| @@ -1038,6 +1046,13 @@ | |||
| overflow-x: auto; | |||
| overflow-y: hidden; | |||
| } | |||
| .repo-search-result { | |||
| padding-top: 10px; | |||
| padding-bottom: 10px; | |||
| .lines-num a { | |||
| color: inherit; | |||
| } | |||
| } | |||
| &.quickstart { | |||
| .guide { | |||
| @@ -66,6 +66,7 @@ func GlobalInit() { | |||
| // Booting long running goroutines. | |||
| cron.NewContext() | |||
| models.InitIssueIndexer() | |||
| models.InitRepoIndexer() | |||
| models.InitSyncMirrors() | |||
| models.InitDeliverHooks() | |||
| models.InitTestPullRequests() | |||
| @@ -0,0 +1,46 @@ | |||
| // Copyright 2017 The Gitea Authors. All rights reserved. | |||
| // Use of this source code is governed by a MIT-style | |||
| // license that can be found in the LICENSE file. | |||
| package repo | |||
| import ( | |||
| "path" | |||
| "strings" | |||
| "code.gitea.io/gitea/modules/base" | |||
| "code.gitea.io/gitea/modules/context" | |||
| "code.gitea.io/gitea/modules/search" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| "github.com/Unknwon/paginater" | |||
| ) | |||
| const tplSearch base.TplName = "repo/search" | |||
| // Search render repository search page | |||
| func Search(ctx *context.Context) { | |||
| if !setting.Indexer.RepoIndexerEnabled { | |||
| ctx.Redirect(ctx.Repo.RepoLink, 302) | |||
| return | |||
| } | |||
| keyword := strings.TrimSpace(ctx.Query("q")) | |||
| page := ctx.QueryInt("page") | |||
| if page <= 0 { | |||
| page = 1 | |||
| } | |||
| total, searchResults, err := search.PerformSearch(ctx.Repo.Repository.ID, keyword, page, setting.UI.RepoSearchPagingNum) | |||
| if err != nil { | |||
| ctx.Handle(500, "SearchResults", err) | |||
| return | |||
| } | |||
| ctx.Data["Keyword"] = keyword | |||
| pager := paginater.New(total, setting.UI.RepoSearchPagingNum, page, 5) | |||
| ctx.Data["Page"] = pager | |||
| ctx.Data["SourcePath"] = setting.AppSubURL + "/" + | |||
| path.Join(ctx.Repo.Repository.Owner.Name, ctx.Repo.Repository.Name, "src", ctx.Repo.Repository.DefaultBranch) | |||
| ctx.Data["SearchResults"] = searchResults | |||
| ctx.Data["RequireHighlightJS"] = true | |||
| ctx.Data["PageIsViewCode"] = true | |||
| ctx.HTML(200, tplSearch) | |||
| } | |||
| @@ -649,6 +649,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Group("/:username/:reponame", func() { | |||
| m.Get("/stars", repo.Stars) | |||
| m.Get("/watchers", repo.Watchers) | |||
| m.Get("/search", context.CheckUnit(models.UnitTypeCode), repo.Search) | |||
| }, ignSignIn, context.RepoAssignment(), context.RepoRef(), context.UnitTypes(), context.LoadRepoUnits()) | |||
| m.Group("/:username", func() { | |||
| @@ -3,10 +3,26 @@ | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| {{template "base/alert" .}} | |||
| <p id="repo-desc"> | |||
| {{if .Repository.DescriptionHTML}}<span class="description has-emoji">{{.Repository.DescriptionHTML}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{.i18n.Tr "repo.no_desc"}}</span>{{end}} | |||
| <a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a> | |||
| </p> | |||
| <div class="ui repo-description"> | |||
| <div id="repo-desc"> | |||
| {{if .Repository.DescriptionHTML}}<span class="description has-emoji">{{.Repository.DescriptionHTML}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{.i18n.Tr "repo.no_desc"}}</span>{{end}} | |||
| <a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a> | |||
| </div> | |||
| {{if .RepoSearchEnabled}} | |||
| <div class="ui repo-search"> | |||
| <form class="ui form" action="{{.RepoLink}}/search" method="get"> | |||
| <div class="field"> | |||
| <div class="ui action input"> | |||
| <input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}"> | |||
| <button class="ui icon button" type="submit"> | |||
| <i class="search icon"></i> | |||
| </button> | |||
| </div> | |||
| </div> | |||
| </form> | |||
| </div> | |||
| {{end}} | |||
| </div> | |||
| {{template "repo/sub_menu" .}} | |||
| <div class="ui secondary menu"> | |||
| {{if .PullRequestCtx.Allowed}} | |||
| @@ -0,0 +1,49 @@ | |||
| {{template "base/head" .}} | |||
| <div class="repository file list"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="ui repo-search"> | |||
| <form class="ui form" method="get"> | |||
| <div class="ui fluid action input"> | |||
| <input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}"> | |||
| <button class="ui button" type="submit"> | |||
| <i class="search icon"></i> | |||
| </button> | |||
| </div> | |||
| </form> | |||
| </div> | |||
| {{if .Keyword}} | |||
| <h3> | |||
| {{.i18n.Tr "repo.search.results" .Keyword .RepoLink .RepoName | Str2html}} | |||
| </h3> | |||
| <div class="repository search"> | |||
| {{range $result := .SearchResults}} | |||
| <div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result"> | |||
| <h4 class="ui top attached normal header"> | |||
| <span class="file">{{.Filename}}</span> | |||
| <a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $.SourcePath}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a> | |||
| </h4> | |||
| <div class="ui attached table segment"> | |||
| <div class="file-body file-code code-view"> | |||
| <table> | |||
| <tbody> | |||
| <tr> | |||
| <td class="lines-num"> | |||
| {{range .LineNumbers}} | |||
| <a href="{{EscapePound $.SourcePath}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a> | |||
| {{end}} | |||
| </td> | |||
| <td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td> | |||
| </tr> | |||
| </tbody> | |||
| </table> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| {{end}} | |||
| </div> | |||
| {{template "base/paginate" .}} | |||
| {{end}} | |||
| </div> | |||
| </div> | |||
| {{template "base/footer" .}} | |||
| @@ -1,4 +1,4 @@ | |||
| <div id="file-content" class="{{TabSizeClass .Editorconfig .FileName}}"> | |||
| <div class="{{TabSizeClass .Editorconfig .FileName}} non-diff-file-content"> | |||
| <h4 class="ui top attached header" id="{{if .ReadmeExist}}repo-readme{{else}}repo-read-file{{end}}"> | |||
| {{if .ReadmeExist}} | |||
| <i class="book icon ui left"></i> | |||
| @@ -0,0 +1,78 @@ | |||
| // Copyright (c) 2016 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package camelcase | |||
| import ( | |||
| "bytes" | |||
| "unicode/utf8" | |||
| "github.com/blevesearch/bleve/analysis" | |||
| "github.com/blevesearch/bleve/registry" | |||
| ) | |||
// Name is the identifier under which this token filter's constructor is
// registered with the bleve registry (see init).
const Name = "camelCase"
// CamelCaseFilter splits a given token into a set of tokens where each resulting token
// falls into one of the following classes:
//  1) Upper case followed by lower case letters.
//     Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
//  2) Upper case followed by upper case letters.
//     Terminated by a number, an upper case followed by a lower case letter, or a non alpha-numeric symbol.
//  3) Lower case followed by lower case letters.
//     Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
//  4) Number followed by numbers.
//     Terminated by a letter or a non alpha-numeric symbol.
//  5) Non alpha-numeric symbol followed by non alpha-numeric symbols.
//     Terminated by a number or a letter.
//
// It does a one-time sequential pass over an input token, from left to right.
// The scan is greedy and generates the longest substring that fits into one of the classes.
//
// See the test file for examples of classes and their parsings.
type CamelCaseFilter struct{}
| func NewCamelCaseFilter() *CamelCaseFilter { | |||
| return &CamelCaseFilter{} | |||
| } | |||
| func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
| rv := make(analysis.TokenStream, 0, len(input)) | |||
| nextPosition := 1 | |||
| for _, token := range input { | |||
| runeCount := utf8.RuneCount(token.Term) | |||
| runes := bytes.Runes(token.Term) | |||
| p := NewParser(runeCount, nextPosition, token.Start) | |||
| for i := 0; i < runeCount; i++ { | |||
| if i+1 >= runeCount { | |||
| p.Push(runes[i], nil) | |||
| } else { | |||
| p.Push(runes[i], &runes[i+1]) | |||
| } | |||
| } | |||
| rv = append(rv, p.FlushTokens()...) | |||
| nextPosition = p.NextPosition() | |||
| } | |||
| return rv | |||
| } | |||
| func CamelCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
| return NewCamelCaseFilter(), nil | |||
| } | |||
// init registers CamelCaseFilterConstructor with the bleve registry under Name.
func init() {
	registry.RegisterTokenFilter(Name, CamelCaseFilterConstructor)
}
| @@ -0,0 +1,109 @@ | |||
| // Copyright (c) 2016 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package camelcase | |||
| import ( | |||
| "github.com/blevesearch/bleve/analysis" | |||
| ) | |||
| func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token { | |||
| term := analysis.BuildTermFromRunes(buffer) | |||
| token := &analysis.Token{ | |||
| Term: term, | |||
| Position: p.position, | |||
| Start: p.index, | |||
| End: p.index + len(term), | |||
| } | |||
| p.position++ | |||
| p.index += len(term) | |||
| return token | |||
| } | |||
// Parser accepts a symbol and passes it to the current state (representing a class).
// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that
// starts with the pushed symbol.
//
// Parser accumulates a new resulting token every time it switches state.
// Use FlushTokens() to get the results after the last symbol was pushed.
type Parser struct {
	// bufferLen is the capacity used whenever a new buffer is allocated.
	bufferLen int
	// buffer holds the symbols accumulated for the current state.
	buffer []rune
	// current is the state accumulating symbols; nil until the first Push.
	current State
	// tokens collects the tokens built so far.
	tokens []*analysis.Token
	// position is the Position given to the next token; incremented per token.
	position int
	// index is the Start offset given to the next token; advanced by the
	// length of each built term.
	index int
}
| func NewParser(len, position, index int) *Parser { | |||
| return &Parser{ | |||
| bufferLen: len, | |||
| buffer: make([]rune, 0, len), | |||
| tokens: make([]*analysis.Token, 0, len), | |||
| position: position, | |||
| index: index, | |||
| } | |||
| } | |||
| func (p *Parser) Push(sym rune, peek *rune) { | |||
| if p.current == nil { | |||
| // the start of parsing | |||
| p.current = p.NewState(sym) | |||
| p.buffer = append(p.buffer, sym) | |||
| } else if p.current.Member(sym, peek) { | |||
| // same state, just accumulate | |||
| p.buffer = append(p.buffer, sym) | |||
| } else { | |||
| // the old state is no more, thus convert the buffer | |||
| p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer)) | |||
| // let the new state begin | |||
| p.current = p.NewState(sym) | |||
| p.buffer = make([]rune, 0, p.bufferLen) | |||
| p.buffer = append(p.buffer, sym) | |||
| } | |||
| } | |||
| // Note. States have to have different starting symbols. | |||
| func (p *Parser) NewState(sym rune) State { | |||
| var found State | |||
| found = &LowerCaseState{} | |||
| if found.StartSym(sym) { | |||
| return found | |||
| } | |||
| found = &UpperCaseState{} | |||
| if found.StartSym(sym) { | |||
| return found | |||
| } | |||
| found = &NumberCaseState{} | |||
| if found.StartSym(sym) { | |||
| return found | |||
| } | |||
| return &NonAlphaNumericCaseState{} | |||
| } | |||
| func (p *Parser) FlushTokens() []*analysis.Token { | |||
| p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer)) | |||
| return p.tokens | |||
| } | |||
// NextPosition returns the position that the next token built by this parser
// would be assigned.
func (p *Parser) NextPosition() int {
	return p.position
}
| @@ -0,0 +1,87 @@ | |||
| // Copyright (c) 2016 Couchbase, Inc. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| package camelcase | |||
| import ( | |||
| "unicode" | |||
| ) | |||
// State codifies one of the classes that the parser recognizes.
type State interface {
	// StartSym reports whether sym can start this class.
	StartSym(sym rune) bool

	// Member reports whether sym is a member of this class.
	// peek, the next sym on the tape (nil when there is none), can also be
	// used to determine membership.
	Member(sym rune, peek *rune) bool
}
// LowerCaseState is the class made up of lower case letters only.
type LowerCaseState struct{}

// Member reports whether sym is a lower case letter; peek is ignored.
func (s *LowerCaseState) Member(sym rune, peek *rune) bool {
	isLower := unicode.IsLower(sym)
	return isLower
}

// StartSym reports whether sym is a lower case letter and can therefore
// start this class.
func (s *LowerCaseState) StartSym(sym rune) bool {
	return unicode.IsLower(sym)
}
// UpperCaseState is the class started by an upper case letter. The first
// symbol accepted by Member decides whether the class goes on to collect
// upper case or lower case letters.
type UpperCaseState struct {
	startedCollecting bool // set once Member has accepted its first symbol
	collectingUpper   bool // true when the class collects upper case letters only
}

// Member reports whether sym continues the class. Non-letters are rejected,
// as is an upper case letter that is directly followed by a lower case one.
// The first accepted symbol fixes the case being collected; afterwards only
// symbols of that case are accepted.
func (s *UpperCaseState) Member(sym rune, peek *rune) bool {
	isUpper := unicode.IsUpper(sym)
	if !isUpper && !unicode.IsLower(sym) {
		return false
	}

	if isUpper && peek != nil && unicode.IsLower(*peek) {
		return false
	}

	if s.startedCollecting {
		return s.collectingUpper == isUpper
	}

	// First member after the start symbol: it determines whether upper
	// case letters are being collected.
	s.startedCollecting = true
	s.collectingUpper = isUpper
	return true
}

// StartSym reports whether sym is an upper case letter and can therefore
// start this class.
func (s *UpperCaseState) StartSym(sym rune) bool {
	return unicode.IsUpper(sym)
}
// NumberCaseState is the class made up of number symbols only.
type NumberCaseState struct{}

// Member reports whether sym is a number as defined by unicode.IsNumber;
// peek is ignored.
func (s *NumberCaseState) Member(sym rune, peek *rune) bool {
	isNumber := unicode.IsNumber(sym)
	return isNumber
}

// StartSym reports whether sym is a number and can therefore start this class.
func (s *NumberCaseState) StartSym(sym rune) bool {
	return unicode.IsNumber(sym)
}
// NonAlphaNumericCaseState is the class of symbols that are neither lower
// case letters, upper case letters, nor numbers.
type NonAlphaNumericCaseState struct{}

// Member reports whether sym is none of lower case, upper case, or number;
// peek is ignored.
func (s *NonAlphaNumericCaseState) Member(sym rune, peek *rune) bool {
	alphaNumeric := unicode.IsLower(sym) || unicode.IsUpper(sym) || unicode.IsNumber(sym)
	return !alphaNumeric
}

// StartSym reports whether sym can start this class, using the same rule as
// Member.
func (s *NonAlphaNumericCaseState) StartSym(sym rune) bool {
	return s.Member(sym, nil)
}
| @@ -98,6 +98,12 @@ | |||
| "revision": "011b168f7b84ffef05aed6716d73d21b1a33e971", | |||
| "revisionTime": "2017-06-14T16:31:07Z" | |||
| }, | |||
| { | |||
| "checksumSHA1": "xj8o/nQj59yt+o+RZSa0n9V3vKY=", | |||
| "path": "github.com/blevesearch/bleve/analysis/token/camelcase", | |||
| "revision": "174f8ed44a0bf65e7c8fb228b60b58de62654cd2", | |||
| "revisionTime": "2017-06-28T17:18:15Z" | |||
| }, | |||
| { | |||
| "checksumSHA1": "3VIPkl12t1ko4y6DkbPcz+MtQjY=", | |||
| "path": "github.com/blevesearch/bleve/analysis/token/lowercase", | |||