From 4734d43e1422da04f9ff79ea0212f7e9472b55a1 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Mon, 25 Mar 2024 00:05:00 +0800 Subject: [PATCH] Support repo code search without setting up an indexer (#29998) By using git's grep ability, end users (especially small instance users) do not need to enable the indexer; they can still benefit from the code search feature. Fix #29996 ![image](https://github.com/go-gitea/gitea/assets/2114189/11b7e458-88a4-480d-b4d7-72ee59406dd1) ![image](https://github.com/go-gitea/gitea/assets/2114189/0fe777d5-c95c-4288-a818-0427680805b6) --------- Co-authored-by: silverwind --- .../administration/repo-indexer.en-us.md | 6 + docs/content/installation/comparison.en-us.md | 3 + modules/git/command.go | 5 +- modules/git/git.go | 8 +- modules/git/grep.go | 112 ++++++++++++++++++ modules/git/grep_test.go | 41 +++++++ modules/indexer/code/search.go | 35 +++--- options/locale/locale_en-US.ini | 1 + routers/web/repo/search.go | 67 +++++++---- templates/repo/home.tmpl | 23 ++-- templates/shared/search/code/search.tmpl | 15 ++- templates/shared/searchbottom.tmpl | 2 + 12 files changed, 253 insertions(+), 65 deletions(-) create mode 100644 modules/git/grep.go create mode 100644 modules/git/grep_test.go diff --git a/docs/content/administration/repo-indexer.en-us.md b/docs/content/administration/repo-indexer.en-us.md index 6dec2d63f..aa8222291 100644 --- a/docs/content/administration/repo-indexer.en-us.md +++ b/docs/content/administration/repo-indexer.en-us.md @@ -17,6 +17,12 @@ menu: # Repository indexer +## Builtin repository code search without indexer + +Users could do repository-level code search without setting up a repository indexer. +The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories. +Better code search support could be achieved by setting up the repository indexer. 
+ ## Setting up the repository indexer Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md): diff --git a/docs/content/installation/comparison.en-us.md b/docs/content/installation/comparison.en-us.md index 1ba4f7ecc..3fb6561f3 100644 --- a/docs/content/installation/comparison.en-us.md +++ b/docs/content/installation/comparison.en-us.md @@ -87,6 +87,9 @@ _Symbols used in table:_ | Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ | +- Gitea has builtin repository-level code search +- Better code search support could be achieved by [using a repository indexer](administration/repo-indexer.md) + ## Issue Tracker | Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE | diff --git a/modules/git/command.go b/modules/git/command.go index 371109730..22cb275ab 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -367,7 +367,6 @@ type RunStdError interface { error Unwrap() error Stderr() string - IsExitCode(code int) bool } type runStdError struct { @@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string { return r.stderr } -func (r *runStdError) IsExitCode(code int) bool { +func IsErrorExitCode(err error, code int) bool { var exitError *exec.ExitError - if errors.As(r.err, &exitError) { + if errors.As(err, &exitError) { return exitError.ExitCode() == code } return false diff --git a/modules/git/git.go b/modules/git/git.go index f688ea748..e411269f7 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error { func configSet(key, value string) error { stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil) - if err != nil && !err.IsExitCode(1) { + if err != nil && !IsErrorExitCode(err, 1) { return fmt.Errorf("failed to get 
git config %s, err: %w", key, err) } @@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error { // already exist return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist, set new config _, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil) if err != nil { @@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error { // already exist return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist, add new config _, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil) if err != nil { @@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error { } return nil } - if err.IsExitCode(1) { + if IsErrorExitCode(err, 1) { // not exist return nil } diff --git a/modules/git/grep.go b/modules/git/grep.go new file mode 100644 index 000000000..e53399598 --- /dev/null +++ b/modules/git/grep.go @@ -0,0 +1,112 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "os" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/util" +) + +type GrepResult struct { + Filename string + LineNumbers []int + LineCodes []string +} + +type GrepOptions struct { + RefName string + ContextLineNumber int + IsFuzzy bool +} + +func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) { + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("unable to create os pipe to grep: %w", err) + } + defer func() { + _ = stdoutReader.Close() + _ = stdoutWriter.Close() + }() + + /* + The output is like this ( "^@" means \x00): + + HEAD:.air.toml + 6^@bin = "gitea" + + HEAD:.changelog.yml + 2^@repo: go-gitea/gitea + */ + var results []*GrepResult + cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name") + cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) + if opts.IsFuzzy { + words := strings.Fields(search) + for _, word := range words { + cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) + } + } else { + cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) + } + cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD")) + stderr := bytes.Buffer{} + err = cmd.Run(&RunOpts{ + Dir: repo.Path, + Stdout: stdoutWriter, + Stderr: &stderr, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer stdoutReader.Close() + + isInBlock := false + scanner := bufio.NewScanner(stdoutReader) + var res *GrepResult + for scanner.Scan() { + line := scanner.Text() + if !isInBlock { + if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { + isInBlock = true + res = &GrepResult{Filename: filename} + results = append(results, res) + } + continue + } + if line == "" { + if len(results) >= 50 { + cancel() + break + } + 
isInBlock = false + continue + } + if line == "--" { + continue + } + if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok { + lineNumInt, _ := strconv.Atoi(lineNum) + res.LineNumbers = append(res.LineNumbers, lineNumInt) + res.LineCodes = append(res.LineCodes, lineCode) + } + } + return scanner.Err() + }, + }) + // git grep exits with 1 if no results are found + if IsErrorExitCode(err, 1) && stderr.Len() == 0 { + return nil, nil + } + if err != nil && !errors.Is(err, context.Canceled) { + return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String()) + } + return results, nil +} diff --git a/modules/git/grep_test.go b/modules/git/grep_test.go new file mode 100644 index 000000000..3993fa7ff --- /dev/null +++ b/modules/git/grep_test.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGrepSearch(t *testing.T) { + repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo")) + assert.NoError(t, err) + defer repo.Close() + + res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{}) + assert.NoError(t, err) + assert.Equal(t, []*GrepResult{ + { + Filename: "java-hello/main.java", + LineNumbers: []int{3}, + LineCodes: []string{" public static void main(String[] args)"}, + }, + { + Filename: "main.vendor.java", + LineNumbers: []int{3}, + LineCodes: []string{" public static void main(String[] args)"}, + }, + }, res) + + res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{}) + assert.NoError(t, err) + assert.Len(t, res, 0) + + res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{}) + assert.Error(t, err) + assert.Len(t, res, 0) +} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 
51c7595cf..5f35e8073 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { return nil } +func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine { + // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting + hl, _ := highlight.Code(filename, "", code) + highlightedLines := strings.Split(string(hl), "\n") + + // The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n` + lines := make([]ResultLine, min(len(highlightedLines), len(lineNums))) + for i := 0; i < len(lines); i++ { + lines[i].Num = lineNums[i] + lines[i].FormattedContent = template.HTML(highlightedLines[i]) + } + return lines +} + func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) { startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") var formattedLinesBuffer bytes.Buffer contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n") - lines := make([]ResultLine, 0, len(contentLines)) + lineNums := make([]int, 0, len(contentLines)) index := startIndex for i, line := range contentLines { var err error @@ -91,29 +105,16 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res line[closeActiveIndex:], ) } else { - err = writeStrings(&formattedLinesBuffer, - line, - ) + err = writeStrings(&formattedLinesBuffer, line) } if err != nil { return nil, err } - lines = append(lines, ResultLine{Num: startLineNum + i}) + lineNums = append(lineNums, startLineNum+i) index += len(line) } - // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting - hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String()) - highlightedLines := strings.Split(string(hl), "\n") - - // The lines outputted by 
highlight.Code might not match the original lines, because "highlight" removes the last `\n` - lines = lines[:min(len(highlightedLines), len(lines))] - highlightedLines = highlightedLines[:len(lines)] - for i := 0; i < len(lines); i++ { - lines[i].FormattedContent = template.HTML(highlightedLines[i]) - } - return &Result{ RepoID: result.RepoID, Filename: result.Filename, @@ -121,7 +122,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res UpdatedUnix: result.UpdatedUnix, Language: result.Language, Color: result.Color, - Lines: lines, + Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()), }, nil } diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 4c52c4eee..07082f99a 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -172,6 +172,7 @@ org_kind = Search orgs... team_kind = Search teams... code_kind = Search code... code_search_unavailable = Code search is currently not available. Please contact the site administrator. +code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if site administrator enables Repository Indexer. package_kind = Search packages... project_kind = Search projects... branch_kind = Search branches... 
diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 0f377a97b..9d65427b8 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -5,9 +5,11 @@ package repo import ( "net/http" + "strings" "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/git" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/context" @@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search" // Search render repository search page func Search(ctx *context.Context) { - if !setting.Indexer.RepoIndexerEnabled { - ctx.Redirect(ctx.Repo.RepoLink) - return - } - language := ctx.FormTrim("l") keyword := ctx.FormTrim("q") @@ -42,26 +39,54 @@ func Search(ctx *context.Context) { page = 1 } - total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: []int64{ctx.Repo.Repository.ID}, - Keyword: keyword, - IsKeywordFuzzy: isFuzzy, - Language: language, - Paginator: &db.ListOptions{ - Page: page, - PageSize: setting.UI.RepoSearchPagingNum, - }, - }) - if err != nil { - if code_indexer.IsAvailable(ctx) { - ctx.ServerError("SearchResults", err) + var total int + var searchResults []*code_indexer.Result + var searchResultLanguages []*code_indexer.SearchResultLanguages + if setting.Indexer.RepoIndexerEnabled { + var err error + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: []int64{ctx.Repo.Repository.ID}, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) + if err != nil { + if code_indexer.IsAvailable(ctx) { + ctx.ServerError("SearchResults", err) + return + } + ctx.Data["CodeIndexerUnavailable"] = true + } else { + ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) + } + } else 
{ + res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy}) + if err != nil { + ctx.ServerError("GrepSearch", err) return } - ctx.Data["CodeIndexerUnavailable"] = true - } else { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) + total = len(res) + pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) + pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) + res = res[pageStart:pageEnd] + for _, r := range res { + searchResults = append(searchResults, &code_indexer.Result{ + RepoID: ctx.Repo.Repository.ID, + Filename: r.Filename, + CommitID: ctx.Repo.CommitID, + // UpdatedUnix: not supported yet + // Language: not supported yet + // Color: not supported yet + Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")), + }) + } } + ctx.Data["CodeIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled ctx.Data["Repo"] = ctx.Repo.Repository ctx.Data["SearchResults"] = searchResults ctx.Data["SearchResultLanguages"] = searchResultLanguages diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl index 7b70f70be..2463c768f 100644 --- a/templates/repo/home.tmpl +++ b/templates/repo/home.tmpl @@ -5,27 +5,18 @@ {{template "base/alert" .}} {{template "repo/code/recently_pushed_new_branches" .}} {{if and (not .HideRepoInfo) (not .IsBlame)}} -
-
+
+
{{$description := .Repository.DescriptionHTML $.Context}} {{if $description}}{{$description | RenderCodeBlock}}{{else if .IsRepositoryAdmin}}{{ctx.Locale.Tr "repo.no_desc"}}{{end}} {{.Repository.Website}}
- {{if .RepoSearchEnabled}} -
{{range .Topics}}{{.Name}}{{end}} diff --git a/templates/shared/search/code/search.tmpl b/templates/shared/search/code/search.tmpl index 545ec1ea6..cb873f5a9 100644 --- a/templates/shared/search/code/search.tmpl +++ b/templates/shared/search/code/search.tmpl @@ -7,9 +7,16 @@

{{ctx.Locale.Tr "search.code_search_unavailable"}}

- {{else if .SearchResults}} - {{template "shared/search/code/results" .}} - {{else if .Keyword}} -
{{ctx.Locale.Tr "search.no_results"}}
+ {{else}} + {{if not .CodeIndexerEnabled}} +
+

{{ctx.Locale.Tr "search.code_search_by_git_grep"}}

+
+ {{end}} + {{if .SearchResults}} + {{template "shared/search/code/results" .}} + {{else if .Keyword}} +
{{ctx.Locale.Tr "search.no_results"}}
+ {{end}} {{end}}
diff --git a/templates/shared/searchbottom.tmpl b/templates/shared/searchbottom.tmpl index 43d6092e8..b22324585 100644 --- a/templates/shared/searchbottom.tmpl +++ b/templates/shared/searchbottom.tmpl @@ -1,3 +1,4 @@ +{{if or .result.Language (not .result.UpdatedUnix.IsZero)}}
{{if .result.Language}} @@ -10,3 +11,4 @@ {{end}}
+{{end}}