parent
f135cb7c94
commit
6f7cd94a02
@ -39,8 +39,6 @@ import (
|
|||||||
const (
|
const (
|
||||||
unicodeNormalizeName = "unicodeNormalize"
|
unicodeNormalizeName = "unicodeNormalize"
|
||||||
maxBatchSize = 16
|
maxBatchSize = 16
|
||||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
|
||||||
fuzzyDenominator = 4
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||||
@ -245,7 +243,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||||
keywordQuery = phraseQuery
|
keywordQuery = phraseQuery
|
||||||
if opts.IsKeywordFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
|
phraseQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(opts.RepoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
|
@ -47,3 +47,15 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
|
|||||||
|
|
||||||
return index, 0, nil
|
return index, 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GuessFuzzinessByKeyword(s string) int {
|
||||||
|
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
|
||||||
|
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
|
||||||
|
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
|
||||||
|
for _, r := range s {
|
||||||
|
if r >= 128 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return min(2, len(s)/4)
|
||||||
|
}
|
||||||
|
@ -35,11 +35,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const maxBatchSize = 16
|
||||||
maxBatchSize = 16
|
|
||||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
|
||||||
fuzzyDenominator = 4
|
|
||||||
)
|
|
||||||
|
|
||||||
// IndexerData an update to the issue indexer
|
// IndexerData an update to the issue indexer
|
||||||
type IndexerData internal.IndexerData
|
type IndexerData internal.IndexerData
|
||||||
@ -162,7 +158,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||||||
if options.Keyword != "" {
|
if options.Keyword != "" {
|
||||||
fuzziness := 0
|
fuzziness := 0
|
||||||
if options.IsFuzzyKeyword {
|
if options.IsFuzzyKeyword {
|
||||||
fuzziness = len(options.Keyword) / fuzzyDenominator
|
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
||||||
}
|
}
|
||||||
|
|
||||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||||
|
@ -28,6 +28,7 @@ func Search(ctx *context.Context) {
|
|||||||
ctx.Data["Language"] = language
|
ctx.Data["Language"] = language
|
||||||
ctx.Data["IsFuzzy"] = isFuzzy
|
ctx.Data["IsFuzzy"] = isFuzzy
|
||||||
ctx.Data["PageIsViewCode"] = true
|
ctx.Data["PageIsViewCode"] = true
|
||||||
|
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||||
|
|
||||||
if keyword == "" {
|
if keyword == "" {
|
||||||
ctx.HTML(http.StatusOK, tplSearch)
|
ctx.HTML(http.StatusOK, tplSearch)
|
||||||
@ -86,7 +87,6 @@ func Search(ctx *context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
|
||||||
ctx.Data["Repo"] = ctx.Repo.Repository
|
ctx.Data["Repo"] = ctx.Repo.Repository
|
||||||
ctx.Data["SearchResults"] = searchResults
|
ctx.Data["SearchResults"] = searchResults
|
||||||
ctx.Data["SearchResultLanguages"] = searchResultLanguages
|
ctx.Data["SearchResultLanguages"] = searchResultLanguages
|
||||||
|
Loading…
Reference in New Issue
Block a user