forked from Shiloh/githaven
557 lines
17 KiB
Go
Vendored
557 lines
17 KiB
Go
Vendored
// Copyright (c) 2014 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package mapping
|
|
|
|
import (
|
|
"encoding"
|
|
"encoding/json"
|
|
"fmt"
|
|
"reflect"
|
|
"time"
|
|
|
|
"github.com/blevesearch/bleve/registry"
|
|
)
|
|
|
|
// A DocumentMapping describes how a type of document
|
|
// should be indexed.
|
|
// As documents can be hierarchical, named sub-sections
|
|
// of documents are mapped using the same structure in
|
|
// the Properties field.
|
|
// Each value inside a document can be indexed 0 or more
|
|
// ways. These index entries are called fields and
|
|
// are stored in the Fields field.
|
|
// Entire sections of a document can be ignored or
|
|
// excluded by setting Enabled to false.
|
|
// If not explicitly mapped, default mapping operations
|
|
// are used. To disable this automatic handling, set
|
|
// Dynamic to false.
|
|
type DocumentMapping struct {
|
|
Enabled bool `json:"enabled"`
|
|
Dynamic bool `json:"dynamic"`
|
|
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
|
|
Fields []*FieldMapping `json:"fields,omitempty"`
|
|
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
|
|
|
|
// StructTagKey overrides "json" when looking for field names in struct tags
|
|
StructTagKey string `json:"struct_tag_key,omitempty"`
|
|
}
|
|
|
|
func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
|
|
var err error
|
|
if dm.DefaultAnalyzer != "" {
|
|
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, property := range dm.Properties {
|
|
err = property.Validate(cache)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, field := range dm.Fields {
|
|
if field.Analyzer != "" {
|
|
_, err = cache.AnalyzerNamed(field.Analyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if field.DateFormat != "" {
|
|
_, err = cache.DateTimeParserNamed(field.DateFormat)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
switch field.Type {
|
|
case "text", "datetime", "number", "boolean", "geopoint":
|
|
default:
|
|
return fmt.Errorf("unknown field type: '%s'", field.Type)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// analyzerNameForPath attempts to first find the field
|
|
// described by this path, then returns the analyzer
|
|
// configured for that field
|
|
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
|
|
field := dm.fieldDescribedByPath(path)
|
|
if field != nil {
|
|
return field.Analyzer
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
|
|
pathElements := decodePath(path)
|
|
if len(pathElements) > 1 {
|
|
// easy case, there is more than 1 path element remaining
|
|
// the next path element must match a property name
|
|
// at this level
|
|
for propName, subDocMapping := range dm.Properties {
|
|
if propName == pathElements[0] {
|
|
return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
|
|
}
|
|
}
|
|
} else {
|
|
// just 1 path elememnt
|
|
// first look for property name with empty field
|
|
for propName, subDocMapping := range dm.Properties {
|
|
if propName == pathElements[0] {
|
|
// found property name match, now look at its fields
|
|
for _, field := range subDocMapping.Fields {
|
|
if field.Name == "" || field.Name == pathElements[0] {
|
|
// match
|
|
return field
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// next, walk the properties again, looking for field overriding the name
|
|
for propName, subDocMapping := range dm.Properties {
|
|
if propName != pathElements[0] {
|
|
// property name isn't a match, but field name could override it
|
|
for _, field := range subDocMapping.Fields {
|
|
if field.Name == pathElements[0] {
|
|
return field
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// documentMappingForPath only returns EXACT matches for a sub document
|
|
// or for an explicitly mapped field, if you want to find the
|
|
// closest document mapping to a field not explicitly mapped
|
|
// use closestDocMapping
|
|
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
|
|
pathElements := decodePath(path)
|
|
current := dm
|
|
OUTER:
|
|
for i, pathElement := range pathElements {
|
|
for name, subDocMapping := range current.Properties {
|
|
if name == pathElement {
|
|
current = subDocMapping
|
|
continue OUTER
|
|
}
|
|
}
|
|
// no subDocMapping matches this pathElement
|
|
// only if this is the last element check for field name
|
|
if i == len(pathElements)-1 {
|
|
for _, field := range current.Fields {
|
|
if field.Name == pathElement {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
return current
|
|
}
|
|
|
|
// closestDocMapping findest the most specific document mapping that matches
|
|
// part of the provided path
|
|
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
|
|
pathElements := decodePath(path)
|
|
current := dm
|
|
OUTER:
|
|
for _, pathElement := range pathElements {
|
|
for name, subDocMapping := range current.Properties {
|
|
if name == pathElement {
|
|
current = subDocMapping
|
|
continue OUTER
|
|
}
|
|
}
|
|
break
|
|
}
|
|
return current
|
|
}
|
|
|
|
// NewDocumentMapping returns a new document mapping
|
|
// with all the default values.
|
|
func NewDocumentMapping() *DocumentMapping {
|
|
return &DocumentMapping{
|
|
Enabled: true,
|
|
Dynamic: true,
|
|
}
|
|
}
|
|
|
|
// NewDocumentStaticMapping returns a new document
|
|
// mapping that will not automatically index parts
|
|
// of a document without an explicit mapping.
|
|
func NewDocumentStaticMapping() *DocumentMapping {
|
|
return &DocumentMapping{
|
|
Enabled: true,
|
|
}
|
|
}
|
|
|
|
// NewDocumentDisabledMapping returns a new document
|
|
// mapping that will not perform any indexing.
|
|
func NewDocumentDisabledMapping() *DocumentMapping {
|
|
return &DocumentMapping{}
|
|
}
|
|
|
|
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
|
|
// for the specified named subsection.
|
|
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
|
|
if dm.Properties == nil {
|
|
dm.Properties = make(map[string]*DocumentMapping)
|
|
}
|
|
dm.Properties[property] = sdm
|
|
}
|
|
|
|
// AddFieldMappingsAt adds one or more FieldMappings
|
|
// at the named sub-document. If the named sub-document
|
|
// doesn't yet exist it is created for you.
|
|
// This is a convenience function to make most common
|
|
// mappings more concise.
|
|
// Otherwise, you would:
|
|
// subMapping := NewDocumentMapping()
|
|
// subMapping.AddFieldMapping(fieldMapping)
|
|
// parentMapping.AddSubDocumentMapping(property, subMapping)
|
|
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
|
|
if dm.Properties == nil {
|
|
dm.Properties = make(map[string]*DocumentMapping)
|
|
}
|
|
sdm, ok := dm.Properties[property]
|
|
if !ok {
|
|
sdm = NewDocumentMapping()
|
|
}
|
|
for _, fm := range fms {
|
|
sdm.AddFieldMapping(fm)
|
|
}
|
|
dm.Properties[property] = sdm
|
|
}
|
|
|
|
// AddFieldMapping adds the provided FieldMapping for this section
|
|
// of the document.
|
|
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
|
|
if dm.Fields == nil {
|
|
dm.Fields = make([]*FieldMapping, 0)
|
|
}
|
|
dm.Fields = append(dm.Fields, fm)
|
|
}
|
|
|
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation
|
|
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
|
|
|
|
var tmp map[string]json.RawMessage
|
|
err := json.Unmarshal(data, &tmp)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// set defaults for fields which might have been omitted
|
|
dm.Enabled = true
|
|
dm.Dynamic = true
|
|
|
|
var invalidKeys []string
|
|
for k, v := range tmp {
|
|
switch k {
|
|
case "enabled":
|
|
err := json.Unmarshal(v, &dm.Enabled)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "dynamic":
|
|
err := json.Unmarshal(v, &dm.Dynamic)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "default_analyzer":
|
|
err := json.Unmarshal(v, &dm.DefaultAnalyzer)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "properties":
|
|
err := json.Unmarshal(v, &dm.Properties)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "fields":
|
|
err := json.Unmarshal(v, &dm.Fields)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case "struct_tag_key":
|
|
err := json.Unmarshal(v, &dm.StructTagKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
invalidKeys = append(invalidKeys, k)
|
|
}
|
|
}
|
|
|
|
if MappingJSONStrict && len(invalidKeys) > 0 {
|
|
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
|
|
rv := ""
|
|
current := dm
|
|
for _, pathElement := range path {
|
|
var ok bool
|
|
current, ok = current.Properties[pathElement]
|
|
if !ok {
|
|
break
|
|
}
|
|
if current.DefaultAnalyzer != "" {
|
|
rv = current.DefaultAnalyzer
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
|
|
// allow default "json" tag to be overridden
|
|
structTagKey := dm.StructTagKey
|
|
if structTagKey == "" {
|
|
structTagKey = "json"
|
|
}
|
|
|
|
val := reflect.ValueOf(data)
|
|
if !val.IsValid() {
|
|
return
|
|
}
|
|
|
|
typ := val.Type()
|
|
switch typ.Kind() {
|
|
case reflect.Map:
|
|
// FIXME can add support for other map keys in the future
|
|
if typ.Key().Kind() == reflect.String {
|
|
for _, key := range val.MapKeys() {
|
|
fieldName := key.String()
|
|
fieldVal := val.MapIndex(key).Interface()
|
|
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
|
|
}
|
|
}
|
|
case reflect.Struct:
|
|
for i := 0; i < val.NumField(); i++ {
|
|
field := typ.Field(i)
|
|
fieldName := field.Name
|
|
// anonymous fields of type struct can elide the type name
|
|
if field.Anonymous && field.Type.Kind() == reflect.Struct {
|
|
fieldName = ""
|
|
}
|
|
|
|
// if the field has a name under the specified tag, prefer that
|
|
tag := field.Tag.Get(structTagKey)
|
|
tagFieldName := parseTagName(tag)
|
|
if tagFieldName == "-" {
|
|
continue
|
|
}
|
|
// allow tag to set field name to empty, only if anonymous
|
|
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
|
|
fieldName = tagFieldName
|
|
}
|
|
|
|
if val.Field(i).CanInterface() {
|
|
fieldVal := val.Field(i).Interface()
|
|
newpath := path
|
|
if fieldName != "" {
|
|
newpath = append(path, fieldName)
|
|
}
|
|
dm.processProperty(fieldVal, newpath, indexes, context)
|
|
}
|
|
}
|
|
case reflect.Slice, reflect.Array:
|
|
for i := 0; i < val.Len(); i++ {
|
|
if val.Index(i).CanInterface() {
|
|
fieldVal := val.Index(i).Interface()
|
|
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
|
|
}
|
|
}
|
|
case reflect.Ptr:
|
|
ptrElem := val.Elem()
|
|
if ptrElem.IsValid() && ptrElem.CanInterface() {
|
|
dm.processProperty(ptrElem.Interface(), path, indexes, context)
|
|
}
|
|
case reflect.String:
|
|
dm.processProperty(val.String(), path, indexes, context)
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
dm.processProperty(float64(val.Int()), path, indexes, context)
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
dm.processProperty(float64(val.Uint()), path, indexes, context)
|
|
case reflect.Float32, reflect.Float64:
|
|
dm.processProperty(float64(val.Float()), path, indexes, context)
|
|
case reflect.Bool:
|
|
dm.processProperty(val.Bool(), path, indexes, context)
|
|
}
|
|
|
|
}
|
|
|
|
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
|
|
pathString := encodePath(path)
|
|
// look to see if there is a mapping for this field
|
|
subDocMapping := dm.documentMappingForPath(pathString)
|
|
closestDocMapping := dm.closestDocMapping(pathString)
|
|
|
|
// check to see if we even need to do further processing
|
|
if subDocMapping != nil && !subDocMapping.Enabled {
|
|
return
|
|
}
|
|
|
|
propertyValue := reflect.ValueOf(property)
|
|
if !propertyValue.IsValid() {
|
|
// cannot do anything with the zero value
|
|
return
|
|
}
|
|
propertyType := propertyValue.Type()
|
|
switch propertyType.Kind() {
|
|
case reflect.String:
|
|
propertyValueString := propertyValue.String()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
if fieldMapping.Type == "geopoint" {
|
|
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
|
|
} else {
|
|
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
|
}
|
|
}
|
|
} else if closestDocMapping.Dynamic {
|
|
// automatic indexing behavior
|
|
|
|
// first see if it can be parsed by the default date parser
|
|
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
|
|
if dateTimeParser != nil {
|
|
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
|
|
if err != nil {
|
|
// index as text
|
|
fieldMapping := newTextFieldMappingDynamic(context.im)
|
|
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
|
} else {
|
|
// index as datetime
|
|
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
|
|
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
|
|
return
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
|
|
return
|
|
case reflect.Float64, reflect.Float32:
|
|
propertyValFloat := propertyValue.Float()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
|
}
|
|
} else if closestDocMapping.Dynamic {
|
|
// automatic indexing behavior
|
|
fieldMapping := newNumericFieldMappingDynamic(context.im)
|
|
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
|
}
|
|
case reflect.Bool:
|
|
propertyValBool := propertyValue.Bool()
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
|
|
}
|
|
} else if closestDocMapping.Dynamic {
|
|
// automatic indexing behavior
|
|
fieldMapping := newBooleanFieldMappingDynamic(context.im)
|
|
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
|
|
}
|
|
case reflect.Struct:
|
|
switch property := property.(type) {
|
|
case time.Time:
|
|
// don't descend into the time struct
|
|
if subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processTime(property, pathString, path, indexes, context)
|
|
}
|
|
} else if closestDocMapping.Dynamic {
|
|
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
|
|
fieldMapping.processTime(property, pathString, path, indexes, context)
|
|
}
|
|
case encoding.TextMarshaler:
|
|
txt, err := property.MarshalText()
|
|
if err == nil && subDocMapping != nil {
|
|
// index by explicit mapping
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
if fieldMapping.Type == "text" {
|
|
fieldMapping.processString(string(txt), pathString, path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
dm.walkDocument(property, path, indexes, context)
|
|
default:
|
|
if subDocMapping != nil {
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
if fieldMapping.Type == "geopoint" {
|
|
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
dm.walkDocument(property, path, indexes, context)
|
|
}
|
|
case reflect.Map, reflect.Slice:
|
|
if subDocMapping != nil {
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
if fieldMapping.Type == "geopoint" {
|
|
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
|
|
}
|
|
}
|
|
}
|
|
dm.walkDocument(property, path, indexes, context)
|
|
case reflect.Ptr:
|
|
if !propertyValue.IsNil() {
|
|
switch property := property.(type) {
|
|
case encoding.TextMarshaler:
|
|
// ONLY process TextMarshaler if there is an explicit mapping
|
|
// AND all of the fiels are of type text
|
|
// OTHERWISE process field without TextMarshaler
|
|
if subDocMapping != nil {
|
|
allFieldsText := true
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
if fieldMapping.Type != "text" {
|
|
allFieldsText = false
|
|
break
|
|
}
|
|
}
|
|
txt, err := property.MarshalText()
|
|
if err == nil && allFieldsText {
|
|
txtStr := string(txt)
|
|
for _, fieldMapping := range subDocMapping.Fields {
|
|
fieldMapping.processString(txtStr, pathString, path, indexes, context)
|
|
}
|
|
return
|
|
}
|
|
}
|
|
dm.walkDocument(property, path, indexes, context)
|
|
default:
|
|
dm.walkDocument(property, path, indexes, context)
|
|
}
|
|
}
|
|
default:
|
|
dm.walkDocument(property, path, indexes, context)
|
|
}
|
|
}
|