* Update makefile to use dep * Migrate to dep * Fix some deps * Try to find a better version for golang.org/x/net * Try to find a better version for golang.org/x/oauth2
		
			
				
	
	
		
			360 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			360 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2013 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package cldr
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"encoding/xml"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"unicode"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
| // RuleProcessor can be passed to Collator's Process method, which
 | |
| // parses the rules and calls the respective method for each rule found.
 | |
| type RuleProcessor interface {
 | |
| 	Reset(anchor string, before int) error
 | |
| 	Insert(level int, str, context, extend string) error
 | |
| 	Index(id string)
 | |
| }
 | |
| 
 | |
| const (
 | |
| 	// cldrIndex is a Unicode-reserved sentinel value used to mark the start
 | |
| 	// of a grouping within an index.
 | |
| 	// We ignore any rule that starts with this rune.
 | |
| 	// See http://unicode.org/reports/tr35/#Collation_Elements for details.
 | |
| 	cldrIndex = "\uFDD0"
 | |
| 
 | |
| 	// specialAnchor is the format in which to represent logical reset positions,
 | |
| 	// such as "first tertiary ignorable".
 | |
| 	specialAnchor = "<%s/>"
 | |
| )
 | |
| 
 | |
| // Process parses the rules for the tailorings of this collation
 | |
| // and calls the respective methods of p for each rule found.
 | |
| func (c Collation) Process(p RuleProcessor) (err error) {
 | |
| 	if len(c.Cr) > 0 {
 | |
| 		if len(c.Cr) > 1 {
 | |
| 			return fmt.Errorf("multiple cr elements, want 0 or 1")
 | |
| 		}
 | |
| 		return processRules(p, c.Cr[0].Data())
 | |
| 	}
 | |
| 	if c.Rules.Any != nil {
 | |
| 		return c.processXML(p)
 | |
| 	}
 | |
| 	return errors.New("no tailoring data")
 | |
| }
 | |
| 
 | |
| // processRules parses rules in the Collation Rule Syntax defined in
 | |
| // http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
 | |
| func processRules(p RuleProcessor, s string) (err error) {
 | |
| 	chk := func(s string, e error) string {
 | |
| 		if err == nil {
 | |
| 			err = e
 | |
| 		}
 | |
| 		return s
 | |
| 	}
 | |
| 	i := 0 // Save the line number for use after the loop.
 | |
| 	scanner := bufio.NewScanner(strings.NewReader(s))
 | |
| 	for ; scanner.Scan() && err == nil; i++ {
 | |
| 		for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
 | |
| 			level := 5
 | |
| 			var ch byte
 | |
| 			switch ch, s = s[0], s[1:]; ch {
 | |
| 			case '&': // followed by <anchor> or '[' <key> ']'
 | |
| 				if s = skipSpace(s); consume(&s, '[') {
 | |
| 					s = chk(parseSpecialAnchor(p, s))
 | |
| 				} else {
 | |
| 					s = chk(parseAnchor(p, 0, s))
 | |
| 				}
 | |
| 			case '<': // sort relation '<'{1,4}, optionally followed by '*'.
 | |
| 				for level = 1; consume(&s, '<'); level++ {
 | |
| 				}
 | |
| 				if level > 4 {
 | |
| 					err = fmt.Errorf("level %d > 4", level)
 | |
| 				}
 | |
| 				fallthrough
 | |
| 			case '=': // identity relation, optionally followed by *.
 | |
| 				if consume(&s, '*') {
 | |
| 					s = chk(parseSequence(p, level, s))
 | |
| 				} else {
 | |
| 					s = chk(parseOrder(p, level, s))
 | |
| 				}
 | |
| 			default:
 | |
| 				chk("", fmt.Errorf("illegal operator %q", ch))
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	if chk("", scanner.Err()); err != nil {
 | |
| 		return fmt.Errorf("%d: %v", i, err)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // parseSpecialAnchor parses the anchor syntax which is either of the form
 | |
| //    ['before' <level>] <anchor>
 | |
| // or
 | |
| //    [<label>]
 | |
| // The starting should already be consumed.
 | |
| func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
 | |
| 	i := strings.IndexByte(s, ']')
 | |
| 	if i == -1 {
 | |
| 		return "", errors.New("unmatched bracket")
 | |
| 	}
 | |
| 	a := strings.TrimSpace(s[:i])
 | |
| 	s = s[i+1:]
 | |
| 	if strings.HasPrefix(a, "before ") {
 | |
| 		l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
 | |
| 		if err != nil {
 | |
| 			return s, err
 | |
| 		}
 | |
| 		return parseAnchor(p, int(l), s)
 | |
| 	}
 | |
| 	return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
 | |
| }
 | |
| 
 | |
| func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
 | |
| 	anchor, s, err := scanString(s)
 | |
| 	if err != nil {
 | |
| 		return s, err
 | |
| 	}
 | |
| 	return s, p.Reset(anchor, level)
 | |
| }
 | |
| 
 | |
| func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
 | |
| 	var value, context, extend string
 | |
| 	if value, s, err = scanString(s); err != nil {
 | |
| 		return s, err
 | |
| 	}
 | |
| 	if strings.HasPrefix(value, cldrIndex) {
 | |
| 		p.Index(value[len(cldrIndex):])
 | |
| 		return
 | |
| 	}
 | |
| 	if consume(&s, '|') {
 | |
| 		if context, s, err = scanString(s); err != nil {
 | |
| 			return s, errors.New("missing string after context")
 | |
| 		}
 | |
| 	}
 | |
| 	if consume(&s, '/') {
 | |
| 		if extend, s, err = scanString(s); err != nil {
 | |
| 			return s, errors.New("missing string after extension")
 | |
| 		}
 | |
| 	}
 | |
| 	return s, p.Insert(level, value, context, extend)
 | |
| }
 | |
| 
 | |
| // scanString scans a single input string.
 | |
| func scanString(s string) (str, tail string, err error) {
 | |
| 	if s = skipSpace(s); s == "" {
 | |
| 		return s, s, errors.New("missing string")
 | |
| 	}
 | |
| 	buf := [16]byte{} // small but enough to hold most cases.
 | |
| 	value := buf[:0]
 | |
| 	for s != "" {
 | |
| 		if consume(&s, '\'') {
 | |
| 			i := strings.IndexByte(s, '\'')
 | |
| 			if i == -1 {
 | |
| 				return "", "", errors.New(`unmatched single quote`)
 | |
| 			}
 | |
| 			if i == 0 {
 | |
| 				value = append(value, '\'')
 | |
| 			} else {
 | |
| 				value = append(value, s[:i]...)
 | |
| 			}
 | |
| 			s = s[i+1:]
 | |
| 			continue
 | |
| 		}
 | |
| 		r, sz := utf8.DecodeRuneInString(s)
 | |
| 		if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
 | |
| 			break
 | |
| 		}
 | |
| 		value = append(value, s[:sz]...)
 | |
| 		s = s[sz:]
 | |
| 	}
 | |
| 	return string(value), skipSpace(s), nil
 | |
| }
 | |
| 
 | |
| func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
 | |
| 	if s = skipSpace(s); s == "" {
 | |
| 		return s, errors.New("empty sequence")
 | |
| 	}
 | |
| 	last := rune(0)
 | |
| 	for s != "" {
 | |
| 		r, sz := utf8.DecodeRuneInString(s)
 | |
| 		s = s[sz:]
 | |
| 
 | |
| 		if r == '-' {
 | |
| 			// We have a range. The first element was already written.
 | |
| 			if last == 0 {
 | |
| 				return s, errors.New("range without starter value")
 | |
| 			}
 | |
| 			r, sz = utf8.DecodeRuneInString(s)
 | |
| 			s = s[sz:]
 | |
| 			if r == utf8.RuneError || r < last {
 | |
| 				return s, fmt.Errorf("invalid range %q-%q", last, r)
 | |
| 			}
 | |
| 			for i := last + 1; i <= r; i++ {
 | |
| 				if err := p.Insert(level, string(i), "", ""); err != nil {
 | |
| 					return s, err
 | |
| 				}
 | |
| 			}
 | |
| 			last = 0
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		if unicode.IsSpace(r) || unicode.IsPunct(r) {
 | |
| 			break
 | |
| 		}
 | |
| 
 | |
| 		// normal case
 | |
| 		if err := p.Insert(level, string(r), "", ""); err != nil {
 | |
| 			return s, err
 | |
| 		}
 | |
| 		last = r
 | |
| 	}
 | |
| 	return s, nil
 | |
| }
 | |
| 
 | |
| func skipSpace(s string) string {
 | |
| 	return strings.TrimLeftFunc(s, unicode.IsSpace)
 | |
| }
 | |
| 
 | |
| // consumes returns whether the next byte is ch. If so, it gobbles it by
 | |
| // updating s.
 | |
| func consume(s *string, ch byte) (ok bool) {
 | |
| 	if *s == "" || (*s)[0] != ch {
 | |
| 		return false
 | |
| 	}
 | |
| 	*s = (*s)[1:]
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| // The following code parses Collation rules of CLDR version 24 and before.
 | |
| 
 | |
| var lmap = map[byte]int{
 | |
| 	'p': 1,
 | |
| 	's': 2,
 | |
| 	't': 3,
 | |
| 	'i': 5,
 | |
| }
 | |
| 
 | |
| type rulesElem struct {
 | |
| 	Rules struct {
 | |
| 		Common
 | |
| 		Any []*struct {
 | |
| 			XMLName xml.Name
 | |
| 			rule
 | |
| 		} `xml:",any"`
 | |
| 	} `xml:"rules"`
 | |
| }
 | |
| 
 | |
| type rule struct {
 | |
| 	Value  string `xml:",chardata"`
 | |
| 	Before string `xml:"before,attr"`
 | |
| 	Any    []*struct {
 | |
| 		XMLName xml.Name
 | |
| 		rule
 | |
| 	} `xml:",any"`
 | |
| }
 | |
| 
 | |
| var emptyValueError = errors.New("cldr: empty rule value")
 | |
| 
 | |
| func (r *rule) value() (string, error) {
 | |
| 	// Convert hexadecimal Unicode codepoint notation to a string.
 | |
| 	s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
 | |
| 	r.Value = s
 | |
| 	if s == "" {
 | |
| 		if len(r.Any) != 1 {
 | |
| 			return "", emptyValueError
 | |
| 		}
 | |
| 		r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
 | |
| 		r.Any = nil
 | |
| 	} else if len(r.Any) != 0 {
 | |
| 		return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
 | |
| 	}
 | |
| 	return r.Value, nil
 | |
| }
 | |
| 
 | |
| func (r rule) process(p RuleProcessor, name, context, extend string) error {
 | |
| 	v, err := r.value()
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	switch name {
 | |
| 	case "p", "s", "t", "i":
 | |
| 		if strings.HasPrefix(v, cldrIndex) {
 | |
| 			p.Index(v[len(cldrIndex):])
 | |
| 			return nil
 | |
| 		}
 | |
| 		if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	case "pc", "sc", "tc", "ic":
 | |
| 		level := lmap[name[0]]
 | |
| 		for _, s := range v {
 | |
| 			if err := p.Insert(level, string(s), context, extend); err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 	default:
 | |
| 		return fmt.Errorf("cldr: unsupported tag: %q", name)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // processXML parses the format of CLDR versions 24 and older.
 | |
| func (c Collation) processXML(p RuleProcessor) (err error) {
 | |
| 	// Collation is generated and defined in xml.go.
 | |
| 	var v string
 | |
| 	for _, r := range c.Rules.Any {
 | |
| 		switch r.XMLName.Local {
 | |
| 		case "reset":
 | |
| 			level := 0
 | |
| 			switch r.Before {
 | |
| 			case "primary", "1":
 | |
| 				level = 1
 | |
| 			case "secondary", "2":
 | |
| 				level = 2
 | |
| 			case "tertiary", "3":
 | |
| 				level = 3
 | |
| 			case "":
 | |
| 			default:
 | |
| 				return fmt.Errorf("cldr: unknown level %q", r.Before)
 | |
| 			}
 | |
| 			v, err = r.value()
 | |
| 			if err == nil {
 | |
| 				err = p.Reset(v, level)
 | |
| 			}
 | |
| 		case "x":
 | |
| 			var context, extend string
 | |
| 			for _, r1 := range r.Any {
 | |
| 				v, err = r1.value()
 | |
| 				switch r1.XMLName.Local {
 | |
| 				case "context":
 | |
| 					context = v
 | |
| 				case "extend":
 | |
| 					extend = v
 | |
| 				}
 | |
| 			}
 | |
| 			for _, r1 := range r.Any {
 | |
| 				if t := r1.XMLName.Local; t == "context" || t == "extend" {
 | |
| 					continue
 | |
| 				}
 | |
| 				r1.rule.process(p, r1.XMLName.Local, context, extend)
 | |
| 			}
 | |
| 		default:
 | |
| 			err = r.rule.process(p, r.XMLName.Local, "", "")
 | |
| 		}
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	return nil
 | |
| }
 |