githaven/vendor/github.com/dlclark/regexp2/match.go

348 lines
8.6 KiB
Go
Raw Normal View History

package regexp2
import (
"bytes"
"fmt"
)
// Match is a single regex result match that contains groups and repeated captures
//   - Groups
//   - Capture
//
// Group 0 (the whole match) is embedded directly; the remaining groups are
// built on demand by populateOtherGroups and cached in otherGroups.
type Match struct {
Group // embedded group 0
regex *Regexp
// lazily built groups 1..n; stays nil until populateOtherGroups runs
otherGroups []Group
// input to the match
textpos int
textstart int
capcount int
caps []int
// optional map consulted by GroupByNumber to translate a requested group
// number into an index into matches/matchcount
sparseCaps map[int]int
// output from the match
// matches[g] is a flat list of (start, length) pairs for group g; negative
// entries are balancing references written by balanceMatch
matches [][]int
// matchcount[g] is the number of (start, length) pairs in use in matches[g]
matchcount []int
// whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in tidy().
balancing bool
}
// Group is an explicit or implicit (group 0) matched group within the pattern
type Group struct {
Capture // the last capture of this group is embedded for ease of use
Name string // group name
Captures []Capture // captures of this group
}
// Capture is a single capture of text within the larger original string.
// Index and Length are offsets into the rune slice held in text, so they
// count runes rather than bytes.
type Capture struct {
// the original string, as runes
text []rune
// the position in the original string where the first character of
// captured substring was found.
Index int
// the length of the captured substring.
Length int
}
// String returns the captured text as a string, i.e. the Length runes of the
// original input that start at Index.
func (c *Capture) String() string {
	end := c.Index + c.Length
	return string(c.text[c.Index:end])
}
// Runes returns the captured text as a rune slice. The result is a sub-slice
// of the original input (no copy is made), so it shares storage with it.
func (c *Capture) Runes() []rune {
	start := c.Index
	return c.text[start : start+c.Length]
}
// newMatch allocates a Match for regex with room for capcount groups over
// text, starting at startpos. Group 0 is named "0" and gets its single
// (start, length) slot up front; the slots of every other group are allocated
// lazily by addMatch.
func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
	counts := make([]int, capcount)
	slots := make([][]int, capcount)

	m := &Match{
		regex:      regex,
		textstart:  startpos,
		matchcount: counts,
		matches:    slots,
	}
	m.Name = "0"
	m.text = text
	m.matches[0] = make([]int, 2)
	return m
}
// newMatchSparse builds a Match exactly like newMatch and additionally
// records caps as the sparse map that GroupByNumber consults when translating
// group numbers.
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
	match := newMatch(regex, capcount, text, startpos)
	match.sparseCaps = caps
	return match
}
// reset prepares the Match for reuse against a new input: it swaps in text
// and textstart, zeroes every group's capture count and clears the balancing
// flag. The per-group slot slices in matches are kept so their storage can be
// reused.
func (m *Match) reset(text []rune, textstart int) {
	m.text = text
	m.textstart = textstart
	for i := range m.matchcount {
		m.matchcount[i] = 0
	}
	m.balancing = false
	// populateOtherGroups only builds the groups while this cache is nil, so
	// it must be dropped here; otherwise a reused Match would keep serving
	// the groups computed for the previous input.
	m.otherGroups = nil
}
// tidy finalizes the match once matching is done. It copies group 0's
// (start, length) pair into the embedded Capture, records textpos and the
// group 0 capture count, seeds Group.Captures with the root capture and, if
// any balancing was performed, compacts every group's slot list so that only
// real (non-negative) captures remain.
func (m *Match) tidy(textpos int) {
interval := m.matches[0]
m.Index = interval[0]
m.Length = interval[1]
m.textpos = textpos
m.capcount = m.matchcount[0]
// copy our root capture to the list
m.Group.Captures = []Capture{m.Group.Capture}
if m.balancing {
// The idea here is that we want to compact all of our unbalanced captures. To do that we
// use j basically as a count of how many unbalanced captures we have at any given time
// (really j is an index, but j/2 is the count). First we skip past all of the real captures
// until we find a balance capture. Then we check each subsequent entry. If it's a balance
// capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
// it down to the last free position.
for cap := 0; cap < len(m.matchcount); cap++ {
// limit is the number of ints (two per capture) in use for this group
limit := m.matchcount[cap] * 2
matcharray := m.matches[cap]
var i, j int
// find the first negative (balance) entry, if any
for i = 0; i < limit; i++ {
if matcharray[i] < 0 {
break
}
}
// i keeps scanning forward while j tracks the write position
for j = i; i < limit; i++ {
if matcharray[i] < 0 {
// skip negative values
j--
} else {
// but if we find something positive (an actual capture), copy it back to the last
// unbalanced position.
if i != j {
matcharray[j] = matcharray[i]
}
j++
}
}
// j counts ints, so halve it to get the number of surviving captures
m.matchcount[cap] = j / 2
}
m.balancing = false
}
}
// isMatched tells if a group was matched by capnum. A group counts as matched
// when cap is below the number of groups, it has at least one capture, and
// the length slot of its latest capture is not -2. balanceMatch writes -2
// there when it has no earlier capture left to point back to.
func (m *Match) isMatched(cap int) bool {
	if cap >= len(m.matchcount) {
		return false
	}
	count := m.matchcount[cap]
	if count <= 0 {
		return false
	}
	lastLength := m.matches[cap][count*2-1]
	return lastLength != (-3 + 1)
}
// matchIndex returns the index of the last specified matched group by capnum.
// A negative start slot is a balancing reference: -3 minus its value is the
// position of the slot that holds the real start index.
func (m *Match) matchIndex(cap int) int {
	slots := m.matches[cap]
	start := slots[m.matchcount[cap]*2-2]
	if start < 0 {
		return slots[-3-start]
	}
	return start
}
// matchLength returns the length of the last specified matched group by
// capnum. A negative length slot is a balancing reference: -3 minus its value
// is the position of the slot that holds the real length.
func (m *Match) matchLength(cap int) int {
	slots := m.matches[cap]
	length := slots[m.matchcount[cap]*2-1]
	if length < 0 {
		return slots[-3-length]
	}
	return length
}
// Nonpublic builder: add a capture to the group specified by "c".
//
// The (start, l) pair is written after the group's current captures. The slot
// slice is created with room for one pair on first use and regrown to
// capcount*8 ints whenever the next pair would not fit.
func (m *Match) addMatch(c, start, l int) {
	if m.matches[c] == nil {
		m.matches[c] = make([]int, 2)
	}

	count := m.matchcount[c]
	slots := m.matches[c]
	if needed := count*2 + 2; needed > len(slots) {
		grown := make([]int, count*8)
		copy(grown, slots[:count*2])
		m.matches[c] = grown
		slots = grown
	}

	base := count * 2
	slots[base] = start
	slots[base+1] = l
	m.matchcount[c] = count + 1
}
// Nonpublic builder: Add a capture to balance the specified group. This is used by the
// balanced match construct. (?<foo-foo2>...)
//
// If there were no such thing as backtracking, this would be as simple as calling removeMatch(c).
// However, since we have backtracking, we need to keep track of everything, so instead of
// dropping the capture we append a negative "reference" pair that points at the capture
// which becomes current once the latest one has been balanced away.
func (m *Match) balanceMatch(c int) {
	m.balancing = true

	slots := m.matches[c]

	// Begin at the most recent capture of the group.
	target := m.matchcount[c]*2 - 2

	// A negative entry is itself a reference; follow it so target points at
	// the capture it stands for.
	if ref := slots[target]; ref < 0 {
		target = -3 - ref
	}

	// Step back to the capture before that one.
	target -= 2

	if target >= 0 && slots[target] < 0 {
		// The previous capture is already a reference: copy it to the end.
		m.addMatch(c, slots[target], slots[target+1])
		return
	}
	// Otherwise append a reference to it: -3-target for the start slot and
	// -4-target (== -3 - (target + 1)) for the length slot.
	m.addMatch(c, -3-target, -4-target)
}
// Nonpublic builder: removes a group match by capnum. Only the capture count
// of group c is decremented; the slot data itself is left in place.
func (m *Match) removeMatch(c int) {
	m.matchcount[c] -= 1
}
// GroupCount returns the number of group slots this match tracks, including
// group 0. It is the length of the per-group count table, so groups that did
// not capture anything are counted as well.
func (m *Match) GroupCount() int {
	count := len(m.matchcount)
	return count
}
// GroupByName returns a group based on the name of the group, or nil if the
// group name does not exist. The name is resolved to a group number through
// the regex and then looked up with GroupByNumber.
func (m *Match) GroupByName(name string) *Group {
	if num := m.regex.GroupNumberFromName(name); num >= 0 {
		return m.GroupByNumber(num)
	}
	return nil
}
// GroupByNumber returns a group based on the number of the group, or nil if
// the group number does not exist. Group 0 is the embedded whole-match group;
// all other groups are built on first use.
func (m *Match) GroupByNumber(num int) *Group {
	// Translate through the sparse map when it has an entry for num; a lookup
	// on a nil map simply reports "not found".
	// NOTE(review): when the map is set but has no entry for num, num is used
	// as a slot index unchanged — confirm that fall-through is intended.
	if slot, ok := m.sparseCaps[num]; ok {
		num = slot
	}

	switch {
	case num < 0 || num >= len(m.matchcount):
		return nil
	case num == 0:
		return &m.Group
	}

	m.populateOtherGroups()
	return &m.otherGroups[num-1]
}
// Groups returns all the capture groups, starting with group 0 (the full
// match). The returned slice is a fresh copy, so changing its elements does
// not affect the Match.
func (m *Match) Groups() []Group {
	m.populateOtherGroups()

	all := make([]Group, 0, len(m.otherGroups)+1)
	all = append(all, m.Group)
	all = append(all, m.otherGroups...)
	return all
}
// populateOtherGroups builds the Group values for groups 1..n the first time
// it is called and caches them in otherGroups; later calls are no-ops.
func (m *Match) populateOtherGroups() {
	if m.otherGroups != nil {
		return
	}

	m.otherGroups = make([]Group, len(m.matchcount)-1)
	for i := range m.otherGroups {
		// otherGroups[i] holds group number i+1, because group 0 is embedded.
		num := i + 1
		m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(num), m.text, m.matches[num], m.matchcount[num])
	}
}
// groupValueAppendToBuf appends the text of the latest capture of group
// groupnum to buf. Nothing is written when the group has no captures.
func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
	count := m.matchcount[groupnum]
	if count == 0 {
		return
	}

	slots := m.matches[groupnum]
	start := slots[count*2-2]
	end := start + slots[count*2-1]
	for pos := start; pos < end; pos++ {
		buf.WriteRune(m.text[pos])
	}
}
// newGroup builds a Group called name over text from the first capcount
// (start, length) pairs in caps. Every pair becomes one entry in Captures,
// and the last pair also fills the embedded Capture. With capcount == 0 the
// embedded Capture keeps Index and Length at zero.
func newGroup(name string, text []rune, caps []int, capcount int) Group {
	var g Group
	g.text = text
	if last := capcount - 1; last >= 0 {
		g.Index = caps[last*2]
		g.Length = caps[last*2+1]
	}
	g.Name = name

	captures := make([]Capture, capcount)
	for i := range captures {
		captures[i] = Capture{text: text, Index: caps[i*2], Length: caps[i*2+1]}
	}
	g.Captures = captures
	return g
}
// dump renders the match as multi-line text for debugging: one "Slot" line
// per sparse-map entry (in map iteration order), followed by every group with
// its name, capture count and the (index, length, text) of each capture.
func (m *Match) dump() string {
	var out bytes.Buffer
	out.WriteRune('\n')

	// Ranging over a nil or empty map does nothing, so no length check is needed.
	for from, to := range m.sparseCaps {
		fmt.Fprintf(&out, "Slot %v -> %v\n", from, to)
	}

	for num, grp := range m.Groups() {
		fmt.Fprintf(&out, "Group %v (%v), %v caps:\n", num, grp.Name, len(grp.Captures))
		for k := range grp.Captures {
			c := &grp.Captures[k]
			fmt.Fprintf(&out, " (%v, %v) %v\n", c.Index, c.Length, c.String())
		}
	}

	return out.String()
}