mark2web/vendor/github.com/dlclark/regexp2/runner.go

1635 lines
35 KiB
Go
Raw Permalink Normal View History

2022-02-28 10:28:34 +01:00
package regexp2
import (
"bytes"
"errors"
"fmt"
"math"
"strconv"
"strings"
"time"
"unicode"
"github.com/dlclark/regexp2/syntax"
)
type runner struct {
re *Regexp
code *syntax.Code
runtextstart int // starting point for search
runtext []rune // text to search
runtextpos int // current position in text
runtextend int
// The backtracking stack. Opcodes use this to store data regarding
// what they have matched and where to backtrack to. Each "frame" on
// the stack takes the form of [CodePosition Data1 Data2...], where
// CodePosition is the position of the current opcode and
// the data values are all optional. The CodePosition can be negative, and
// these values (also called "back2") are used by the BranchMark family of opcodes
// to indicate whether they are backtracking after a successful or failed
// match.
// When we backtrack, we pop the CodePosition off the stack, set the current
// instruction pointer to that code position, and mark the opcode
// with a backtracking flag ("Back"). Each opcode then knows how to
// handle its own data.
runtrack []int
runtrackpos int
// This stack is used to track text positions across different opcodes.
// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
// pair. SetMark records the text position before we match a*b. Then
// CaptureMark uses that position to figure out where the capture starts.
// Opcodes which push onto this stack are always paired with other opcodes
// which will pop the value from it later. A successful match should mean
// that this stack is empty.
runstack []int
runstackpos int
// The crawl stack is used to keep track of captures. Every time a group
// has a capture, we push its group number onto the runcrawl stack. In
// the case of a balanced match, we push BOTH groups onto the stack.
runcrawl []int
runcrawlpos int
runtrackcount int // count of states that may do backtracking
runmatch *Match // result object
ignoreTimeout bool
timeout time.Duration // timeout in milliseconds (needed for actual)
timeoutChecksToSkip int
timeoutAt time.Time
operator syntax.InstOp
codepos int
rightToLeft bool
caseInsensitive bool
}
// run searches for matches and can continue from the previous match
//
// quick is usually false, but can be true to not return matches, just put it in caches
// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
// input is the string to search for our regex pattern
func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
// get a cached runner
runner := re.getRunner()
defer re.putRunner(runner)
if textstart < 0 {
if re.RightToLeft() {
textstart = len(input)
} else {
textstart = 0
}
}
return runner.scan(input, textstart, quick, re.MatchTimeout)
}
// Scans the string to find the first match. Uses the Match object
// both to feed text in and as a place to store matches that come out.
//
// All the action is in the Go() method. Our
// responsibility is to load up the class members before
// calling Go.
//
// The optimizer can compute a set of candidate starting characters,
// and we could use a separate method Skip() that will quickly scan past
// any characters that we know can't match.
func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
r.timeout = timeout
r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
r.runtextstart = textstart
r.runtext = rt
r.runtextend = len(rt)
stoppos := r.runtextend
bump := 1
if r.re.RightToLeft() {
bump = -1
stoppos = 0
}
r.runtextpos = textstart
initted := false
r.startTimeoutWatch()
for {
if r.re.Debug() {
//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
}
if r.findFirstChar() {
if err := r.checkTimeout(); err != nil {
return nil, err
}
if !initted {
r.initMatch()
initted = true
}
if r.re.Debug() {
fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
}
if err := r.execute(); err != nil {
return nil, err
}
if r.runmatch.matchcount[0] > 0 {
// We'll return a match even if it touches a previous empty match
return r.tidyMatch(quick), nil
}
// reset state for another go
r.runtrackpos = len(r.runtrack)
r.runstackpos = len(r.runstack)
r.runcrawlpos = len(r.runcrawl)
}
// failure!
if r.runtextpos == stoppos {
r.tidyMatch(true)
return nil, nil
}
// Recognize leading []* and various anchors, and bump on failure accordingly
// r.bump by one and start again
r.runtextpos += bump
}
// We never get here
}
func (r *runner) execute() error {
r.goTo(0)
for {
if r.re.Debug() {
r.dumpState()
}
if err := r.checkTimeout(); err != nil {
return err
}
switch r.operator {
case syntax.Stop:
return nil
case syntax.Nothing:
break
case syntax.Goto:
r.goTo(r.operand(0))
continue
case syntax.Testref:
if !r.runmatch.isMatched(r.operand(0)) {
break
}
r.advance(1)
continue
case syntax.Lazybranch:
r.trackPush1(r.textPos())
r.advance(1)
continue
case syntax.Lazybranch | syntax.Back:
r.trackPop()
r.textto(r.trackPeek())
r.goTo(r.operand(0))
continue
case syntax.Setmark:
r.stackPush(r.textPos())
r.trackPush()
r.advance(0)
continue
case syntax.Nullmark:
r.stackPush(-1)
r.trackPush()
r.advance(0)
continue
case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
r.stackPop()
break
case syntax.Getmark:
r.stackPop()
r.trackPush1(r.stackPeek())
r.textto(r.stackPeek())
r.advance(0)
continue
case syntax.Getmark | syntax.Back:
r.trackPop()
r.stackPush(r.trackPeek())
break
case syntax.Capturemark:
if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
break
}
r.stackPop()
if r.operand(1) != -1 {
r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
} else {
r.capture(r.operand(0), r.stackPeek(), r.textPos())
}
r.trackPush1(r.stackPeek())
r.advance(2)
continue
case syntax.Capturemark | syntax.Back:
r.trackPop()
r.stackPush(r.trackPeek())
r.uncapture()
if r.operand(0) != -1 && r.operand(1) != -1 {
r.uncapture()
}
break
case syntax.Branchmark:
r.stackPop()
matched := r.textPos() - r.stackPeek()
if matched != 0 { // Nonempty match -> loop now
r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
r.stackPush(r.textPos()) // Make new mark
r.goTo(r.operand(0)) // Loop
} else { // Empty match -> straight now
r.trackPushNeg1(r.stackPeek()) // Save old mark
r.advance(1) // Straight
}
continue
case syntax.Branchmark | syntax.Back:
r.trackPopN(2)
r.stackPop()
r.textto(r.trackPeekN(1)) // Recall position
r.trackPushNeg1(r.trackPeek()) // Save old mark
r.advance(1) // Straight
continue
case syntax.Branchmark | syntax.Back2:
r.trackPop()
r.stackPush(r.trackPeek()) // Recall old mark
break // Backtrack
case syntax.Lazybranchmark:
{
// We hit this the first time through a lazy loop and after each
// successful match of the inner expression. It simply continues
// on and doesn't loop.
r.stackPop()
oldMarkPos := r.stackPeek()
if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
if oldMarkPos != -1 {
r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
} else {
r.trackPush2(r.textPos(), r.textPos())
}
} else {
// The inner expression found an empty match, so we'll go directly to 'back2' if we
// backtrack. In this case, we need to push something on the stack, since back2 pops.
// However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
// position associated with that empty match.
r.stackPush(oldMarkPos)
r.trackPushNeg1(r.stackPeek()) // Save old mark
}
r.advance(1)
continue
}
case syntax.Lazybranchmark | syntax.Back:
// After the first time, Lazybranchmark | syntax.Back occurs
// with each iteration of the loop, and therefore with every attempted
// match of the inner expression. We'll try to match the inner expression,
// then go back to Lazybranchmark if successful. If the inner expression
// fails, we go to Lazybranchmark | syntax.Back2
r.trackPopN(2)
pos := r.trackPeekN(1)
r.trackPushNeg1(r.trackPeek()) // Save old mark
r.stackPush(pos) // Make new mark
r.textto(pos) // Recall position
r.goTo(r.operand(0)) // Loop
continue
case syntax.Lazybranchmark | syntax.Back2:
// The lazy loop has failed. We'll do a true backtrack and
// start over before the lazy loop.
r.stackPop()
r.trackPop()
r.stackPush(r.trackPeek()) // Recall old mark
break
case syntax.Setcount:
r.stackPush2(r.textPos(), r.operand(0))
r.trackPush()
r.advance(1)
continue
case syntax.Nullcount:
r.stackPush2(-1, r.operand(0))
r.trackPush()
r.advance(1)
continue
case syntax.Setcount | syntax.Back:
r.stackPopN(2)
break
case syntax.Nullcount | syntax.Back:
r.stackPopN(2)
break
case syntax.Branchcount:
// r.stackPush:
// 0: Mark
// 1: Count
r.stackPopN(2)
mark := r.stackPeek()
count := r.stackPeekN(1)
matched := r.textPos() - mark
if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
r.trackPushNeg2(mark, count) // Save old mark, count
r.advance(2) // Straight
} else { // Nonempty match -> count+loop now
r.trackPush1(mark) // remember mark
r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
r.goTo(r.operand(0)) // Loop
}
continue
case syntax.Branchcount | syntax.Back:
// r.trackPush:
// 0: Previous mark
// r.stackPush:
// 0: Mark (= current pos, discarded)
// 1: Count
r.trackPop()
r.stackPopN(2)
if r.stackPeekN(1) > 0 { // Positive -> can go straight
r.textto(r.stackPeek()) // Zap to mark
r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
r.advance(2) // Straight
continue
}
r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
break
case syntax.Branchcount | syntax.Back2:
// r.trackPush:
// 0: Previous mark
// 1: Previous count
r.trackPopN(2)
r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
break // Backtrack
case syntax.Lazybranchcount:
// r.stackPush:
// 0: Mark
// 1: Count
r.stackPopN(2)
mark := r.stackPeek()
count := r.stackPeekN(1)
if count < 0 { // Negative count -> loop now
r.trackPushNeg1(mark) // Save old mark
r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
r.goTo(r.operand(0)) // Loop
} else { // Nonneg count -> straight now
r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
r.advance(2) // Straight
}
continue
case syntax.Lazybranchcount | syntax.Back:
// r.trackPush:
// 0: Mark
// 1: Count
// 2: r.textPos
r.trackPopN(3)
mark := r.trackPeek()
textpos := r.trackPeekN(2)
if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
r.textto(textpos) // Recall position
r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
r.trackPushNeg1(mark) // Save old mark
r.goTo(r.operand(0)) // Loop
continue
} else { // Max loops or empty match -> backtrack
r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
break // backtrack
}
case syntax.Lazybranchcount | syntax.Back2:
// r.trackPush:
// 0: Previous mark
// r.stackPush:
// 0: Mark (== current pos, discarded)
// 1: Count
r.trackPop()
r.stackPopN(2)
r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
break // Backtrack
case syntax.Setjump:
r.stackPush2(r.trackpos(), r.crawlpos())
r.trackPush()
r.advance(0)
continue
case syntax.Setjump | syntax.Back:
r.stackPopN(2)
break
case syntax.Backjump:
// r.stackPush:
// 0: Saved trackpos
// 1: r.crawlpos
r.stackPopN(2)
r.trackto(r.stackPeek())
for r.crawlpos() != r.stackPeekN(1) {
r.uncapture()
}
break
case syntax.Forejump:
// r.stackPush:
// 0: Saved trackpos
// 1: r.crawlpos
r.stackPopN(2)
r.trackto(r.stackPeek())
r.trackPush1(r.stackPeekN(1))
r.advance(0)
continue
case syntax.Forejump | syntax.Back:
// r.trackPush:
// 0: r.crawlpos
r.trackPop()
for r.crawlpos() != r.trackPeek() {
r.uncapture()
}
break
case syntax.Bol:
if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
break
}
r.advance(0)
continue
case syntax.Eol:
if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
break
}
r.advance(0)
continue
case syntax.Boundary:
if !r.isBoundary(r.textPos(), 0, r.runtextend) {
break
}
r.advance(0)
continue
case syntax.Nonboundary:
if r.isBoundary(r.textPos(), 0, r.runtextend) {
break
}
r.advance(0)
continue
case syntax.ECMABoundary:
if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
break
}
r.advance(0)
continue
case syntax.NonECMABoundary:
if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
break
}
r.advance(0)
continue
case syntax.Beginning:
if r.leftchars() > 0 {
break
}
r.advance(0)
continue
case syntax.Start:
if r.textPos() != r.textstart() {
break
}
r.advance(0)
continue
case syntax.EndZ:
rchars := r.rightchars()
if rchars > 1 {
break
}
// RE2 and EcmaScript define $ as "asserts position at the end of the string"
// PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
if (r.re.options & (RE2 | ECMAScript)) != 0 {
// RE2/Ecmascript mode
if rchars > 0 {
break
}
} else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
// "regular" mode
break
}
r.advance(0)
continue
case syntax.End:
if r.rightchars() > 0 {
break
}
r.advance(0)
continue
case syntax.One:
if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
break
}
r.advance(1)
continue
case syntax.Notone:
if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
break
}
r.advance(1)
continue
case syntax.Set:
if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
break
}
r.advance(1)
continue
case syntax.Multi:
if !r.runematch(r.code.Strings[r.operand(0)]) {
break
}
r.advance(1)
continue
case syntax.Ref:
capnum := r.operand(0)
if r.runmatch.isMatched(capnum) {
if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
break
}
} else {
if (r.re.options & ECMAScript) == 0 {
break
}
}
r.advance(1)
continue
case syntax.Onerep:
c := r.operand(1)
if r.forwardchars() < c {
break
}
ch := rune(r.operand(0))
for c > 0 {
if r.forwardcharnext() != ch {
goto BreakBackward
}
c--
}
r.advance(2)
continue
case syntax.Notonerep:
c := r.operand(1)
if r.forwardchars() < c {
break
}
ch := rune(r.operand(0))
for c > 0 {
if r.forwardcharnext() == ch {
goto BreakBackward
}
c--
}
r.advance(2)
continue
case syntax.Setrep:
c := r.operand(1)
if r.forwardchars() < c {
break
}
set := r.code.Sets[r.operand(0)]
for c > 0 {
if !set.CharIn(r.forwardcharnext()) {
goto BreakBackward
}
c--
}
r.advance(2)
continue
case syntax.Oneloop:
c := r.operand(1)
if c > r.forwardchars() {
c = r.forwardchars()
}
ch := rune(r.operand(0))
i := c
for ; i > 0; i-- {
if r.forwardcharnext() != ch {
r.backwardnext()
break
}
}
if c > i {
r.trackPush2(c-i-1, r.textPos()-r.bump())
}
r.advance(2)
continue
case syntax.Notoneloop:
c := r.operand(1)
if c > r.forwardchars() {
c = r.forwardchars()
}
ch := rune(r.operand(0))
i := c
for ; i > 0; i-- {
if r.forwardcharnext() == ch {
r.backwardnext()
break
}
}
if c > i {
r.trackPush2(c-i-1, r.textPos()-r.bump())
}
r.advance(2)
continue
case syntax.Setloop:
c := r.operand(1)
if c > r.forwardchars() {
c = r.forwardchars()
}
set := r.code.Sets[r.operand(0)]
i := c
for ; i > 0; i-- {
if !set.CharIn(r.forwardcharnext()) {
r.backwardnext()
break
}
}
if c > i {
r.trackPush2(c-i-1, r.textPos()-r.bump())
}
r.advance(2)
continue
case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
r.trackPopN(2)
i := r.trackPeek()
pos := r.trackPeekN(1)
r.textto(pos)
if i > 0 {
r.trackPush2(i-1, pos-r.bump())
}
r.advance(2)
continue
case syntax.Setloop | syntax.Back:
r.trackPopN(2)
i := r.trackPeek()
pos := r.trackPeekN(1)
r.textto(pos)
if i > 0 {
r.trackPush2(i-1, pos-r.bump())
}
r.advance(2)
continue
case syntax.Onelazy, syntax.Notonelazy:
c := r.operand(1)
if c > r.forwardchars() {
c = r.forwardchars()
}
if c > 0 {
r.trackPush2(c-1, r.textPos())
}
r.advance(2)
continue
case syntax.Setlazy:
c := r.operand(1)
if c > r.forwardchars() {
c = r.forwardchars()
}
if c > 0 {
r.trackPush2(c-1, r.textPos())
}
r.advance(2)
continue
case syntax.Onelazy | syntax.Back:
r.trackPopN(2)
pos := r.trackPeekN(1)
r.textto(pos)
if r.forwardcharnext() != rune(r.operand(0)) {
break
}
i := r.trackPeek()
if i > 0 {
r.trackPush2(i-1, pos+r.bump())
}
r.advance(2)
continue
case syntax.Notonelazy | syntax.Back:
r.trackPopN(2)
pos := r.trackPeekN(1)
r.textto(pos)
if r.forwardcharnext() == rune(r.operand(0)) {
break
}
i := r.trackPeek()
if i > 0 {
r.trackPush2(i-1, pos+r.bump())
}
r.advance(2)
continue
case syntax.Setlazy | syntax.Back:
r.trackPopN(2)
pos := r.trackPeekN(1)
r.textto(pos)
if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
break
}
i := r.trackPeek()
if i > 0 {
r.trackPush2(i-1, pos+r.bump())
}
r.advance(2)
continue
default:
return errors.New("unknown state in regex runner")
}
BreakBackward:
;
// "break Backward" comes here:
r.backtrack()
}
}
// increase the size of stack and track storage
func (r *runner) ensureStorage() {
if r.runstackpos < r.runtrackcount*4 {
doubleIntSlice(&r.runstack, &r.runstackpos)
}
if r.runtrackpos < r.runtrackcount*4 {
doubleIntSlice(&r.runtrack, &r.runtrackpos)
}
}
func doubleIntSlice(s *[]int, pos *int) {
oldLen := len(*s)
newS := make([]int, oldLen*2)
copy(newS[oldLen:], *s)
*pos += oldLen
*s = newS
}
// Save a number on the longjump unrolling stack
func (r *runner) crawl(i int) {
if r.runcrawlpos == 0 {
doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
}
r.runcrawlpos--
r.runcrawl[r.runcrawlpos] = i
}
// Remove a number from the longjump unrolling stack
func (r *runner) popcrawl() int {
val := r.runcrawl[r.runcrawlpos]
r.runcrawlpos++
return val
}
// Get the height of the stack
func (r *runner) crawlpos() int {
return len(r.runcrawl) - r.runcrawlpos
}
func (r *runner) advance(i int) {
r.codepos += (i + 1)
r.setOperator(r.code.Codes[r.codepos])
}
func (r *runner) goTo(newpos int) {
// when branching backward or in place, ensure storage
if newpos <= r.codepos {
r.ensureStorage()
}
r.setOperator(r.code.Codes[newpos])
r.codepos = newpos
}
func (r *runner) textto(newpos int) {
r.runtextpos = newpos
}
func (r *runner) trackto(newpos int) {
r.runtrackpos = len(r.runtrack) - newpos
}
func (r *runner) textstart() int {
return r.runtextstart
}
func (r *runner) textPos() int {
return r.runtextpos
}
// push onto the backtracking stack
func (r *runner) trackpos() int {
return len(r.runtrack) - r.runtrackpos
}
func (r *runner) trackPush() {
r.runtrackpos--
r.runtrack[r.runtrackpos] = r.codepos
}
func (r *runner) trackPush1(I1 int) {
r.runtrackpos--
r.runtrack[r.runtrackpos] = I1
r.runtrackpos--
r.runtrack[r.runtrackpos] = r.codepos
}
func (r *runner) trackPush2(I1, I2 int) {
r.runtrackpos--
r.runtrack[r.runtrackpos] = I1
r.runtrackpos--
r.runtrack[r.runtrackpos] = I2
r.runtrackpos--
r.runtrack[r.runtrackpos] = r.codepos
}
func (r *runner) trackPush3(I1, I2, I3 int) {
r.runtrackpos--
r.runtrack[r.runtrackpos] = I1
r.runtrackpos--
r.runtrack[r.runtrackpos] = I2
r.runtrackpos--
r.runtrack[r.runtrackpos] = I3
r.runtrackpos--
r.runtrack[r.runtrackpos] = r.codepos
}
func (r *runner) trackPushNeg1(I1 int) {
r.runtrackpos--
r.runtrack[r.runtrackpos] = I1
r.runtrackpos--
r.runtrack[r.runtrackpos] = -r.codepos
}
func (r *runner) trackPushNeg2(I1, I2 int) {
r.runtrackpos--
r.runtrack[r.runtrackpos] = I1
r.runtrackpos--
r.runtrack[r.runtrackpos] = I2
r.runtrackpos--
r.runtrack[r.runtrackpos] = -r.codepos
}
func (r *runner) backtrack() {
newpos := r.runtrack[r.runtrackpos]
r.runtrackpos++
if r.re.Debug() {
if newpos < 0 {
fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
} else {
fmt.Printf(" Backtracking to code position %v\n", newpos)
}
}
if newpos < 0 {
newpos = -newpos
r.setOperator(r.code.Codes[newpos] | syntax.Back2)
} else {
r.setOperator(r.code.Codes[newpos] | syntax.Back)
}
// When branching backward, ensure storage
if newpos < r.codepos {
r.ensureStorage()
}
r.codepos = newpos
}
func (r *runner) setOperator(op int) {
r.caseInsensitive = (0 != (op & syntax.Ci))
r.rightToLeft = (0 != (op & syntax.Rtl))
r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
}
func (r *runner) trackPop() {
r.runtrackpos++
}
// pop framesize items from the backtracking stack
func (r *runner) trackPopN(framesize int) {
r.runtrackpos += framesize
}
// Technically we are actually peeking at items already popped. So if you want to
// get and pop the top item from the stack, you do
// r.trackPop();
// r.trackPeek();
func (r *runner) trackPeek() int {
return r.runtrack[r.runtrackpos-1]
}
// get the ith element down on the backtracking stack
func (r *runner) trackPeekN(i int) int {
return r.runtrack[r.runtrackpos-i-1]
}
// Push onto the grouping stack
func (r *runner) stackPush(I1 int) {
r.runstackpos--
r.runstack[r.runstackpos] = I1
}
func (r *runner) stackPush2(I1, I2 int) {
r.runstackpos--
r.runstack[r.runstackpos] = I1
r.runstackpos--
r.runstack[r.runstackpos] = I2
}
func (r *runner) stackPop() {
r.runstackpos++
}
// pop framesize items from the grouping stack
func (r *runner) stackPopN(framesize int) {
r.runstackpos += framesize
}
// Technically we are actually peeking at items already popped. So if you want to
// get and pop the top item from the stack, you do
// r.stackPop();
// r.stackPeek();
func (r *runner) stackPeek() int {
return r.runstack[r.runstackpos-1]
}
// get the ith element down on the grouping stack
func (r *runner) stackPeekN(i int) int {
return r.runstack[r.runstackpos-i-1]
}
func (r *runner) operand(i int) int {
return r.code.Codes[r.codepos+i+1]
}
func (r *runner) leftchars() int {
return r.runtextpos
}
func (r *runner) rightchars() int {
return r.runtextend - r.runtextpos
}
func (r *runner) bump() int {
if r.rightToLeft {
return -1
}
return 1
}
func (r *runner) forwardchars() int {
if r.rightToLeft {
return r.runtextpos
}
return r.runtextend - r.runtextpos
}
func (r *runner) forwardcharnext() rune {
var ch rune
if r.rightToLeft {
r.runtextpos--
ch = r.runtext[r.runtextpos]
} else {
ch = r.runtext[r.runtextpos]
r.runtextpos++
}
if r.caseInsensitive {
return unicode.ToLower(ch)
}
return ch
}
func (r *runner) runematch(str []rune) bool {
var pos int
c := len(str)
if !r.rightToLeft {
if r.runtextend-r.runtextpos < c {
return false
}
pos = r.runtextpos + c
} else {
if r.runtextpos-0 < c {
return false
}
pos = r.runtextpos
}
if !r.caseInsensitive {
for c != 0 {
c--
pos--
if str[c] != r.runtext[pos] {
return false
}
}
} else {
for c != 0 {
c--
pos--
if str[c] != unicode.ToLower(r.runtext[pos]) {
return false
}
}
}
if !r.rightToLeft {
pos += len(str)
}
r.runtextpos = pos
return true
}
func (r *runner) refmatch(index, len int) bool {
var c, pos, cmpos int
if !r.rightToLeft {
if r.runtextend-r.runtextpos < len {
return false
}
pos = r.runtextpos + len
} else {
if r.runtextpos-0 < len {
return false
}
pos = r.runtextpos
}
cmpos = index + len
c = len
if !r.caseInsensitive {
for c != 0 {
c--
cmpos--
pos--
if r.runtext[cmpos] != r.runtext[pos] {
return false
}
}
} else {
for c != 0 {
c--
cmpos--
pos--
if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
return false
}
}
}
if !r.rightToLeft {
pos += len
}
r.runtextpos = pos
return true
}
func (r *runner) backwardnext() {
if r.rightToLeft {
r.runtextpos++
} else {
r.runtextpos--
}
}
func (r *runner) charAt(j int) rune {
return r.runtext[j]
}
func (r *runner) findFirstChar() bool {
if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
if !r.code.RightToLeft {
if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
r.runtextpos = r.runtextend
return false
}
if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
r.runtextpos = r.runtextend - 1
} else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
r.runtextpos = r.runtextend
}
} else {
if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
(0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
(r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
r.runtextpos = 0
return false
}
if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
r.runtextpos = 0
}
}
if r.code.BmPrefix != nil {
return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
}
return true // found a valid start or end anchor
} else if r.code.BmPrefix != nil {
r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
if r.runtextpos == -1 {
if r.code.RightToLeft {
r.runtextpos = 0
} else {
r.runtextpos = r.runtextend
}
return false
}
return true
} else if r.code.FcPrefix == nil {
return true
}
r.rightToLeft = r.code.RightToLeft
r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
set := r.code.FcPrefix.PrefixSet
if set.IsSingleton() {
ch := set.SingletonChar()
for i := r.forwardchars(); i > 0; i-- {
if ch == r.forwardcharnext() {
r.backwardnext()
return true
}
}
} else {
for i := r.forwardchars(); i > 0; i-- {
n := r.forwardcharnext()
//fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
if set.CharIn(n) {
r.backwardnext()
return true
}
}
}
return false
}
func (r *runner) initMatch() {
// Use a hashtable'ed Match object if the capture numbers are sparse
if r.runmatch == nil {
if r.re.caps != nil {
r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
} else {
r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
}
} else {
r.runmatch.reset(r.runtext, r.runtextstart)
}
// note we test runcrawl, because it is the last one to be allocated
// If there is an alloc failure in the middle of the three allocations,
// we may still return to reuse this instance, and we want to behave
// as if the allocations didn't occur. (we used to test _trackcount != 0)
if r.runcrawl != nil {
r.runtrackpos = len(r.runtrack)
r.runstackpos = len(r.runstack)
r.runcrawlpos = len(r.runcrawl)
return
}
r.initTrackCount()
tracksize := r.runtrackcount * 8
stacksize := r.runtrackcount * 8
if tracksize < 32 {
tracksize = 32
}
if stacksize < 16 {
stacksize = 16
}
r.runtrack = make([]int, tracksize)
r.runtrackpos = tracksize
r.runstack = make([]int, stacksize)
r.runstackpos = stacksize
r.runcrawl = make([]int, 32)
r.runcrawlpos = 32
}
func (r *runner) tidyMatch(quick bool) *Match {
if !quick {
match := r.runmatch
r.runmatch = nil
match.tidy(r.runtextpos)
return match
} else {
// send back our match -- it's not leaving the package, so it's safe to not clean it up
// this reduces allocs for frequent calls to the "IsMatch" bool-only functions
return r.runmatch
}
}
// capture captures a subexpression. Note that the
// capnum used here has already been mapped to a non-sparse
// index (by the code generator RegexWriter).
func (r *runner) capture(capnum, start, end int) {
if end < start {
T := end
end = start
start = T
}
r.crawl(capnum)
r.runmatch.addMatch(capnum, start, end-start)
}
// transferCapture captures a subexpression. Note that the
// capnum used here has already been mapped to a non-sparse
// index (by the code generator RegexWriter).
func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
var start2, end2 int
// these are the two intervals that are cancelling each other
if end < start {
T := end
end = start
start = T
}
start2 = r.runmatch.matchIndex(uncapnum)
end2 = start2 + r.runmatch.matchLength(uncapnum)
// The new capture gets the innermost defined interval
if start >= end2 {
end = start
start = end2
} else if end <= start2 {
start = start2
} else {
if end > end2 {
end = end2
}
if start2 > start {
start = start2
}
}
r.crawl(uncapnum)
r.runmatch.balanceMatch(uncapnum)
if capnum != -1 {
r.crawl(capnum)
r.runmatch.addMatch(capnum, start, end-start)
}
}
// revert the last capture
func (r *runner) uncapture() {
capnum := r.popcrawl()
r.runmatch.removeMatch(capnum)
}
//debug
func (r *runner) dumpState() {
back := ""
if r.operator&syntax.Back != 0 {
back = " Back"
}
if r.operator&syntax.Back2 != 0 {
back += " Back2"
}
fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
r.textposDescription(),
r.stackDescription(r.runtrack, r.runtrackpos),
r.stackDescription(r.runstack, r.runstackpos),
r.code.OpcodeDescription(r.codepos),
back)
}
func (r *runner) stackDescription(a []int, index int) string {
buf := &bytes.Buffer{}
fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
if buf.Len() < 8 {
buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
}
buf.WriteRune('(')
for i := index; i < len(a); i++ {
if i > index {
buf.WriteRune(' ')
}
buf.WriteString(strconv.Itoa(a[i]))
}
buf.WriteRune(')')
return buf.String()
}
func (r *runner) textposDescription() string {
buf := &bytes.Buffer{}
buf.WriteString(strconv.Itoa(r.runtextpos))
if buf.Len() < 8 {
buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
}
if r.runtextpos > 0 {
buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
} else {
buf.WriteRune('^')
}
buf.WriteRune('>')
for i := r.runtextpos; i < r.runtextend; i++ {
buf.WriteString(syntax.CharDescription(r.runtext[i]))
}
if buf.Len() >= 64 {
buf.Truncate(61)
buf.WriteString("...")
} else {
buf.WriteRune('$')
}
return buf.String()
}
// decide whether the pos
// at the specified index is a boundary or not. It's just not worth
// emitting inline code for this logic.
func (r *runner) isBoundary(index, startpos, endpos int) bool {
return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
(index < endpos && syntax.IsWordChar(r.runtext[index]))
}
func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
}
// this seems like a comment to justify randomly picking 1000 :-P
// We have determined this value in a series of experiments where x86 retail
// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
// of TimeoutCheckFrequency did not tend to increase performance; smaller values
// of TimeoutCheckFrequency tended to slow down the execution.
const timeoutCheckFrequency int = 1000
func (r *runner) startTimeoutWatch() {
if r.ignoreTimeout {
return
}
r.timeoutChecksToSkip = timeoutCheckFrequency
r.timeoutAt = time.Now().Add(r.timeout)
}
func (r *runner) checkTimeout() error {
if r.ignoreTimeout {
return nil
}
r.timeoutChecksToSkip--
if r.timeoutChecksToSkip != 0 {
return nil
}
r.timeoutChecksToSkip = timeoutCheckFrequency
return r.doCheckTimeout()
}
func (r *runner) doCheckTimeout() error {
current := time.Now()
if current.Before(r.timeoutAt) {
return nil
}
if r.re.Debug() {
//Debug.WriteLine("")
//Debug.WriteLine("RegEx match timeout occurred!")
//Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
//Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
//Debug.WriteLine("Search pattern: " + _runregex._pattern)
//Debug.WriteLine("Input: " + r.runtext)
//Debug.WriteLine("About to throw RegexMatchTimeoutException.")
}
return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
}
func (r *runner) initTrackCount() {
r.runtrackcount = r.code.TrackCount
}
// getRunner returns a run to use for matching re.
// It uses the re's runner cache if possible, to avoid
// unnecessary allocation.
func (re *Regexp) getRunner() *runner {
re.muRun.Lock()
if n := len(re.runner); n > 0 {
z := re.runner[n-1]
re.runner = re.runner[:n-1]
re.muRun.Unlock()
return z
}
re.muRun.Unlock()
z := &runner{
re: re,
code: re.code,
}
return z
}
// putRunner returns a runner to the re's cache.
// There is no attempt to limit the size of the cache, so it will
// grow to the maximum number of simultaneous matches
// run using re. (The cache empties when re gets garbage collected.)
func (re *Regexp) putRunner(r *runner) {
re.muRun.Lock()
re.runner = append(re.runner, r)
re.muRun.Unlock()
}