godog/gherkin/matcher.go

package gherkin

import (
	"regexp"
	"strings"
	"unicode/utf8"
)

// Lexical constants the matcher uses to recognise the different kinds of
// Gherkin lines.
const (
	// DEFAULT_DIALECT is the language code NewMatcher starts with.
	DEFAULT_DIALECT                 = "en"
	// COMMENT_PREFIX is the prefix MatchComment looks for.
	COMMENT_PREFIX                  = "#"
	// TAG_PREFIX starts every tag on a tag line; MatchTagLine also splits on it.
	TAG_PREFIX                      = "@"
	// TITLE_KEYWORD_SEPARATOR must directly follow the keyword on title lines
	// (see matchTitleLine).
	TITLE_KEYWORD_SEPARATOR         = ":"
	// TABLE_CELL_SEPARATOR delimits the cells of a table row (rune constant).
	TABLE_CELL_SEPARATOR            = '|'
	// ESCAPE_CHAR introduces an escape sequence inside a table cell.
	ESCAPE_CHAR                     = '\\'
	// ESCAPED_NEWLINE is the character that, after ESCAPE_CHAR, yields a newline
	// in a table cell.
	ESCAPED_NEWLINE                 = 'n'
	// DOCSTRING_SEPARATOR is the primary docstring delimiter (three double quotes).
	DOCSTRING_SEPARATOR             = "\"\"\""
	// DOCSTRING_ALTERNATIVE_SEPARATOR is the alternative docstring delimiter
	// (three backticks).
	DOCSTRING_ALTERNATIVE_SEPARATOR = "```"
)

// matcher classifies individual lines into tokens. It is stateful: it tracks
// the dialect currently in effect and whether a docstring is open.
type matcher struct {
	// gdp is used to look up dialects by language code.
	gdp                      GherkinDialectProvider
	// default_lang is the language the matcher was constructed with; Reset
	// looks up its dialect again.
	default_lang             string
	// lang is the language code stamped on every token that is created.
	lang                     string
	// dialect supplies the keywords for the current language.
	dialect                  *GherkinDialect
	// activeDocStringSeparator is the separator that opened the current
	// docstring, or "" when no docstring is open.
	activeDocStringSeparator string
	// indentToRemove is the indent of the opening docstring separator;
	// MatchOther strips up to that much leading text from content lines.
	indentToRemove           int
	// languagePattern matches a "# language: xx" header line and captures the
	// language code.
	languagePattern          *regexp.Regexp
}

// NewMatcher builds a Matcher whose initial and default language is
// DEFAULT_DIALECT, with the dialect looked up through gdp.
func NewMatcher(gdp GherkinDialectProvider) Matcher {
	return NewLanguageMatcher(gdp, DEFAULT_DIALECT)
}

// NewLanguageMatcher builds a Matcher that uses language as both its initial
// and its default language, with the dialect looked up through gdp.
func NewLanguageMatcher(gdp GherkinDialectProvider, language string) Matcher {
	m := new(matcher)
	m.gdp = gdp
	m.default_lang = language
	m.lang = language
	m.dialect = gdp.GetDialect(language)
	// Recognises a "# language: xx" header and captures the language code.
	m.languagePattern = regexp.MustCompile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)\\s*$")
	return m
}

// Reset returns the matcher to its initial state so it can be reused for
// another document: any open docstring is forgotten and the language and
// dialect go back to the default the matcher was constructed with.
func (m *matcher) Reset() {
	m.indentToRemove = 0
	m.activeDocStringSeparator = ""
	// Compare against the configured default rather than a hard-coded "en":
	// a matcher created with another default language must return to that
	// language, and its tokens must be labelled with it.
	if m.lang != m.default_lang {
		m.dialect = m.gdp.GetDialect(m.default_lang)
		m.lang = m.default_lang
	}
}

// newTokenAtLocation allocates a token labelled with the matcher's current
// language. index is a zero-based offset within the line; the stored column
// is one-based.
func (m *matcher) newTokenAtLocation(line, index int) (token *Token) {
	return &Token{
		GherkinDialect: m.lang,
		Location:       &Location{line, index + 1},
	}
}

// MatchEOF reports whether line marks the end of input and, if so, returns an
// EOF token positioned at the line's indent.
func (m *matcher) MatchEOF(line *Line) (ok bool, token *Token, err error) {
	if !line.IsEof() {
		return false, nil, nil
	}
	token = m.newTokenAtLocation(line.LineNumber, line.Indent())
	token.Type = TokenType_EOF
	return true, token, nil
}

// MatchEmpty reports whether line is empty and, if so, returns an Empty token
// positioned at the line's indent.
func (m *matcher) MatchEmpty(line *Line) (ok bool, token *Token, err error) {
	if !line.IsEmpty() {
		return false, nil, nil
	}
	token = m.newTokenAtLocation(line.LineNumber, line.Indent())
	token.Type = TokenType_Empty
	return true, token, nil
}

// MatchComment reports whether line starts with the comment prefix. The
// resulting token sits at the first column and carries the full, untrimmed
// line text.
func (m *matcher) MatchComment(line *Line) (ok bool, token *Token, err error) {
	if !line.StartsWith(COMMENT_PREFIX) {
		return false, nil, nil
	}
	token = m.newTokenAtLocation(line.LineNumber, 0)
	token.Type = TokenType_Comment
	token.Text = line.LineText
	return true, token, nil
}

// MatchTagLine reports whether line starts with the tag prefix. On a match the
// token's Items hold one LineSpan per tag, each with the one-based column of
// the tag's prefix character and the tag text including the prefix.
func (m *matcher) MatchTagLine(line *Line) (ok bool, token *Token, err error) {
	if !line.StartsWith(TAG_PREFIX) {
		return
	}

	var tags []*LineSpan
	// The piece before the first prefix is empty, so the first pass through
	// the loop only advances column onto the first tag's one-based column.
	column := line.Indent()
	for _, part := range strings.Split(line.TrimmedLineText, TAG_PREFIX) {
		if txt := strings.Trim(part, " "); txt != "" {
			tags = append(tags, &LineSpan{column, TAG_PREFIX + txt})
		}
		// Advance by characters rather than bytes so tags that follow
		// multi-byte text get the right column; MatchTableRow counts
		// columns per rune as well.
		column += utf8.RuneCountInString(part) + 1
	}

	token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
	token.Type = TokenType_TagLine
	token.Items = tags
	return
}

// matchTitleLine tries each keyword in order and matches the first one that,
// followed directly by the title separator, begins the line. The token gets
// the given type, the matched keyword, and the rest of the line with
// surrounding spaces removed.
func (m *matcher) matchTitleLine(line *Line, tokenType TokenType, keywords []string) (ok bool, token *Token, err error) {
	for _, kw := range keywords {
		if !line.StartsWith(kw + TITLE_KEYWORD_SEPARATOR) {
			continue
		}
		// Skip the keyword and the single separator character after it.
		title := line.TrimmedLineText[len(kw)+1:]

		token = m.newTokenAtLocation(line.LineNumber, line.Indent())
		token.Type = tokenType
		token.Keyword = kw
		token.Text = strings.Trim(title, " ")
		return true, token, nil
	}
	return false, nil, nil
}

// MatchFeatureLine matches a title line that begins with one of the current
// dialect's feature keywords.
func (m *matcher) MatchFeatureLine(line *Line) (ok bool, token *Token, err error) {
	keywords := m.dialect.FeatureKeywords()
	return m.matchTitleLine(line, TokenType_FeatureLine, keywords)
}
// MatchBackgroundLine matches a title line that begins with one of the current
// dialect's background keywords.
func (m *matcher) MatchBackgroundLine(line *Line) (ok bool, token *Token, err error) {
	keywords := m.dialect.BackgroundKeywords()
	return m.matchTitleLine(line, TokenType_BackgroundLine, keywords)
}
// MatchScenarioLine matches a title line that begins with one of the current
// dialect's scenario keywords.
func (m *matcher) MatchScenarioLine(line *Line) (ok bool, token *Token, err error) {
	keywords := m.dialect.ScenarioKeywords()
	return m.matchTitleLine(line, TokenType_ScenarioLine, keywords)
}
// MatchScenarioOutlineLine matches a title line that begins with one of the
// current dialect's scenario outline keywords.
func (m *matcher) MatchScenarioOutlineLine(line *Line) (ok bool, token *Token, err error) {
	keywords := m.dialect.ScenarioOutlineKeywords()
	return m.matchTitleLine(line, TokenType_ScenarioOutlineLine, keywords)
}
// MatchExamplesLine matches a title line that begins with one of the current
// dialect's examples keywords.
func (m *matcher) MatchExamplesLine(line *Line) (ok bool, token *Token, err error) {
	keywords := m.dialect.ExamplesKeywords()
	return m.matchTitleLine(line, TokenType_ExamplesLine, keywords)
}
// MatchStepLine matches a line that begins with one of the current dialect's
// step keywords, tried in order. The token carries the matched keyword and the
// remainder of the line with surrounding spaces removed.
func (m *matcher) MatchStepLine(line *Line) (ok bool, token *Token, err error) {
	for _, kw := range m.dialect.StepKeywords() {
		if !line.StartsWith(kw) {
			continue
		}
		stepText := line.TrimmedLineText[len(kw):]

		token = m.newTokenAtLocation(line.LineNumber, line.Indent())
		token.Type = TokenType_StepLine
		token.Keyword = kw
		token.Text = strings.Trim(stepText, " ")
		return true, token, nil
	}
	return false, nil, nil
}

// MatchDocStringSeparator matches the line that opens or closes a docstring.
//
// While a docstring is open, only a line starting with the separator that
// opened it matches; that closes the docstring and clears the indent to strip.
// Otherwise a line starting with either separator opens a docstring: the
// separator and the line's indent are remembered, and whatever follows the
// separator on the line becomes the token text.
func (m *matcher) MatchDocStringSeparator(line *Line) (ok bool, token *Token, err error) {
	if active := m.activeDocStringSeparator; active != "" {
		if !line.StartsWith(active) {
			return false, nil, nil
		}
		// Closing separator.
		token = m.newTokenAtLocation(line.LineNumber, line.Indent())
		token.Type = TokenType_DocStringSeparator
		m.indentToRemove = 0
		m.activeDocStringSeparator = ""
		return true, token, nil
	}

	var opener string
	switch {
	case line.StartsWith(DOCSTRING_SEPARATOR):
		opener = DOCSTRING_SEPARATOR
	case line.StartsWith(DOCSTRING_ALTERNATIVE_SEPARATOR):
		opener = DOCSTRING_ALTERNATIVE_SEPARATOR
	default:
		return false, nil, nil
	}

	// Opening separator.
	m.activeDocStringSeparator = opener
	m.indentToRemove = line.Indent()
	token = m.newTokenAtLocation(line.LineNumber, line.Indent())
	token.Type = TokenType_DocStringSeparator
	token.Text = line.TrimmedLineText[len(opener):]
	return true, token, nil
}

// MatchTableRow matches a line whose trimmed text begins with the table cell
// separator. On a match the token's Items hold one LineSpan per cell, each
// with the cell text (surrounding spaces removed) and the column of its first
// non-space character.
//
// A cell is emitted only when its closing separator is reached, so any text
// after the last separator on the line is dropped. Inside a cell, the escape
// character followed by 'n' yields a newline, followed by the separator or
// another escape character yields that character alone, and followed by
// anything else is kept verbatim (escape character included).
func (m *matcher) MatchTableRow(line *Line) (ok bool, token *Token, err error) {
	// firstPos is the byte width of the first rune, i.e. the offset just past it.
	var firstChar, firstPos = utf8.DecodeRuneInString(line.TrimmedLineText)
	if firstChar == TABLE_CELL_SEPARATOR {
		var cells []*LineSpan
		var cell []rune
		var startCol = line.Indent() + 2 // column where the current cell started
		// start after the first separator, it's not included in the cell
		// i is a byte offset, w the byte width of the rune just decoded, and
		// col the column of the current rune (advanced once per rune).
		for i, w, col := firstPos, 0, startCol; i < len(line.TrimmedLineText); i += w {
			var char rune
			char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:])
			if char == TABLE_CELL_SEPARATOR {
				// append current cell
				txt := string(cell)
				txtTrimmed := strings.TrimLeft(txt, " ")
				// ind is the number of leading spaces removed, used to shift
				// the reported column onto the first non-space character.
				ind := len(txt) - len(txtTrimmed)
				cells = append(cells, &LineSpan{startCol + ind, strings.TrimRight(txtTrimmed, " ")})
				// start building next
				cell = make([]rune, 0)
				startCol = col + 1
			} else if char == ESCAPE_CHAR {
				// skip this character but count the column
				i += w
				col++
				// Decode the escaped character; the loop's i += w then steps past it.
				char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:])
				if char == ESCAPED_NEWLINE {
					cell = append(cell, '\n')
				} else {
					// Not a recognised escape: keep the escape character in the cell.
					if char != TABLE_CELL_SEPARATOR && char != ESCAPE_CHAR {
						cell = append(cell, ESCAPE_CHAR)
					}
					cell = append(cell, char)
				}
			} else {
				cell = append(cell, char)
			}
			col++
		}

		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
		token.Type = TokenType_TableRow
		token.Items = cells
	}
	return
}

// MatchLanguage matches a "# language: xx" header line. On a match the token
// text is the captured language code. If the provider has a dialect for that
// code, the matcher switches to it; otherwise the matcher's language is left
// alone and a parse error is returned together with the token.
func (m *matcher) MatchLanguage(line *Line) (ok bool, token *Token, err error) {
	groups := m.languagePattern.FindStringSubmatch(line.TrimmedLineText)
	if len(groups) == 0 {
		return false, nil, nil
	}

	code := groups[1]
	token = m.newTokenAtLocation(line.LineNumber, line.Indent())
	token.Type = TokenType_Language
	token.Text = code

	if found := m.gdp.GetDialect(code); found != nil {
		m.lang, m.dialect = code, found
	} else {
		err = &parseError{"Language not supported: " + code, token.Location}
	}
	return true, token, err
}

// MatchOther always matches and produces an Other token at the first column.
// If the line has more leading spaces than indentToRemove, exactly
// indentToRemove leading bytes are cut off; otherwise all leading spaces are
// removed. The result is passed through unescapeDocString.
func (m *matcher) MatchOther(line *Line) (ok bool, token *Token, err error) {
	raw := line.LineText
	stripped := strings.TrimLeft(raw, " ")
	leadingSpaces := len(raw) - len(stripped)

	body := stripped
	if leadingSpaces > m.indentToRemove {
		body = raw[m.indentToRemove:]
	}

	token = m.newTokenAtLocation(line.LineNumber, 0)
	token.Type = TokenType_Other
	token.Text = m.unescapeDocString(body)
	return true, token, nil
}

// unescapeDocString turns escaped docstring separators in text back into the
// literal separator. Only the escape for the separator that opened the current
// docstring is rewritten: backslash-escaped double quotes inside a
// triple-quote docstring, backslash-escaped backticks inside a backtick
// docstring. Outside a docstring the text is returned unchanged.
func (m *matcher) unescapeDocString(text string) string {
	switch m.activeDocStringSeparator {
	case DOCSTRING_SEPARATOR:
		return strings.Replace(text, "\\\"\\\"\\\"", DOCSTRING_SEPARATOR, -1)
	case DOCSTRING_ALTERNATIVE_SEPARATOR:
		return strings.Replace(text, "\\`\\`\\`", DOCSTRING_ALTERNATIVE_SEPARATOR, -1)
	default:
		return text
	}
}