initial commit with gherkin lexer

Этот коммит содержится в:
gedi 2015-06-06 20:07:54 +03:00
коммит 832c0999d0
6 изменённых файлов: 470 добавлений и 0 удалений

145
gherkin/lexer/lexer.go Обычный файл
Просмотреть файл

@ -0,0 +1,145 @@
package lexer
import (
"bufio"
"io"
"strings"
"unicode"
)
// Lexer reads gherkin source line by line and turns each line into a Token.
type Lexer struct {
	reader *bufio.Reader // buffered input the tokens are scanned from
	peek   *Token        // one-token lookahead buffer, filled by Peek, drained by Next
	lines  int           // count of lines consumed so far (used for zero-based Token.Line)
}
// New returns a Lexer that tokenizes the gherkin text read from r.
func New(r io.Reader) *Lexer {
	lex := new(Lexer)
	lex.reader = bufio.NewReader(r)
	return lex
}
// Next consumes and returns the next token. A token buffered by an
// earlier Peek call is returned (and cleared) before any new input
// is read.
func (l *Lexer) Next() *Token {
	if buffered := l.peek; buffered != nil {
		l.peek = nil
		return buffered
	}
	return l.read()
}
// Peek returns the upcoming token without consuming it; repeated calls
// yield the same token until Next is called.
func (l *Lexer) Peek() *Token {
	if l.peek != nil {
		return l.peek
	}
	l.peek = l.read()
	return l.peek
}
// read consumes one line of input and converts it into a Token.
// The matchers are tried in a fixed order (comment, pystring, step,
// scenario, background, feature, tags, table row); a line matching
// none of them becomes a TEXT token.
func (l *Lexer) read() *Token {
	line, err := l.reader.ReadString('\n')
	if err != nil && len(line) == 0 {
		// input exhausted: no partial line left to tokenize
		return &Token{Type: EOF, Line: l.lines}
	}
	l.lines++
	ln := l.lines - 1 // zero-based number of the line just consumed
	line = strings.TrimRightFunc(line, unicode.IsSpace)

	// blank line after trailing-whitespace trim
	if len(line) == 0 {
		return &Token{Type: NEW_LINE, Line: ln}
	}
	// comment: value is everything after '#'
	if m := matchers["comment"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: COMMENT, Indent: len(m[1]), Line: ln, Value: m[2]}
	}
	// pystring delimiter (""" fence); carries no value
	if m := matchers["pystring"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: PYSTRING, Indent: len(m[1]), Line: ln}
	}
	// step: keyword in m[2] selects the type, m[3] is the step text
	if m := matchers["step"].FindStringSubmatch(line); len(m) > 0 {
		tok := &Token{Indent: len(m[1]), Line: ln, Value: m[3]}
		switch m[2] {
		case "Given":
			tok.Type = GIVEN
		case "When":
			tok.Type = WHEN
		case "Then":
			tok.Type = THEN
		case "And":
			tok.Type = AND
		case "But":
			tok.Type = BUT
		}
		return tok
	}
	// scenario header: value is the scenario title
	if m := matchers["scenario"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: SCENARIO, Indent: len(m[1]), Line: ln, Value: m[2]}
	}
	// background header; carries no value
	if m := matchers["background"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: BACKGROUND, Indent: len(m[1]), Line: ln}
	}
	// feature header: value is the feature title
	if m := matchers["feature"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: FEATURE, Indent: len(m[1]), Line: ln, Value: m[2]}
	}
	// tag line starting with '@'
	if m := matchers["tags"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: TAGS, Indent: len(m[1]), Line: ln, Value: m[2]}
	}
	// table row starting with '|'
	if m := matchers["table_row"].FindStringSubmatch(line); len(m) > 0 {
		return &Token{Type: TABLE_ROW, Indent: len(m[1]), Line: ln, Value: m[2]}
	}
	// fallback: plain text; indent is the number of stripped leading runes
	text := strings.TrimLeftFunc(line, unicode.IsSpace)
	return &Token{Type: TEXT, Line: ln, Value: text, Indent: len(line) - len(text)}
}

185
gherkin/lexer/lexer_test.go Обычный файл
Просмотреть файл

@ -0,0 +1,185 @@
package lexer
import (
"strings"
"testing"
)
// samples holds small gherkin snippets used as lexer input by the tests
// below. The leading spaces inside the multi-line raw strings are
// significant: the tests assert on the resulting Token.Indent values.
var samples = map[string]string{
	"feature": `Feature: gherkin lexer
  in order to run features
  as gherkin lexer
  I need to be able to parse a feature`,
	"background": `Background:`,
	"scenario":   "Scenario: tokenize feature file",
	"step_given": `Given a feature file`,
	"step_when":  `When I try to read it`,
	"comment":    `# an important comment`,
	"step_then":  `Then it should give me tokens`,
	"step_given_table": `Given there are users:
      | name | lastname | num |
      | Jack | Sparrow | 4 |
      | John | Doe | 79 |`,
}
// indent prefixes s with n space characters.
func indent(n int, s string) string {
	var b strings.Builder
	b.WriteString(strings.Repeat(" ", n))
	b.WriteString(s)
	return b.String()
}
// Test_feature_read tokenizes the "feature" sample and verifies the
// type, value, line and indentation of every token the lexer emits,
// ending with EOF.
func Test_feature_read(t *testing.T) {
	l := New(strings.NewReader(samples["feature"]))
	// expected tokens in the exact order the lexer must produce them
	expected := []struct {
		typ    TokenType
		value  string
		line   int
		indent int
	}{
		{FEATURE, "gherkin lexer", 0, 0},
		{TEXT, "in order to run features", 1, 2},
		{TEXT, "as gherkin lexer", 2, 2},
		{TEXT, "I need to be able to parse a feature", 3, 2},
	}
	for i, exp := range expected {
		tok := l.Next()
		if tok.Type != exp.typ {
			t.Fatalf("token %d: expected a '%s' type, but got: '%s'", i, exp.typ, tok.Type)
		}
		if tok.Value != exp.value {
			t.Fatalf("token %d: expected a token value to be '%s', but got: '%s'", i, exp.value, tok.Value)
		}
		if tok.Line != exp.line {
			t.Fatalf("token %d: expected a token line to be '%d', but got: '%d'", i, exp.line, tok.Line)
		}
		if tok.Indent != exp.indent {
			t.Fatalf("token %d: expected a token indentation to be '%d', but got: '%d'", i, exp.indent, tok.Indent)
		}
	}
	if tok := l.Next(); tok.Type != EOF {
		t.Fatalf("expected an 'eof' type, but got: '%s'", tok.Type)
	}
}
// Test_minimal_feature feeds a small but complete feature file to the
// lexer and checks that the emitted token-type sequence matches exactly.
func Test_minimal_feature(t *testing.T) {
	file := strings.Join([]string{
		samples["feature"] + "\n",
		indent(2, samples["background"]),
		indent(4, samples["step_given"]) + "\n",
		indent(2, samples["comment"]),
		indent(2, samples["scenario"]),
		indent(4, samples["step_given"]),
		indent(4, samples["step_when"]),
		indent(4, samples["step_then"]),
	}, "\n")
	l := New(strings.NewReader(file))
	var tokens []TokenType
	for tok := l.Next(); tok.Type != EOF; tok = l.Next() {
		tokens = append(tokens, tok.Type)
	}
	expected := []TokenType{
		FEATURE,
		TEXT,
		TEXT,
		TEXT,
		NEW_LINE,
		BACKGROUND,
		GIVEN,
		NEW_LINE,
		COMMENT,
		SCENARIO,
		GIVEN,
		WHEN,
		THEN,
	}
	// guard first: indexing tokens[i] below would panic with
	// index-out-of-range if the lexer emitted fewer tokens than expected
	if len(tokens) != len(expected) {
		t.Fatalf("expected %d tokens, but got: %d", len(expected), len(tokens))
	}
	for i := range expected {
		if expected[i] != tokens[i] {
			t.Fatalf("expected token '%s' at position: %d, is not the same as actual token: '%s'", expected[i], i, tokens[i])
		}
	}
}
// Test_table_row_reading verifies that table rows under a Given step are
// emitted as TABLE_ROW tokens with the correct indentation and raw value.
func Test_table_row_reading(t *testing.T) {
	file := strings.Join([]string{
		indent(2, samples["background"]),
		indent(4, samples["step_given_table"]),
		indent(4, samples["step_given"]),
	}, "\n")
	l := New(strings.NewReader(file))
	var types []TokenType
	var values []string
	var indents []int
	for tok := l.Next(); tok.Type != EOF; tok = l.Next() {
		types = append(types, tok.Type)
		values = append(values, tok.Value)
		indents = append(indents, tok.Indent)
	}
	expectedTypes := []TokenType{
		BACKGROUND,
		GIVEN,
		TABLE_ROW,
		TABLE_ROW,
		TABLE_ROW,
		GIVEN,
	}
	expectedIndents := []int{2, 4, 6, 6, 6, 4}
	// guard first: indexing types[i]/indents[i]/values[2] below would
	// panic if the lexer emitted fewer tokens than expected
	if len(types) != len(expectedTypes) {
		t.Fatalf("expected %d tokens, but got: %d", len(expectedTypes), len(types))
	}
	for i := range expectedTypes {
		if expectedTypes[i] != types[i] {
			t.Fatalf("expected token type '%s' at position: %d, is not the same as actual: '%s'", expectedTypes[i], i, types[i])
		}
		if expectedIndents[i] != indents[i] {
			t.Fatalf("expected token indentation '%d' at position: %d, is not the same as actual: '%d'", expectedIndents[i], i, indents[i])
		}
	}
	if values[2] != "| name | lastname | num |" {
		t.Fatalf("table row value '%s' was not expected", values[2])
	}
}

14
gherkin/lexer/matchers.go Обычный файл
Просмотреть файл

@ -0,0 +1,14 @@
package lexer
import "regexp"
// matchers maps a token kind to the regexp that recognizes it. Every
// pattern captures the leading whitespace in group 1 (used for
// Token.Indent); groups 2+ capture the token's payload. Raw string
// literals are used so backslashes need no doubling (idiomatic Go for
// regexps).
var matchers = map[string]*regexp.Regexp{
	"feature":    regexp.MustCompile(`^(\s*)Feature:\s*(.*)`),
	"scenario":   regexp.MustCompile(`^(\s*)Scenario:\s*(.*)`),
	"background": regexp.MustCompile(`^(\s*)Background:`),
	"step":       regexp.MustCompile(`^(\s*)(Given|When|Then|And|But)\s+(.+)`),
	"comment":    regexp.MustCompile(`^(\s*)#(.+)`),
	"pystring":   regexp.MustCompile(`^(\s*)"""`),
	"tags":       regexp.MustCompile(`^(\s*)(@.+)`),
	"table_row":  regexp.MustCompile(`^(\s*)(\|.+)`),
}

16
gherkin/lexer/token.go Обычный файл
Просмотреть файл

@ -0,0 +1,16 @@
package lexer
// Token is a single lexed unit of a gherkin document.
type Token struct {
	Type         TokenType // kind of token (keyword, text, comment, ...)
	Line, Indent int       // zero-based source line and count of leading whitespace characters
	Value        string    // text payload; empty for tokens that carry none (EOF, NEW_LINE, PYSTRING, BACKGROUND)
}
// OfType reports whether the token's type equals any of the given types.
func (t *Token) OfType(all ...TokenType) bool {
	for i := range all {
		if t.Type == all[i] {
			return true
		}
	}
	return false
}

64
gherkin/lexer/token_type.go Обычный файл
Просмотреть файл

@ -0,0 +1,64 @@
package lexer
// TokenType identifies the kind of a lexed Token.
type TokenType int

const (
	// ILLEGAL marks an unrecognized token type (also the zero value).
	ILLEGAL TokenType = iota

	// specials is a divider; special tokens follow.
	specials
	COMMENT
	NEW_LINE
	EOF

	// elements is a divider; content-bearing elements follow.
	elements
	TEXT
	TAGS
	TABLE_ROW
	PYSTRING

	// keywords is a divider; gherkin keyword tokens follow.
	keywords
	FEATURE
	BACKGROUND
	SCENARIO
	GIVEN
	WHEN
	THEN
	AND
	BUT
)

// tokenNames maps each printable token type to its human-readable name.
var tokenNames = map[TokenType]string{
	COMMENT:    "comment",
	NEW_LINE:   "new line",
	EOF:        "end of file",
	TEXT:       "text",
	TAGS:       "tags",
	TABLE_ROW:  "table row",
	PYSTRING:   "pystring",
	FEATURE:    "feature",
	BACKGROUND: "background",
	SCENARIO:   "scenario",
	GIVEN:      "given step",
	WHEN:       "when step",
	THEN:       "then step",
	AND:        "and step",
	BUT:        "but step",
}

// String returns a human-readable name for the token type; any type
// without a registered name is reported as "illegal".
func (t TokenType) String() string {
	if name, ok := tokenNames[t]; ok {
		return name
	}
	return "illegal"
}

46
gherkin/parse.go Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
package gherkin
// Tag is a single gherkin tag attached to a feature or scenario.
type Tag string

// Scenario groups the steps and tags parsed from one "Scenario:" section.
type Scenario struct {
	Steps []*Step
	Tags  []Tag
	Line  string // NOTE(review): line reference kept as a string — confirm whether an int line number was intended
}

// Background holds the steps of the optional "Background:" section.
type Background struct {
	Steps []*Step
	Line  string // NOTE(review): line reference kept as a string — confirm whether an int line number was intended
}

// StepType is the keyword that introduces a step.
type StepType string

// Recognized step keywords. "And" and "But" are absent — presumably they
// resolve to the preceding step's type during parsing; TODO confirm once
// Parse is implemented.
const (
	Given StepType = "Given"
	When  StepType = "When"
	Then  StepType = "Then"
)

// Step is one executable step line within a scenario or background.
type Step struct {
	Line string // NOTE(review): line reference kept as a string — confirm whether an int line number was intended
	Text string // step text following the keyword
	Type StepType
}

// Feature is the root of a parsed gherkin document.
type Feature struct {
	Tags        []Tag
	Description string
	Line        string // NOTE(review): line reference kept as a string — confirm whether an int line number was intended
	Title       string
	Filename    string
	Background  *Background // nil when the feature has no Background section
	Scenarios   []*Scenario
}
// func Parse(r io.Reader) (*Feature, error) {
// in := bufio.NewReader(r)
// for line, err := in.ReadString(byte('\n')); err == nil; line, err = in.ReadString(byte('\n')) {
// ln := strings.TrimFunc(string(line), unicode.IsSpace)
// }
// return nil, nil
// }