initial commit with gherkin lexer

2015-06-06 20:07:54 +03:00 · 2015-06-06 20:07:54 +03:00 · 832c0999d0
--- a/gherkin/lexer/lexer.go
+++ b/gherkin/lexer/lexer.go
@ -0,0 +1,145 @@
 package lexer
 import (
 	"bufio"
 	"io"
 	"strings"
 	"unicode"
 )
 // Lexer reads its input one line at a time and turns every line into a Token.
 // It supports a single token of lookahead through Peek.
 type Lexer struct {
 	// reader is the buffered source the lines are read from.
 	reader *bufio.Reader
 	// peek holds a token that was read ahead by Peek and has not yet been
 	// handed out by Next; it is nil when nothing is buffered.
 	peek   *Token
 	// lines counts the lines consumed so far; tokens report the zero-based
 	// index of their line derived from this counter.
 	lines  int
 }
 // New builds a Lexer that consumes r through a buffered reader. The returned
 // lexer starts at line zero with no token buffered.
 func New(r io.Reader) *Lexer {
 	lex := new(Lexer)
 	lex.reader = bufio.NewReader(r)
 	return lex
 }
 // Next returns the next token of the input. A token previously buffered by
 // Peek is handed out first (and the buffer is cleared); otherwise a fresh
 // line is read and tokenized.
 func (l *Lexer) Next() (t *Token) {
 	if l.peek == nil {
 		return l.read()
 	}
 	t, l.peek = l.peek, nil
 	return t
 }
 // Peek returns the upcoming token without consuming it. The token is read
 // once and kept buffered, so repeated calls return the same token until Next
 // takes it.
 func (l *Lexer) Peek() *Token {
 	if l.peek != nil {
 		return l.peek
 	}
 	l.peek = l.read()
 	return l.peek
 }
 // read consumes one line from the underlying reader and classifies it as a
 // single Token. Trailing whitespace is stripped before classification, and a
 // line that is empty afterwards becomes NEW_LINE. The matchers are then tried
 // in a fixed order (comment, pystring, step, scenario, background, feature,
 // tags, table row); a line matching none of them becomes TEXT. Token.Line is
 // the zero-based index of the line. When the read fails without delivering
 // any data, an EOF token carrying the number of lines consumed so far is
 // returned.
 func (l *Lexer) read() *Token {
 	raw, err := l.reader.ReadString('\n')
 	if len(raw) == 0 && err != nil {
 		return &Token{Type: EOF, Line: l.lines}
 	}
 	// a last line without a terminating newline arrives together with an
 	// error but still carries data, so it is tokenized like any other line
 	lineNo := l.lines
 	l.lines++
 	line := strings.TrimRightFunc(raw, unicode.IsSpace)
 	if line == "" {
 		return &Token{Type: NEW_LINE, Line: lineNo}
 	}
 	// submatch runs the named matcher against the current line; nil means no match
 	submatch := func(name string) []string {
 		return matchers[name].FindStringSubmatch(line)
 	}
 	tok := &Token{Line: lineNo}
 	if m := submatch("comment"); m != nil {
 		tok.Type, tok.Indent, tok.Value = COMMENT, len(m[1]), m[2]
 		return tok
 	}
 	if m := submatch("pystring"); m != nil {
 		tok.Type, tok.Indent = PYSTRING, len(m[1])
 		return tok
 	}
 	if m := submatch("step"); m != nil {
 		tok.Indent, tok.Value = len(m[1]), m[3]
 		// the step keyword decides the token type
 		switch keyword := m[2]; keyword {
 		case "Given":
 			tok.Type = GIVEN
 		case "When":
 			tok.Type = WHEN
 		case "Then":
 			tok.Type = THEN
 		case "And":
 			tok.Type = AND
 		case "But":
 			tok.Type = BUT
 		}
 		return tok
 	}
 	if m := submatch("scenario"); m != nil {
 		tok.Type, tok.Indent, tok.Value = SCENARIO, len(m[1]), m[2]
 		return tok
 	}
 	if m := submatch("background"); m != nil {
 		tok.Type, tok.Indent = BACKGROUND, len(m[1])
 		return tok
 	}
 	if m := submatch("feature"); m != nil {
 		tok.Type, tok.Indent, tok.Value = FEATURE, len(m[1]), m[2]
 		return tok
 	}
 	if m := submatch("tags"); m != nil {
 		tok.Type, tok.Indent, tok.Value = TAGS, len(m[1]), m[2]
 		return tok
 	}
 	if m := submatch("table_row"); m != nil {
 		tok.Type, tok.Indent, tok.Value = TABLE_ROW, len(m[1]), m[2]
 		return tok
 	}
 	// anything else is free text; the indentation is the number of bytes of
 	// leading whitespace that were stripped
 	body := strings.TrimLeftFunc(line, unicode.IsSpace)
 	tok.Type, tok.Value, tok.Indent = TEXT, body, len(line)-len(body)
 	return tok
 }
--- a/gherkin/lexer/lexer_test.go
+++ b/gherkin/lexer/lexer_test.go
@ -0,0 +1,185 @@
 package lexer
 import (
 	"strings"
 	"testing"
 )
 // samples holds Gherkin fragments, keyed by element name, that the tests
 // below combine into lexer input. The "feature" and "step_given_table"
 // entries span several lines; their continuation lines carry their own
 // leading whitespace, which the indentation assertions depend on.
 var samples = map[string]string{
 	"feature": `Feature: gherkin lexer
  in order to run features
  as gherkin lexer
  I need to be able to parse a feature`,
 	"background": `Background:`,
 	"scenario": "Scenario: tokenize feature file",
 	"step_given": `Given a feature file`,
 	"step_when": `When I try to read it`,
 	"comment": `# an important comment`,
 	"step_then": `Then it should give me tokens`,
 	"step_given_table": `Given there are users:
      | name | lastname | num |
      | Jack | Sparrow  | 4   |
      | John | Doe      | 79  |`,
 }
 // indent returns s prefixed with n space characters.
 func indent(n int, s string) string {
 	padding := strings.Repeat(" ", n)
 	return padding + s
 }
 // Test_feature_read lexes the multi-line "feature" sample and verifies the
 // type, value, zero-based line and indentation of the feature header and of
 // each of its three description lines, followed by the closing EOF token.
 func Test_feature_read(t *testing.T) {
 	lex := New(strings.NewReader(samples["feature"]))
 	// line 0: the feature header
 	token := lex.Next()
 	if token.Type != FEATURE {
 		t.Fatalf("Expected a 'feature' type, but got: '%s'", token.Type)
 	}
 	if want := "gherkin lexer"; token.Value != want {
 		t.Fatalf("Expected a token value to be '%s', but got: '%s'", want, token.Value)
 	}
 	if got := token.Line; got != 0 {
 		t.Fatalf("Expected a token line to be '0', but got: '%d'", got)
 	}
 	if got := token.Indent; got != 0 {
 		t.Fatalf("Expected a token identation to be '0', but got: '%d'", got)
 	}
 	// line 1: first description line
 	token = lex.Next()
 	if token.Type != TEXT {
 		t.Fatalf("Expected a 'text' type, but got: '%s'", token.Type)
 	}
 	if want := "in order to run features"; token.Value != want {
 		t.Fatalf("Expected a token value to be '%s', but got: '%s'", want, token.Value)
 	}
 	if got := token.Line; got != 1 {
 		t.Fatalf("Expected a token line to be '1', but got: '%d'", got)
 	}
 	if got := token.Indent; got != 2 {
 		t.Fatalf("Expected a token identation to be '2', but got: '%d'", got)
 	}
 	// line 2: second description line
 	token = lex.Next()
 	if token.Type != TEXT {
 		t.Fatalf("Expected a 'text' type, but got: '%s'", token.Type)
 	}
 	if want := "as gherkin lexer"; token.Value != want {
 		t.Fatalf("Expected a token value to be '%s', but got: '%s'", want, token.Value)
 	}
 	if got := token.Line; got != 2 {
 		t.Fatalf("Expected a token line to be '2', but got: '%d'", got)
 	}
 	if got := token.Indent; got != 2 {
 		t.Fatalf("Expected a token identation to be '2', but got: '%d'", got)
 	}
 	// line 3: third description line, which has no trailing newline
 	token = lex.Next()
 	if token.Type != TEXT {
 		t.Fatalf("Expected a 'text' type, but got: '%s'", token.Type)
 	}
 	if want := "I need to be able to parse a feature"; token.Value != want {
 		t.Fatalf("Expected a token value to be '%s', but got: '%s'", want, token.Value)
 	}
 	if got := token.Line; got != 3 {
 		t.Fatalf("Expected a token line to be '3', but got: '%d'", got)
 	}
 	if got := token.Indent; got != 2 {
 		t.Fatalf("Expected a token identation to be '2', but got: '%d'", got)
 	}
 	// the input is exhausted now
 	if token = lex.Next(); token.Type != EOF {
 		t.Fatalf("Expected an 'eof' type, but got: '%s'", token.Type)
 	}
 }
 // Test_minimal_feature lexes a small but complete feature (description,
 // background, a comment and one scenario) and checks the exact sequence of
 // token types produced before EOF.
 func Test_minimal_feature(t *testing.T) {
 	file := strings.Join([]string{
 		samples["feature"] + "\n",
 		indent(2, samples["background"]),
 		indent(4, samples["step_given"]) + "\n",
 		indent(2, samples["comment"]),
 		indent(2, samples["scenario"]),
 		indent(4, samples["step_given"]),
 		indent(4, samples["step_when"]),
 		indent(4, samples["step_then"]),
 	}, "\n")
 	l := New(strings.NewReader(file))
 	var tokens []TokenType
 	for tok := l.Next(); tok.Type != EOF; tok = l.Next() {
 		tokens = append(tokens, tok.Type)
 	}
 	expected := []TokenType{
 		FEATURE,
 		TEXT,
 		TEXT,
 		TEXT,
 		NEW_LINE,
 		BACKGROUND,
 		GIVEN,
 		NEW_LINE,
 		COMMENT,
 		SCENARIO,
 		GIVEN,
 		WHEN,
 		THEN,
 	}
 	// compare the lengths first: indexing tokens by position would otherwise
 	// panic on a short stream and silently accept surplus tokens on a long one
 	if len(tokens) != len(expected) {
 		t.Fatalf("expected %d tokens, but got %d: %v", len(expected), len(tokens), tokens)
 	}
 	for i, want := range expected {
 		if want != tokens[i] {
 			t.Fatalf("expected token '%s' at position: %d, is not the same as actual token: '%s'", want, i, tokens[i])
 		}
 	}
 }
 // Test_table_row_reading lexes a background whose first step is followed by
 // a three-row table and checks the token types, the indentation of every
 // token and the raw value of the first table row.
 func Test_table_row_reading(t *testing.T) {
 	file := strings.Join([]string{
 		indent(2, samples["background"]),
 		indent(4, samples["step_given_table"]),
 		indent(4, samples["step_given"]),
 	}, "\n")
 	l := New(strings.NewReader(file))
 	var types []TokenType
 	var values []string
 	var indents []int
 	for tok := l.Next(); tok.Type != EOF; tok = l.Next() {
 		types = append(types, tok.Type)
 		values = append(values, tok.Value)
 		indents = append(indents, tok.Indent)
 	}
 	expectedTypes := []TokenType{
 		BACKGROUND,
 		GIVEN,
 		TABLE_ROW,
 		TABLE_ROW,
 		TABLE_ROW,
 		GIVEN,
 	}
 	expectedIndents := []int{2, 4, 6, 6, 6, 4}
 	// types, values and indents grow together, so one length check guards all
 	// positional lookups below against panics and against unchecked surplus tokens
 	if len(types) != len(expectedTypes) {
 		t.Fatalf("expected %d tokens, but got %d: %v", len(expectedTypes), len(types), types)
 	}
 	if len(indents) != len(expectedIndents) {
 		t.Fatalf("expected %d indentations, but got %d: %v", len(expectedIndents), len(indents), indents)
 	}
 	for i, want := range expectedTypes {
 		if want != types[i] {
 			t.Fatalf("expected token type '%s' at position: %d, is not the same as actual: '%s'", want, i, types[i])
 		}
 	}
 	for i, want := range expectedIndents {
 		if want != indents[i] {
 			t.Fatalf("expected token indentation '%d' at position: %d, is not the same as actual: '%d'", want, i, indents[i])
 		}
 	}
 	if values[2] != "| name | lastname | num |" {
 		t.Fatalf("table row value '%s' was not expected", values[2])
 	}
 }
--- a/gherkin/lexer/matchers.go
+++ b/gherkin/lexer/matchers.go
@ -0,0 +1,14 @@
 package lexer
 import "regexp"
 // matchers holds the compiled line patterns used by the lexer, keyed by
 // element name. In every pattern the first capture group is the leading
 // whitespace of the line. Where present, the last capture group is the
 // payload of the line; "step" additionally captures its keyword in group 2.
 //
 // The comment payload may be empty, so a bare "#" line (which is also what
 // "# " looks like once trailing whitespace has been trimmed) is still
 // recognized as a comment.
 var matchers = map[string]*regexp.Regexp{
 	"feature":    regexp.MustCompile(`^(\s*)Feature:\s*(.*)`),
 	"scenario":   regexp.MustCompile(`^(\s*)Scenario:\s*(.*)`),
 	"background": regexp.MustCompile(`^(\s*)Background:`),
 	"step":       regexp.MustCompile(`^(\s*)(Given|When|Then|And|But)\s+(.+)`),
 	"comment":    regexp.MustCompile(`^(\s*)#(.*)`),
 	"pystring":   regexp.MustCompile(`^(\s*)"""`),
 	"tags":       regexp.MustCompile(`^(\s*)(@.+)`),
 	"table_row":  regexp.MustCompile(`^(\s*)(\|.+)`),
 }
--- a/gherkin/lexer/token.go
+++ b/gherkin/lexer/token.go
@ -0,0 +1,16 @@
 package lexer
 // Token is the lexical unit the Lexer produces for one line of input.
 type Token struct {
 	// Type classifies the line (comment, step, table row, ...).
 	Type         TokenType
 	// Line is the zero-based index of the line the token was read from;
 	// Indent is the length in bytes of the line's leading whitespace.
 	Line, Indent int
 	// Value is the payload of the line as captured by the lexer; some token
 	// types (for example NEW_LINE, EOF, PYSTRING and BACKGROUND) leave it empty.
 	Value        string
 }
 // OfType reports whether the token's type equals any of the given types.
 // With no types given it reports false.
 func (t *Token) OfType(all ...TokenType) bool {
 	for i := range all {
 		if all[i] == t.Type {
 			return true
 		}
 	}
 	return false
 }
--- a/gherkin/lexer/token_type.go
+++ b/gherkin/lexer/token_type.go
@ -0,0 +1,64 @@
 package lexer
 // TokenType identifies the kind of a Token. Its String method yields a
 // human-readable name.
 type TokenType int
 // Token types, numbered consecutively from zero via iota; the declaration
 // order therefore determines the numeric values and must stay stable.
 // The unexported names specials, elements and keywords occupy values of their
 // own. NOTE(review): they look like group delimiters (in the manner of
 // go/token), but nothing in the visible code reads them — confirm before
 // relying on them for range checks.
 const (
 	// ILLEGAL is the zero value of TokenType.
 	ILLEGAL TokenType = iota
 	specials
 	COMMENT
 	NEW_LINE
 	EOF
 	elements
 	TEXT
 	TAGS
 	TABLE_ROW
 	PYSTRING
 	keywords
 	FEATURE
 	BACKGROUND
 	SCENARIO
 	GIVEN
 	WHEN
 	THEN
 	AND
 	BUT
 )
 func (t TokenType) String() string {
 	switch t {
 	case COMMENT:
 		return "comment"
 	case NEW_LINE:
 		return "new line"
 	case EOF:
 		return "end of file"
 	case TEXT:
 		return "text"
 	case TAGS:
 		return "tags"
 	case TABLE_ROW:
 		return "table row"
 	case PYSTRING:
 		return "pystring"
 	case FEATURE:
 		return "feature"
 	case BACKGROUND:
 		return "background"
 	case SCENARIO:
 		return "scenario"
 	case GIVEN:
 		return "given step"
 	case WHEN:
 		return "when step"
 	case THEN:
 		return "then step"
 	case AND:
 		return "and step"
 	case BUT:
 		return "but step"
 	}
 	return "illegal"
 }
--- a/gherkin/parse.go
+++ b/gherkin/parse.go
@ -0,0 +1,46 @@
 package gherkin
 // Tag is a label attached to a Feature or a Scenario.
 // NOTE(review): presumably the Gherkin "@name" tag text — confirm whether the
 // leading "@" is meant to be kept.
 type Tag string
 // Scenario groups the steps of one scenario together with its tags.
 // NOTE(review): Line is a string here, whereas lexer.Token.Line is an int —
 // confirm what it is meant to hold.
 type Scenario struct {
 	Steps []*Step
 	Tags  []Tag
 	Line  string
 }
 // Background groups the steps declared in a feature's background section.
 // NOTE(review): Line is a string here, whereas lexer.Token.Line is an int —
 // confirm what it is meant to hold.
 type Background struct {
 	Steps []*Step
 	Line  string
 }
 // StepType names the kind of a Step; see the Given, When and Then constants.
 type StepType string
 // The step types. Each value is spelled like its Gherkin keyword.
 // NOTE(review): the lexer also emits AND and BUT tokens, which have no
 // StepType of their own; presumably they take the type of the preceding
 // step — confirm once the parser exists.
 const (
 	Given StepType = "Given"
 	When  StepType = "When"
 	Then  StepType = "Then"
 )
 // Step is a single step of a Scenario or Background: its text and its type.
 // NOTE(review): Line is a string here, whereas lexer.Token.Line is an int —
 // confirm what it is meant to hold.
 type Step struct {
 	Line string
 	Text string
 	Type StepType
 }
 // Feature is the root of a parsed feature: its tags, title and description,
 // an optional Background and the list of scenarios.
 // NOTE(review): Filename is presumably the path the feature was loaded from,
 // and Line is a string whereas lexer.Token.Line is an int — confirm both.
 type Feature struct {
 	Tags        []Tag
 	Description string
 	Line        string
 	Title       string
 	Filename    string
 	// Background is a pointer and so may be nil.
 	Background  *Background
 	Scenarios   []*Scenario
 }
 // func Parse(r io.Reader) (*Feature, error) {
 // 	in := bufio.NewReader(r)
 // 	for line, err := in.ReadString(byte('\n')); err != nil; line, err = in.ReadString(byte('\n')) {
 // 		ln := strings.TrimFunc(string(line), unicode.IsSpace)
 // 	}
 // 	return nil, nil
 // }