From 832c0999d0c027ba7bc54adc7e53c068424ef86a Mon Sep 17 00:00:00 2001 From: gedi Date: Sat, 6 Jun 2015 20:07:54 +0300 Subject: [PATCH] initial commit with gherkin lexer --- gherkin/lexer/lexer.go | 145 ++++++++++++++++++++++++++++ gherkin/lexer/lexer_test.go | 185 ++++++++++++++++++++++++++++++++++++ gherkin/lexer/matchers.go | 14 +++ gherkin/lexer/token.go | 16 ++++ gherkin/lexer/token_type.go | 64 +++++++++++++ gherkin/parse.go | 46 +++++++++ 6 files changed, 470 insertions(+) create mode 100644 gherkin/lexer/lexer.go create mode 100644 gherkin/lexer/lexer_test.go create mode 100644 gherkin/lexer/matchers.go create mode 100644 gherkin/lexer/token.go create mode 100644 gherkin/lexer/token_type.go create mode 100644 gherkin/parse.go diff --git a/gherkin/lexer/lexer.go b/gherkin/lexer/lexer.go new file mode 100644 index 0000000..48c6e17 --- /dev/null +++ b/gherkin/lexer/lexer.go @@ -0,0 +1,145 @@ +package lexer + +import ( + "bufio" + "io" + "strings" + "unicode" +) + +type Lexer struct { + reader *bufio.Reader + peek *Token + lines int +} + +func New(r io.Reader) *Lexer { + return &Lexer{ + reader: bufio.NewReader(r), + } +} + +func (l *Lexer) Next() (t *Token) { + if l.peek != nil { + t = l.peek + l.peek = nil + return + } + return l.read() +} + +func (l *Lexer) Peek() *Token { + if l.peek == nil { + l.peek = l.read() + } + return l.peek +} + +func (l *Lexer) read() *Token { + line, err := l.reader.ReadString(byte('\n')) + if err != nil && len(line) == 0 { + return &Token{ + Type: EOF, + Line: l.lines, + } + } + l.lines++ + line = strings.TrimRightFunc(line, unicode.IsSpace) + // newline + if len(line) == 0 { + return &Token{ + Type: NEW_LINE, + Line: l.lines - 1, + } + } + // comment + if m := matchers["comment"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: COMMENT, + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[2], + } + } + // pystring + if m := matchers["pystring"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: 
PYSTRING, + Indent: len(m[1]), + Line: l.lines - 1, + } + } + // step + if m := matchers["step"].FindStringSubmatch(line); len(m) > 0 { + tok := &Token{ + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[3], + } + switch m[2] { + case "Given": + tok.Type = GIVEN + case "When": + tok.Type = WHEN + case "Then": + tok.Type = THEN + case "And": + tok.Type = AND + case "But": + tok.Type = BUT + } + return tok + } + // scenario + if m := matchers["scenario"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: SCENARIO, + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[2], + } + } + // background + if m := matchers["background"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: BACKGROUND, + Indent: len(m[1]), + Line: l.lines - 1, + } + } + // feature + if m := matchers["feature"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: FEATURE, + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[2], + } + } + // tags + if m := matchers["tags"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: TAGS, + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[2], + } + } + // table row + if m := matchers["table_row"].FindStringSubmatch(line); len(m) > 0 { + return &Token{ + Type: TABLE_ROW, + Indent: len(m[1]), + Line: l.lines - 1, + Value: m[2], + } + } + // text + text := strings.TrimLeftFunc(line, unicode.IsSpace) + return &Token{ + Type: TEXT, + Line: l.lines - 1, + Value: text, + Indent: len(line) - len(text), + } +} diff --git a/gherkin/lexer/lexer_test.go b/gherkin/lexer/lexer_test.go new file mode 100644 index 0000000..28e28b0 --- /dev/null +++ b/gherkin/lexer/lexer_test.go @@ -0,0 +1,185 @@ +package lexer + +import ( + "strings" + "testing" +) + +var samples = map[string]string{ + "feature": `Feature: gherkin lexer + in order to run features + as gherkin lexer + I need to be able to parse a feature`, + + "background": `Background:`, + + "scenario": "Scenario: tokenize feature file", + + "step_given": `Given a feature 
file`, + + "step_when": `When I try to read it`, + + "comment": `# an important comment`, + + "step_then": `Then it should give me tokens`, + + "step_given_table": `Given there are users: + | name | lastname | num | + | Jack | Sparrow | 4 | + | John | Doe | 79 |`, +} + +func indent(n int, s string) string { + return strings.Repeat(" ", n) + s +} + +func Test_feature_read(t *testing.T) { + l := New(strings.NewReader(samples["feature"])) + tok := l.Next() + if tok.Type != FEATURE { + t.Fatalf("Expected a 'feature' type, but got: '%s'", tok.Type) + } + val := "gherkin lexer" + if tok.Value != val { + t.Fatalf("Expected a token value to be '%s', but got: '%s'", val, tok.Value) + } + if tok.Line != 0 { + t.Fatalf("Expected a token line to be '0', but got: '%d'", tok.Line) + } + if tok.Indent != 0 { + t.Fatalf("Expected a token identation to be '0', but got: '%d'", tok.Indent) + } + + tok = l.Next() + if tok.Type != TEXT { + t.Fatalf("Expected a 'text' type, but got: '%s'", tok.Type) + } + val = "in order to run features" + if tok.Value != val { + t.Fatalf("Expected a token value to be '%s', but got: '%s'", val, tok.Value) + } + if tok.Line != 1 { + t.Fatalf("Expected a token line to be '1', but got: '%d'", tok.Line) + } + if tok.Indent != 2 { + t.Fatalf("Expected a token identation to be '2', but got: '%d'", tok.Indent) + } + + tok = l.Next() + if tok.Type != TEXT { + t.Fatalf("Expected a 'text' type, but got: '%s'", tok.Type) + } + val = "as gherkin lexer" + if tok.Value != val { + t.Fatalf("Expected a token value to be '%s', but got: '%s'", val, tok.Value) + } + if tok.Line != 2 { + t.Fatalf("Expected a token line to be '2', but got: '%d'", tok.Line) + } + if tok.Indent != 2 { + t.Fatalf("Expected a token identation to be '2', but got: '%d'", tok.Indent) + } + + tok = l.Next() + if tok.Type != TEXT { + t.Fatalf("Expected a 'text' type, but got: '%s'", tok.Type) + } + val = "I need to be able to parse a feature" + if tok.Value != val { + t.Fatalf("Expected a token 
value to be '%s', but got: '%s'", val, tok.Value) + } + if tok.Line != 3 { + t.Fatalf("Expected a token line to be '3', but got: '%d'", tok.Line) + } + if tok.Indent != 2 { + t.Fatalf("Expected a token identation to be '2', but got: '%d'", tok.Indent) + } + + tok = l.Next() + if tok.Type != EOF { + t.Fatalf("Expected an 'eof' type, but got: '%s'", tok.Type) + } +} + +func Test_minimal_feature(t *testing.T) { + file := strings.Join([]string{ + samples["feature"] + "\n", + + indent(2, samples["background"]), + indent(4, samples["step_given"]) + "\n", + + indent(2, samples["comment"]), + indent(2, samples["scenario"]), + indent(4, samples["step_given"]), + indent(4, samples["step_when"]), + indent(4, samples["step_then"]), + }, "\n") + l := New(strings.NewReader(file)) + + var tokens []TokenType + for tok := l.Next(); tok.Type != EOF; tok = l.Next() { + tokens = append(tokens, tok.Type) + } + expected := []TokenType{ + FEATURE, + TEXT, + TEXT, + TEXT, + NEW_LINE, + + BACKGROUND, + GIVEN, + NEW_LINE, + + COMMENT, + SCENARIO, + GIVEN, + WHEN, + THEN, + } + for i := 0; i < len(expected); i++ { + if expected[i] != tokens[i] { + t.Fatalf("expected token '%s' at position: %d, is not the same as actual token: '%s'", expected[i], i, tokens[i]) + } + } +} + +func Test_table_row_reading(t *testing.T) { + file := strings.Join([]string{ + indent(2, samples["background"]), + indent(4, samples["step_given_table"]), + indent(4, samples["step_given"]), + }, "\n") + l := New(strings.NewReader(file)) + + var types []TokenType + var values []string + var indents []int + for tok := l.Next(); tok.Type != EOF; tok = l.Next() { + types = append(types, tok.Type) + values = append(values, tok.Value) + indents = append(indents, tok.Indent) + } + expectedTypes := []TokenType{ + BACKGROUND, + GIVEN, + TABLE_ROW, + TABLE_ROW, + TABLE_ROW, + GIVEN, + } + expectedIndents := []int{2, 4, 6, 6, 6, 4} + for i := 0; i < len(expectedTypes); i++ { + if expectedTypes[i] != types[i] { + t.Fatalf("expected 
// --- gherkin/lexer/matchers.go ---

// matchers classifies a raw feature-file line. Every pattern anchors at
// the start and captures the leading whitespace in group 1 so the lexer
// can record indentation. Raw string literals replace the original
// backslash-escaped forms for readability; the patterns are unchanged.
var matchers = map[string]*regexp.Regexp{
	"feature":    regexp.MustCompile(`^(\s*)Feature:\s*(.*)`),
	"scenario":   regexp.MustCompile(`^(\s*)Scenario:\s*(.*)`),
	"background": regexp.MustCompile(`^(\s*)Background:`),
	"step":       regexp.MustCompile(`^(\s*)(Given|When|Then|And|But)\s+(.+)`),
	"comment":    regexp.MustCompile(`^(\s*)#(.+)`),
	"pystring":   regexp.MustCompile(`^(\s*)"""`),
	"tags":       regexp.MustCompile(`^(\s*)(@.+)`),
	"table_row":  regexp.MustCompile(`^(\s*)(\|.+)`),
}

// --- gherkin/lexer/token.go ---

// Token is a single lexical unit produced by the Lexer.
type Token struct {
	Type         TokenType // kind of token
	Line, Indent int       // 0-based source line and leading-space count
	Value        string    // text payload; empty for structural tokens
}

// OfType reports whether the token's type is any of the given types.
func (t *Token) OfType(all ...TokenType) bool {
	for _, typ := range all {
		if typ == t.Type {
			return true
		}
	}
	return false
}

// --- gherkin/lexer/token_type.go ---

// TokenType identifies the kind of a Token. The zero value is ILLEGAL.
type TokenType int

const (
	ILLEGAL TokenType = iota

	// specials, elements and keywords are unexported group markers:
	// each consumes one iota slot and is never used as a token type.
	specials
	COMMENT
	NEW_LINE
	EOF

	elements
	TEXT
	TAGS
	TABLE_ROW
	PYSTRING

	keywords
	FEATURE
	BACKGROUND
	SCENARIO
	GIVEN
	WHEN
	THEN
	AND
	BUT
)
TokenType) String() string { + switch t { + case COMMENT: + return "comment" + case NEW_LINE: + return "new line" + case EOF: + return "end of file" + case TEXT: + return "text" + case TAGS: + return "tags" + case TABLE_ROW: + return "table row" + case PYSTRING: + return "pystring" + case FEATURE: + return "feature" + case BACKGROUND: + return "background" + case SCENARIO: + return "scenario" + case GIVEN: + return "given step" + case WHEN: + return "when step" + case THEN: + return "then step" + case AND: + return "and step" + case BUT: + return "but step" + } + return "illegal" +} diff --git a/gherkin/parse.go b/gherkin/parse.go new file mode 100644 index 0000000..5cb41e3 --- /dev/null +++ b/gherkin/parse.go @@ -0,0 +1,46 @@ +package gherkin + +type Tag string + +type Scenario struct { + Steps []*Step + Tags []Tag + Line string +} + +type Background struct { + Steps []*Step + Line string +} + +type StepType string + +const ( + Given StepType = "Given" + When StepType = "When" + Then StepType = "Then" +) + +type Step struct { + Line string + Text string + Type StepType +} + +type Feature struct { + Tags []Tag + Description string + Line string + Title string + Filename string + Background *Background + Scenarios []*Scenario +} + +// func Parse(r io.Reader) (*Feature, error) { +// in := bufio.NewReader(r) +// for line, err := in.ReadString(byte('\n')); err != nil; line, err = in.ReadString(byte('\n')) { +// ln := strings.TrimFunc(string(line), unicode.IsSpace) +// } +// return nil, nil +// }