cgo: implement the constant parser as a real parser

Previously, the constant parser was just a combination of heuristics that tried to fit a constant into an *ast.BasicLit. For more complex expressions, this is not enough. This change also introduces proper syntax errors with source locations, reported when parsing a constant fails. For example, the following will now print a real error message with a source location: #define FOO 5)
2019-11-04 16:30:57 +01:00 · 2019-11-04 16:30:57 +01:00 · cadb75a4aa
--- a/cgo/cgo.go
+++ b/cgo/cgo.go
@ -42,7 +42,7 @@ type cgoPackage struct {
 // constantInfo stores some information about a CGo constant found by libclang
 // and declared in the Go AST.
 type constantInfo struct {
-	expr *ast.BasicLit
+	expr ast.Expr
 	pos  token.Pos
 }
--- a/cgo/const.go
+++ b/cgo/const.go
@ -4,56 +4,191 @@ package cgo
 // parse common #define statements to Go constant expressions.
 import (
 	"fmt"
 	"go/ast"
 	"go/scanner"
 	"go/token"
 	"strings"
 )
 // parseConst parses the given string as a C constant.
-func parseConst(pos token.Pos, value string) *ast.BasicLit {
+func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
-	for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' {
+	t := newTokenizer(pos, fset, value)
-		value = strings.TrimSpace(value[1 : len(value)-1])
+	expr, err := parseConstExpr(t)
-	}
+	if t.token != token.EOF {
-	if len(value) == 0 {
+		return nil, &scanner.Error{
-		// Pretend it doesn't exist at all.
+			Pos: t.fset.Position(t.pos),
-		return nil
+			Msg: "unexpected token " + t.token.String(),
-	}
+		}
-	// For information about integer literals:
+	}
-	// https://en.cppreference.com/w/cpp/language/integer_literal
+	return expr, err
-	if value[0] == '"' {
+}
-		// string constant
+
-		return &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: value}
+// parseConstExpr parses a stream of C tokens to a Go expression.
-	}
+func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
-	if value[0] == '\'' {
+	switch t.token {
-		// char constant
+	case token.LPAREN:
-		return &ast.BasicLit{ValuePos: pos, Kind: token.CHAR, Value: value}
+		lparen := t.pos
-	}
+		t.Next()
-	// assume it's a number (int or float)
+		x, err := parseConstExpr(t)
-	value = strings.Replace(value, "'", "", -1) // remove ' chars
+		if err != nil {
-	value = strings.TrimRight(value, "lu")      // remove llu suffixes etc.
+			return nil, err
-	// find the first non-number
+		}
-	nonnum := byte(0)
+		if t.token != token.RPAREN {
-	for i := 0; i < len(value); i++ {
+			return nil, unexpectedToken(t, token.RPAREN)
-		if value[i] < '0' || value[i] > '9' {
+		}
-			nonnum = value[i]
+		expr := &ast.ParenExpr{
-			break
+			Lparen: lparen,
 			X:      x,
 			Rparen: t.pos,
 		}
 		t.Next()
 		return expr, nil
 	case token.INT, token.FLOAT, token.STRING, token.CHAR:
 		expr := &ast.BasicLit{
 			ValuePos: t.pos,
 			Kind:     t.token,
 			Value:    t.value,
 		}
 		t.Next()
 		return expr, nil
 	case token.EOF:
 		return nil, &scanner.Error{
 			Pos: t.fset.Position(t.pos),
 			Msg: "empty constant",
 		}
 	default:
 		return nil, &scanner.Error{
 			Pos: t.fset.Position(t.pos),
 			Msg: fmt.Sprintf("unexpected token %s", t.token),
 		}
 	}
 }
 // unexpectedToken builds the syntax error reported when the tokenizer is
 // positioned at a token other than the one the parser requires. The message has
 // the form "unexpected token FOO, expected BAR", where FOO is the current token
 // of t and BAR is expected. The error position is the current position of t,
 // resolved through the file set of t.
 func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
 	err := new(scanner.Error)
 	err.Pos = t.fset.Position(t.pos)
 	err.Msg = fmt.Sprintf("unexpected token %s, expected %s", t.token, expected)
 	return err
 }
 // tokenizer reads C source code and converts it to Go tokens. After each call
 // to Next the current token is described by the pos, token and value fields.
 type tokenizer struct {
 	// pos is the position of the current token.
 	pos token.Pos
 	// fset is the file set used to resolve pos when reporting errors.
 	fset *token.FileSet
 	// token is the kind of the current token.
 	token token.Token
 	// value is the text of the current token, with C-only suffixes removed
 	// from numeric literals. It is empty for EOF and ILLEGAL.
 	value string
 	// width is the number of bytes the current token occupies in the input.
 	// It can be larger than len(value) when a suffix was trimmed.
 	width int
 	// buf is the input that has not been tokenized yet.
 	buf string
 }
 // newTokenizer initializes a new tokenizer, positioned at the first token in
 // the string. start is the position of the first byte of buf and fset is the
 // file set that start belongs to.
 func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
 	t := &tokenizer{
 		pos:   start,
 		fset:  fset,
 		buf:   buf,
 		token: token.ILLEGAL,
 	}
 	t.Next() // Parse the first token.
 	return t
 }
 // Next consumes the next token in the stream. There is no return value, read
 // the next token from the pos, token and value properties.
 //
 // At the end of the input the token is EOF. A character that does not start a
 // known token yields ILLEGAL without consuming any input, so further calls keep
 // reporting ILLEGAL at the same position.
 func (t *tokenizer) Next() {
 	// Step over the previous token. The width is tracked separately from value
 	// because value may have had a suffix trimmed, and using len(value) would
 	// make all following positions too small.
 	t.pos += token.Pos(t.width)
 	t.width = 0
 	t.value = ""
 	for {
 		if len(t.buf) == 0 {
 			t.token = token.EOF
 			return
 		}
 		c := t.buf[0]
 		switch {
 		case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
 			// Skip whitespace.
 			// Based on this source, not sure whether it represents C whitespace:
 			// https://en.cppreference.com/w/cpp/string/byte/isspace
 			t.pos++
 			t.buf = t.buf[1:]
 		case c == '(':
 			t.accept(token.LPAREN, 1)
 			return
 		case c == ')':
 			t.accept(token.RPAREN, 1)
 			return
 		case c >= '0' && c <= '9':
 			// Numeric constant (int, float, etc.). Go does not know the C type
 			// suffixes, so they are removed from the value.
 			n, isFloat := scanNumber(t.buf)
 			if isFloat {
 				t.accept(token.FLOAT, n)
 				t.value = strings.TrimRight(t.value, "fFlL")
 			} else {
 				t.accept(token.INT, n)
 				t.value = strings.TrimRight(t.value, "uUlL")
 			}
 			return
 		case c == '"':
 			// String constant.
 			t.accept(token.STRING, scanQuoted(t.buf))
 			return
 		case c == '\'':
 			// Char (rune) constant.
 			t.accept(token.CHAR, scanQuoted(t.buf))
 			return
 		default:
 			t.token = token.ILLEGAL
 			return
 		}
 	}
 }
 // accept makes the first n bytes of the remaining input the current token of
 // the given kind and removes them from the input.
 func (t *tokenizer) accept(kind token.Token, n int) {
 	t.token = kind
 	t.value = t.buf[:n]
 	t.width = n
 	t.buf = t.buf[n:]
 }
 // scanNumber returns the length in bytes of the numeric literal at the start of
 // buf and whether it is a floating point literal. The literal ends at the first
 // character that cannot be part of a number: digits, letters, '_' and '.' are
 // accepted, as is a sign directly after an exponent marker. A literal is
 // considered a float when it contains a '.' or an exponent marker ('e'/'E', or
 // 'p'/'P' for literals with a 0x prefix). This is an approximation of the C
 // rules, see:
 // https://en.cppreference.com/w/cpp/language/integer_literal
 func scanNumber(buf string) (n int, isFloat bool) {
 	hex := strings.HasPrefix(buf, "0x") || strings.HasPrefix(buf, "0X")
 	afterExponent := false // whether the previous character was an exponent marker
 	for i, c := range buf {
 		exponent := false
 		switch {
 		case c == '.':
 			isFloat = true
 		case !hex && (c == 'e' || c == 'E'), hex && (c == 'p' || c == 'P'):
 			isFloat = true
 			exponent = true
 		case c >= '0' && c <= '9', c == '_', c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z':
 			// Digits, hex digits, prefixes and suffixes.
 		case afterExponent && (c == '+' || c == '-'):
 			// Sign of the exponent, as in 1e-5.
 		default:
 			return i, isFloat
 		}
 		afterExponent = exponent
 	}
 	return len(buf), isFloat
 }
 // scanQuoted returns the length in bytes of the quoted literal at the start of
 // buf, including both quotes. buf must start with the quote character. The
 // literal ends at the first matching quote that is not escaped by a backslash;
 // a backslash escapes only the single character following it. If there is no
 // closing quote, the length of the whole buffer is returned.
 func scanQuoted(buf string) int {
 	quote := buf[0]
 	escaped := false
 	for i := 1; i < len(buf); i++ {
 		switch {
 		case escaped:
 			escaped = false
 		case buf[i] == '\\':
 			escaped = true
 		case buf[i] == quote:
 			return i + 1
 		}
 	}
 	return len(buf)
 }
--- a/cgo/const_test.go
+++ b/cgo/const_test.go
@ -4,6 +4,7 @@ import (
 	"bytes"
 	"go/format"
 	"go/token"
 	"strings"
 	"testing"
 )
@ -14,20 +15,33 @@ func TestParseConst(t *testing.T) {
 		Go string
 	}{
 		{`5`, `5`},
-		{`(5)`, `5`},
+		{`(5)`, `(5)`},
-		{`(((5)))`, `5`},
+		{`(((5)))`, `(5)`},
 		{`)`, `error: 1:1: unexpected token )`},
 		{`5)`, `error: 1:2: unexpected token )`},
 		{"  \t)", `error: 1:4: unexpected token )`},
 		{`5.8f`, `5.8`},
-		{`foo`, `<invalid>`}, // identifiers unimplemented
+		{`foo`, `error: 1:1: unexpected token ILLEGAL`}, // identifiers unimplemented
-		{``, `<invalid>`},    // empty constants not allowed in Go
+		{``, `error: 1:1: empty constant`},              // empty constants not allowed in Go
 		{`"foo"`, `"foo"`},
 		{`"a\\n"`, `"a\\n"`},
 		{`"a\n"`, `"a\n"`},
 		{`"a\""`, `"a\""`},
 		{`'a'`, `'a'`},
-		{`0b10`, `<invalid>`}, // binary number literals unimplemented
+		{`0b10`, `0b10`},
 		{`0x1234_5678`, `0x1234_5678`},
 	} {
 		fset := token.NewFileSet()
-		startPos := fset.AddFile("test.c", -1, 1000).Pos(0)
+		startPos := fset.AddFile("", -1, 1000).Pos(0)
-		expr := parseConst(startPos, tc.C)
+		expr, err := parseConst(startPos, fset, tc.C)
 		s := "<invalid>"
-		if expr != nil {
+		if err != nil {
 			if !strings.HasPrefix(tc.Go, "error: ") {
 				t.Errorf("expected value %#v for C constant %#v but got error %#v", tc.Go, tc.C, err.Error())
 				continue
 			}
 			s = "error: " + err.Error()
 		} else if expr != nil {
 			// Serialize the Go constant to a string, for more readable test
 			// cases.
 			buf := &bytes.Buffer{}
--- a/cgo/libclang.go
+++ b/cgo/libclang.go
@ -245,9 +245,12 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient
 			p.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
 			break
 		}
-		value := strings.TrimSpace(source[len(name):])
+		value := source[len(name):]
 		// Try to convert this #define into a Go constant expression.
-		expr := parseConst(pos, value)
+		expr, err := parseConst(pos+token.Pos(len(name)), p.fset, value)
 		if err != nil {
 			p.errors = append(p.errors, err)
 		}
 		if expr != nil {
 			// Parsing was successful.
 			p.constants[name] = constantInfo{expr, pos}