cgo: implement the constant parser as a real parser
Previously, the constant parser was just a combination of heuristics that tried to fit a constant into an *ast.BasicLit. For more complex expressions, this is not enough. This change also introduces proper syntax errors with source locations when parsing a constant fails. For example, the following will now print a real error message with a source location: #define FOO 5)
Этот коммит содержится в:
родитель
5987233b99
коммит
cadb75a4aa
4 изменённых файлов: 207 добавлений и 55 удалений
|
@ -42,7 +42,7 @@ type cgoPackage struct {
|
||||||
// constantInfo stores some information about a CGo constant found by libclang
|
// constantInfo stores some information about a CGo constant found by libclang
|
||||||
// and declared in the Go AST.
|
// and declared in the Go AST.
|
||||||
type constantInfo struct {
|
type constantInfo struct {
|
||||||
expr *ast.BasicLit
|
expr ast.Expr
|
||||||
pos token.Pos
|
pos token.Pos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
223
cgo/const.go
223
cgo/const.go
|
@ -4,56 +4,191 @@ package cgo
|
||||||
// parse common #define statements to Go constant expressions.
|
// parse common #define statements to Go constant expressions.
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"go/ast"
|
"go/ast"
|
||||||
|
"go/scanner"
|
||||||
"go/token"
|
"go/token"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// parseConst parses the given string as a C constant.
|
// parseConst parses the given string as a C constant.
|
||||||
func parseConst(pos token.Pos, value string) *ast.BasicLit {
|
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
|
||||||
for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' {
|
t := newTokenizer(pos, fset, value)
|
||||||
value = strings.TrimSpace(value[1 : len(value)-1])
|
expr, err := parseConstExpr(t)
|
||||||
}
|
if t.token != token.EOF {
|
||||||
if len(value) == 0 {
|
return nil, &scanner.Error{
|
||||||
// Pretend it doesn't exist at all.
|
Pos: t.fset.Position(t.pos),
|
||||||
return nil
|
Msg: "unexpected token " + t.token.String(),
|
||||||
}
|
}
|
||||||
// For information about integer literals:
|
}
|
||||||
// https://en.cppreference.com/w/cpp/language/integer_literal
|
return expr, err
|
||||||
if value[0] == '"' {
|
}
|
||||||
// string constant
|
|
||||||
return &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: value}
|
// parseConstExpr parses a stream of C tokens to a Go expression.
|
||||||
}
|
func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
|
||||||
if value[0] == '\'' {
|
switch t.token {
|
||||||
// char constant
|
case token.LPAREN:
|
||||||
return &ast.BasicLit{ValuePos: pos, Kind: token.CHAR, Value: value}
|
lparen := t.pos
|
||||||
}
|
t.Next()
|
||||||
// assume it's a number (int or float)
|
x, err := parseConstExpr(t)
|
||||||
value = strings.Replace(value, "'", "", -1) // remove ' chars
|
if err != nil {
|
||||||
value = strings.TrimRight(value, "lu") // remove llu suffixes etc.
|
return nil, err
|
||||||
// find the first non-number
|
}
|
||||||
nonnum := byte(0)
|
if t.token != token.RPAREN {
|
||||||
for i := 0; i < len(value); i++ {
|
return nil, unexpectedToken(t, token.RPAREN)
|
||||||
if value[i] < '0' || value[i] > '9' {
|
}
|
||||||
nonnum = value[i]
|
expr := &ast.ParenExpr{
|
||||||
break
|
Lparen: lparen,
|
||||||
|
X: x,
|
||||||
|
Rparen: t.pos,
|
||||||
|
}
|
||||||
|
t.Next()
|
||||||
|
return expr, nil
|
||||||
|
case token.INT, token.FLOAT, token.STRING, token.CHAR:
|
||||||
|
expr := &ast.BasicLit{
|
||||||
|
ValuePos: t.pos,
|
||||||
|
Kind: t.token,
|
||||||
|
Value: t.value,
|
||||||
|
}
|
||||||
|
t.Next()
|
||||||
|
return expr, nil
|
||||||
|
case token.EOF:
|
||||||
|
return nil, &scanner.Error{
|
||||||
|
Pos: t.fset.Position(t.pos),
|
||||||
|
Msg: "empty constant",
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil, &scanner.Error{
|
||||||
|
Pos: t.fset.Position(t.pos),
|
||||||
|
Msg: fmt.Sprintf("unexpected token %s", t.token),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unexpectedToken returns an error of the form "unexpected token FOO, expected
|
||||||
|
// BAR".
|
||||||
|
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
|
||||||
|
return &scanner.Error{
|
||||||
|
Pos: t.fset.Position(t.pos),
|
||||||
|
Msg: fmt.Sprintf("unexpected token %s, expected %s", t.token, expected),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// tokenizer reads C source code and converts it to Go tokens.
|
||||||
|
type tokenizer struct {
|
||||||
|
pos token.Pos
|
||||||
|
fset *token.FileSet
|
||||||
|
token token.Token
|
||||||
|
value string
|
||||||
|
buf string
|
||||||
|
}
|
||||||
|
|
||||||
|
// newTokenizer initializes a new tokenizer, positioned at the first token in
|
||||||
|
// the string.
|
||||||
|
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
|
||||||
|
t := &tokenizer{
|
||||||
|
pos: start,
|
||||||
|
fset: fset,
|
||||||
|
buf: buf,
|
||||||
|
token: token.ILLEGAL,
|
||||||
|
}
|
||||||
|
t.Next() // Parse the first token.
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next consumes the next token in the stream. There is no return value, read
|
||||||
|
// the next token from the pos, token and value properties.
|
||||||
|
func (t *tokenizer) Next() {
|
||||||
|
t.pos += token.Pos(len(t.value))
|
||||||
|
for {
|
||||||
|
if len(t.buf) == 0 {
|
||||||
|
t.token = token.EOF
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c := t.buf[0]
|
||||||
|
switch {
|
||||||
|
case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
|
||||||
|
// Skip whitespace.
|
||||||
|
// Based on this source, not sure whether it represents C whitespace:
|
||||||
|
// https://en.cppreference.com/w/cpp/string/byte/isspace
|
||||||
|
t.pos++
|
||||||
|
t.buf = t.buf[1:]
|
||||||
|
case c == '(' || c == ')':
|
||||||
|
// Single-character tokens.
|
||||||
|
switch c {
|
||||||
|
case '(':
|
||||||
|
t.token = token.LPAREN
|
||||||
|
case ')':
|
||||||
|
t.token = token.RPAREN
|
||||||
|
}
|
||||||
|
t.value = t.buf[:1]
|
||||||
|
t.buf = t.buf[1:]
|
||||||
|
return
|
||||||
|
case c >= '0' && c <= '9':
|
||||||
|
// Numeric constant (int, float, etc.).
|
||||||
|
// Find the last non-numeric character.
|
||||||
|
tokenLen := len(t.buf)
|
||||||
|
hasDot := false
|
||||||
|
for i, c := range t.buf {
|
||||||
|
if c == '.' {
|
||||||
|
hasDot = true
|
||||||
|
}
|
||||||
|
if (c >= '0' && c <= '9') || c == '.' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
|
||||||
|
tokenLen = i + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.value = t.buf[:tokenLen]
|
||||||
|
t.buf = t.buf[tokenLen:]
|
||||||
|
if hasDot {
|
||||||
|
// Integer constants are more complicated than this but this is
|
||||||
|
// a close approximation.
|
||||||
|
// https://en.cppreference.com/w/cpp/language/integer_literal
|
||||||
|
t.token = token.FLOAT
|
||||||
|
t.value = strings.TrimRight(t.value, "f")
|
||||||
|
} else {
|
||||||
|
t.token = token.INT
|
||||||
|
t.value = strings.TrimRight(t.value, "uUlL")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
case c == '"':
|
||||||
|
// String constant. Find the first '"' character that is not
|
||||||
|
// preceded by a backslash.
|
||||||
|
escape := false
|
||||||
|
tokenLen := len(t.buf)
|
||||||
|
for i, c := range t.buf {
|
||||||
|
if i != 0 && c == '"' && !escape {
|
||||||
|
tokenLen = i + 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !escape {
|
||||||
|
escape = c == '\\'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.token = token.STRING
|
||||||
|
t.value = t.buf[:tokenLen]
|
||||||
|
t.buf = t.buf[tokenLen:]
|
||||||
|
return
|
||||||
|
case c == '\'':
|
||||||
|
// Char (rune) constant. Find the first '\'' character that is not
|
||||||
|
// preceded by a backslash.
|
||||||
|
escape := false
|
||||||
|
tokenLen := len(t.buf)
|
||||||
|
for i, c := range t.buf {
|
||||||
|
if i != 0 && c == '\'' && !escape {
|
||||||
|
tokenLen = i + 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !escape {
|
||||||
|
escape = c == '\\'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.token = token.CHAR
|
||||||
|
t.value = t.buf[:tokenLen]
|
||||||
|
t.buf = t.buf[tokenLen:]
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
t.token = token.ILLEGAL
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// determine number type based on the first non-number
|
|
||||||
switch nonnum {
|
|
||||||
case 0:
|
|
||||||
// no non-number found, must be an integer
|
|
||||||
return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value}
|
|
||||||
case 'x', 'X':
|
|
||||||
// hex integer constant
|
|
||||||
// TODO: may also be a floating point number per C++17.
|
|
||||||
return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value}
|
|
||||||
case '.', 'e':
|
|
||||||
// float constant
|
|
||||||
value = strings.TrimRight(value, "fFlL")
|
|
||||||
return &ast.BasicLit{ValuePos: pos, Kind: token.FLOAT, Value: value}
|
|
||||||
default:
|
|
||||||
// unknown type, ignore
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"go/format"
|
"go/format"
|
||||||
"go/token"
|
"go/token"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -14,20 +15,33 @@ func TestParseConst(t *testing.T) {
|
||||||
Go string
|
Go string
|
||||||
}{
|
}{
|
||||||
{`5`, `5`},
|
{`5`, `5`},
|
||||||
{`(5)`, `5`},
|
{`(5)`, `(5)`},
|
||||||
{`(((5)))`, `5`},
|
{`(((5)))`, `(5)`},
|
||||||
|
{`)`, `error: 1:1: unexpected token )`},
|
||||||
|
{`5)`, `error: 1:2: unexpected token )`},
|
||||||
|
{" \t)", `error: 1:4: unexpected token )`},
|
||||||
{`5.8f`, `5.8`},
|
{`5.8f`, `5.8`},
|
||||||
{`foo`, `<invalid>`}, // identifiers unimplemented
|
{`foo`, `error: 1:1: unexpected token ILLEGAL`}, // identifiers unimplemented
|
||||||
{``, `<invalid>`}, // empty constants not allowed in Go
|
{``, `error: 1:1: empty constant`}, // empty constants not allowed in Go
|
||||||
{`"foo"`, `"foo"`},
|
{`"foo"`, `"foo"`},
|
||||||
|
{`"a\\n"`, `"a\\n"`},
|
||||||
|
{`"a\n"`, `"a\n"`},
|
||||||
|
{`"a\""`, `"a\""`},
|
||||||
{`'a'`, `'a'`},
|
{`'a'`, `'a'`},
|
||||||
{`0b10`, `<invalid>`}, // binary number literals unimplemented
|
{`0b10`, `0b10`},
|
||||||
|
{`0x1234_5678`, `0x1234_5678`},
|
||||||
} {
|
} {
|
||||||
fset := token.NewFileSet()
|
fset := token.NewFileSet()
|
||||||
startPos := fset.AddFile("test.c", -1, 1000).Pos(0)
|
startPos := fset.AddFile("", -1, 1000).Pos(0)
|
||||||
expr := parseConst(startPos, tc.C)
|
expr, err := parseConst(startPos, fset, tc.C)
|
||||||
s := "<invalid>"
|
s := "<invalid>"
|
||||||
if expr != nil {
|
if err != nil {
|
||||||
|
if !strings.HasPrefix(tc.Go, "error: ") {
|
||||||
|
t.Errorf("expected value %#v for C constant %#v but got error %#v", tc.Go, tc.C, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s = "error: " + err.Error()
|
||||||
|
} else if expr != nil {
|
||||||
// Serialize the Go constant to a string, for more readable test
|
// Serialize the Go constant to a string, for more readable test
|
||||||
// cases.
|
// cases.
|
||||||
buf := &bytes.Buffer{}
|
buf := &bytes.Buffer{}
|
||||||
|
|
|
@ -245,9 +245,12 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient
|
||||||
p.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
|
p.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
value := strings.TrimSpace(source[len(name):])
|
value := source[len(name):]
|
||||||
// Try to convert this #define into a Go constant expression.
|
// Try to convert this #define into a Go constant expression.
|
||||||
expr := parseConst(pos, value)
|
expr, err := parseConst(pos+token.Pos(len(name)), p.fset, value)
|
||||||
|
if err != nil {
|
||||||
|
p.errors = append(p.errors, err)
|
||||||
|
}
|
||||||
if expr != nil {
|
if expr != nil {
|
||||||
// Parsing was successful.
|
// Parsing was successful.
|
||||||
p.constants[name] = constantInfo{expr, pos}
|
p.constants[name] = constantInfo{expr, pos}
|
||||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче