From fb6571e405afb8de121230f08b0e6a092a05f88d Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Wed, 3 Nov 2021 23:06:59 +0100 Subject: [PATCH] builder: add support for -size= flag for WebAssembly --- Makefile | 16 +-- builder/sizes.go | 300 ++++++++++++++++++++++++++++++++++------------- go.mod | 1 + go.sum | 2 + 4 files changed, 229 insertions(+), 90 deletions(-) diff --git a/Makefile b/Makefile index 6c569be8..19ca38ff 100644 --- a/Makefile +++ b/Makefile @@ -258,17 +258,17 @@ smoketest: $(TINYGO) build -size short -o test.hex -target=pca10040 examples/test @$(MD5SUM) test.hex # test simulated boards on play.tinygo.org - $(TINYGO) build -o test.wasm -tags=arduino examples/blinky1 + $(TINYGO) build -size short -o test.wasm -tags=arduino examples/blinky1 @$(MD5SUM) test.wasm - $(TINYGO) build -o test.wasm -tags=hifive1b examples/blinky1 + $(TINYGO) build -size short -o test.wasm -tags=hifive1b examples/blinky1 @$(MD5SUM) test.wasm - $(TINYGO) build -o test.wasm -tags=reelboard examples/blinky1 + $(TINYGO) build -size short -o test.wasm -tags=reelboard examples/blinky1 @$(MD5SUM) test.wasm - $(TINYGO) build -o test.wasm -tags=pca10040 examples/blinky2 + $(TINYGO) build -size short -o test.wasm -tags=pca10040 examples/blinky2 @$(MD5SUM) test.wasm - $(TINYGO) build -o test.wasm -tags=pca10056 examples/blinky2 + $(TINYGO) build -size short -o test.wasm -tags=pca10056 examples/blinky2 @$(MD5SUM) test.wasm - $(TINYGO) build -o test.wasm -tags=circuitplay_express examples/blinky1 + $(TINYGO) build -size short -o test.wasm -tags=circuitplay_express examples/blinky1 @$(MD5SUM) test.wasm # test all targets/boards $(TINYGO) build -size short -o test.hex -target=pca10040-s132v6 examples/blinky1 @@ -452,8 +452,8 @@ endif @$(MD5SUM) test.hex $(TINYGO) build -size short -o test.hex -target=maixbit examples/blinky1 @$(MD5SUM) test.hex - $(TINYGO) build -o wasm.wasm -target=wasm examples/wasm/export - $(TINYGO) build -o wasm.wasm -target=wasm examples/wasm/main + $(TINYGO) build -size short -o wasm.wasm -target=wasm examples/wasm/export + $(TINYGO) build -size short -o wasm.wasm -target=wasm examples/wasm/main # test various compiler flags $(TINYGO) build -size short -o test.hex -target=pca10040 -gc=none -scheduler=none examples/blinky1 @$(MD5SUM) test.hex diff --git a/builder/sizes.go b/builder/sizes.go index b9aef5b1..97879556 100644 --- a/builder/sizes.go +++ b/builder/sizes.go @@ -1,9 +1,11 @@ package builder import ( + "bytes" "debug/dwarf" "debug/elf" "encoding/binary" + "fmt" "io" "os" "path/filepath" @@ -11,8 +13,8 @@ import ( "sort" "strings" + "github.com/aykevl/go-wasm" "github.com/tinygo-org/tinygo/goenv" - "tinygo.org/x/go-llvm" ) // programSize contains size statistics per package of a compiled program. @@ -72,6 +74,25 @@ type addressLine struct { IsVariable bool // true if this is a variable (or constant), false if it is code } +// Sections defined in the input file. This struct defines them in a +// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss, +// etc). +type memorySection struct { + Type memoryType + Address uint64 + Size uint64 +} + +type memoryType int + +const ( + memoryCode memoryType = iota + 1 + memoryData + memoryROData + memoryBSS + memoryStack +) + // Regular expressions to match particular symbol names. These are not stored as // DWARF variables because they have no mapping to source code global variables. var ( @@ -90,7 +111,7 @@ var ( // readProgramSizeFromDWARF reads the source location for each line of code and // each variable in the program, as far as this is stored in the DWARF debug // information. -func readProgramSizeFromDWARF(data *dwarf.Data) ([]addressLine, error) { +func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLine, error) { r := data.Reader() var lines []*dwarf.LineFile var addresses []addressLine @@ -156,7 +177,7 @@ func readProgramSizeFromDWARF(data *dwarf.Data) ([]addressLine, error) { // The chunk describes the code from prevLineEntry to // lineEntry. line := addressLine{ - Address: prevLineEntry.Address, + Address: prevLineEntry.Address + codeOffset, Length: lineEntry.Address - prevLineEntry.Address, File: prevLineEntry.File.Name, } @@ -225,61 +246,189 @@ func readProgramSizeFromDWARF(data *dwarf.Data) ([]addressLine, error) { // size will still have valid summaries but won't have complete size information // per package. func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) { - // Open the ELF file. - file, err := elf.Open(path) + // Open the binary file. + f, err := os.Open(path) if err != nil { return nil, err } - defer file.Close() + defer f.Close() // This stores all chunks of addresses found in the binary. var addresses []addressLine - // Read DWARF information. - // Intentionally ignoring the error here: if DWARF couldn't be loaded, just - // don't load symbol information from DWARF metadata. - data, _ := file.DWARF() - if file.Machine == elf.EM_AVR && strings.Split(llvm.Version, ".")[0] <= "10" { - // Hack to work around broken DWARF support for AVR in LLVM 10. - // This should be removed once support for LLVM 10 is dropped. - data = nil - } - if data != nil { - addresses, err = readProgramSizeFromDWARF(data) + // Load the binary file, which could be in a number of file formats. + var sections []memorySection + if file, err := elf.NewFile(f); err == nil { + // Read DWARF information. The error is intentionally ignored. + data, _ := file.DWARF() + if data != nil { + addresses, err = readProgramSizeFromDWARF(data, 0) + if err != nil { + // However, _do_ report an error here. Something must have gone + // wrong while trying to parse DWARF data. + return nil, err + } + } + + // Read the ELF symbols for some more chunks of location information. + // Some globals (such as strings) aren't stored in the DWARF debug + // information and therefore need to be obtained in a different way. + allSymbols, err := file.Symbols() if err != nil { - // However, _do_ report an error here. Something must have gone - // wrong while trying to parse DWARF data. return nil, err } - } + for _, symbol := range allSymbols { + symType := elf.ST_TYPE(symbol.Info) + if symbol.Size == 0 { + continue + } + if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE { + continue + } + section := file.Sections[symbol.Section] + if section.Flags&elf.SHF_ALLOC == 0 { + continue + } + if packageSymbolRegexp.MatchString(symbol.Name) || reflectDataRegexp.MatchString(symbol.Name) { + addresses = append(addresses, addressLine{ + Address: symbol.Value, + Length: symbol.Size, + File: symbol.Name, + IsVariable: true, + }) + } + } - // Read the ELF symbols for some more chunks of location information. - // Some globals (such as strings) aren't stored in the DWARF debug - // information and therefore need to be obtained in a different way. - allSymbols, err := file.Symbols() - if err != nil { - return nil, err - } - for _, symbol := range allSymbols { - symType := elf.ST_TYPE(symbol.Info) - if symbol.Size == 0 { - continue + // Load allocated sections. + for _, section := range file.Sections { + if section.Flags&elf.SHF_ALLOC == 0 { + continue + } + if section.Type == elf.SHT_NOBITS { + if section.Name == ".stack" { + // TinyGo emits stack sections on microcontroller using the + // ".stack" name. + // This is a bit ugly, but I don't think there is a way to + // mark the stack section in a linker script. + sections = append(sections, memorySection{ + Address: section.Addr, + Size: section.Size, + Type: memoryStack, + }) + } else { + // Regular .bss section. + sections = append(sections, memorySection{ + Address: section.Addr, + Size: section.Size, + Type: memoryBSS, + }) + } + } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 { + // .text + sections = append(sections, memorySection{ + Address: section.Addr, + Size: section.Size, + Type: memoryCode, + }) + } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 { + // .data + sections = append(sections, memorySection{ + Address: section.Addr, + Size: section.Size, + Type: memoryData, + }) + } else if section.Type == elf.SHT_PROGBITS { + // .rodata + sections = append(sections, memorySection{ + Address: section.Addr, + Size: section.Size, + Type: memoryROData, + }) + } } - if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE { - continue + } else if file, err := wasm.Parse(f); err == nil { + // File is in WebAssembly format. + + // Put code at a very high address, so that it won't conflict with the + // data in the memory section. + const codeOffset = 0x8000_0000_0000_0000 + + // Read DWARF information. The error is intentionally ignored. + data, err := file.DWARF() + if data != nil { + addresses, err = readProgramSizeFromDWARF(data, codeOffset) + if err != nil { + // However, _do_ report an error here. Something must have gone + // wrong while trying to parse DWARF data. + return nil, err + } } - section := file.Sections[symbol.Section] - if section.Flags&elf.SHF_ALLOC == 0 { - continue - } - if packageSymbolRegexp.MatchString(symbol.Name) || reflectDataRegexp.MatchString(symbol.Name) { - addresses = append(addresses, addressLine{ - Address: symbol.Value, - Length: symbol.Size, - File: symbol.Name, - IsVariable: true, - }) + + var linearMemorySize uint64 + for _, section := range file.Sections { + switch section := section.(type) { + case *wasm.SectionCode: + sections = append(sections, memorySection{ + Address: codeOffset, + Size: uint64(section.Size()), + Type: memoryCode, + }) + case *wasm.SectionMemory: + // This value is used when processing *wasm.SectionData (which + // always comes after *wasm.SectionMemory). + linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024 + case *wasm.SectionData: + // Data sections contain initial values for linear memory. + // First load the list of data sections, and sort them by + // address for easier processing. + var dataSections []memorySection + for _, entry := range section.Entries { + address, err := wasm.Eval(bytes.NewBuffer(entry.Offset)) + if err != nil { + return nil, fmt.Errorf("could not parse data section address: %w", err) + } + dataSections = append(dataSections, memorySection{ + Address: uint64(address[0].(int32)), + Size: uint64(len(entry.Data)), + Type: memoryData, + }) + } + sort.Slice(dataSections, func(i, j int) bool { + return dataSections[i].Address < dataSections[j].Address + }) + + // And now add all data sections for linear memory. + // Parts that are in the slice of data sections are added as + // memoryData, and parts that are not are added as memoryBSS. + addr := uint64(0) + for _, section := range dataSections { + if addr < section.Address { + sections = append(sections, memorySection{ + Address: addr, + Size: section.Address - addr, + Type: memoryBSS, + }) + } + if addr > section.Address { + // This might be allowed, I'm not sure. + // It certainly doesn't make a lot of sense. + return nil, fmt.Errorf("overlapping data section") + } + // addr == section.Address + sections = append(sections, section) + addr = section.Address + section.Size + } + if addr < linearMemorySize { + sections = append(sections, memorySection{ + Address: addr, + Size: linearMemorySize - addr, + Type: memoryBSS, + }) + } + } } + } else { + return nil, fmt.Errorf("could not parse file: %w", err) } // Sort the slice of address chunks by address, so that we can iterate @@ -296,31 +445,9 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz // Now finally determine the binary/RAM size usage per package by going // through each allocated section. sizes := make(map[string]packageSize) - for _, section := range file.Sections { - if section.Flags&elf.SHF_ALLOC == 0 { - continue - } - if section.Type != elf.SHT_PROGBITS && section.Type != elf.SHT_NOBITS { - continue - } - if section.Name == ".stack" { - // This is a bit ugly, but I don't think there is a way to mark the - // stack section in a linker script. - // We store the C stack as a pseudo-section. - sizes["C stack"] = packageSize{ - BSS: section.Size, - } - continue - } - if section.Type == elf.SHT_NOBITS { - // .bss - readSection(section, addresses, func(path string, size uint64, isVariable bool) { - field := sizes[path] - field.BSS += size - sizes[path] = field - }, packagePathMap) - } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 { - // .text + for _, section := range sections { + switch section.Type { + case memoryCode: readSection(section, addresses, func(path string, size uint64, isVariable bool) { field := sizes[path] if isVariable { @@ -330,20 +457,29 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz } sizes[path] = field }, packagePathMap) - } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 { - // .data - readSection(section, addresses, func(path string, size uint64, isVariable bool) { - field := sizes[path] - field.Data += size - sizes[path] = field - }, packagePathMap) - } else if section.Type == elf.SHT_PROGBITS { - // .rodata + case memoryROData: readSection(section, addresses, func(path string, size uint64, isVariable bool) { field := sizes[path] field.ROData += size sizes[path] = field }, packagePathMap) + case memoryData: + readSection(section, addresses, func(path string, size uint64, isVariable bool) { + field := sizes[path] + field.Data += size + sizes[path] = field + }, packagePathMap) + case memoryBSS: + readSection(section, addresses, func(path string, size uint64, isVariable bool) { + field := sizes[path] + field.BSS += size + sizes[path] = field + }, packagePathMap) + case memoryStack: + // We store the C stack as a pseudo-package. + sizes["C stack"] = packageSize{ + BSS: section.Size, + } } } @@ -362,13 +498,13 @@ func loadProgramSize(path string, packagePathMap map[string]string) (*programSiz // readSection determines for each byte in this section to which package it // belongs. It reports this usage through the addSize callback. -func readSection(section *elf.Section, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) { +func readSection(section memorySection, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) { // The addr variable tracks at which address we are while going through this // section. We start at the beginning. - addr := section.Addr - sectionEnd := section.Addr + section.Size + addr := section.Address + sectionEnd := section.Address + section.Size for _, line := range addresses { - if line.Address < section.Addr || line.Address+line.Length >= sectionEnd { + if line.Address < section.Address || line.Address+line.Length >= sectionEnd { // Check that this line is entirely within the section. // Don't bother dealing with line entries that cross sections (that // seems rather unlikely anyway). diff --git a/go.mod b/go.mod index 3dca0313..26a4b027 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/tinygo-org/tinygo go 1.15 require ( + github.com/aykevl/go-wasm v0.0.2-0.20211030161413-11881cb9032d // indirect github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693 github.com/chromedp/chromedp v0.6.4 diff --git a/go.sum b/go.sum index 59de9451..1daa9388 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/aykevl/go-wasm v0.0.2-0.20211030161413-11881cb9032d h1:JeuI5/546naK5hpOIX+Lq5xE8rvt7uwiTp6iL+pLQgk= +github.com/aykevl/go-wasm v0.0.2-0.20211030161413-11881cb9032d/go.mod h1:7sXyiaA0WtSogCu67R2252fQpVmJMh9JWJ9ddtGkpWw= github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 h1:oMCHnXa6CCCafdPDbMh/lWRhRByN0VFLvv+g+ayx1SI= github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2/go.mod h1:PkYb9DJNAwrSvRx5DYA+gUcOIgTGVMNkfSCbZM8cWpI= github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693 h1:11eq/RkpaotwdF6b1TRMcdgQUPNmyFEJOB7zLvh0O/Y=