diff --git a/builder/build.go b/builder/build.go index b1ccdafb..db31b886 100644 --- a/builder/build.go +++ b/builder/build.go @@ -5,6 +5,7 @@ package builder import ( "debug/elf" + "encoding/binary" "errors" "fmt" "io/ioutil" @@ -115,6 +116,13 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(stri } } + // Make sure stack sizes are loaded from a separate section so they can be + // modified after linking. + var stackSizeLoads []string + if config.AutomaticStackSize() { + stackSizeLoads = transform.CreateStackSizeLoads(mod, config) + } + // Generate output. outext := filepath.Ext(outpath) switch outext { @@ -207,6 +215,26 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(stri return &commandError{"failed to link", executable, err} } + var calculatedStacks []string + var stackSizes map[string]functionStackSize + if config.Options.PrintStacks || config.AutomaticStackSize() { + // Try to determine stack sizes at compile time. + // Don't do this by default as it usually doesn't work on + // unsupported architectures. + calculatedStacks, stackSizes, err = determineStackSizes(mod, executable) + if err != nil { + return err + } + } + if config.AutomaticStackSize() { + // Modify the .tinygo_stacksizes section that contains a stack size + // for each goroutine. + err = modifyStackSizes(executable, stackSizeLoads, stackSizes) + if err != nil { + return fmt.Errorf("could not modify stack sizes: %w", err) + } + } + if config.Options.PrintSizes == "short" || config.Options.PrintSizes == "full" { sizes, err := loadProgramSize(executable) if err != nil { @@ -228,7 +256,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(stri // Print goroutine stack sizes, as far as possible. if config.Options.PrintStacks { - printStacks(mod, executable) + printStacks(calculatedStacks, stackSizes) } // Get an Intel .hex file or .bin file from the .elf file. 
@@ -250,19 +278,19 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(stri } } -// printStacks prints the maximum stack depth for functions that are started as -// goroutines. Stack sizes cannot always be determined statically, in particular -// recursive functions and functions that call interface methods or function -// pointers may have an unknown stack depth (depending on what the optimizer -// manages to optimize away). -// -// It might print something like the following: -// -// function stack usage (in bytes) -// Reset_Handler 316 -// examples/blinky2.led1 92 -// runtime.run$1 300 -func printStacks(mod llvm.Module, executable string) { +// functionStackSizes keeps stack size information about a single function +// (usually a goroutine). +type functionStackSize struct { + humanName string + stackSize uint64 + stackSizeType stacksize.SizeType + missingStackSize *stacksize.CallNode +} + +// determineStackSizes tries to determine the stack sizes of all started +// goroutines and of the reset vector. The LLVM module is necessary to find +// functions that call a function pointer. +func determineStackSizes(mod llvm.Module, executable string) ([]string, map[string]functionStackSize, error) { var callsIndirectFunction []string gowrappers := []string{} gowrapperNames := make(map[string]string) @@ -292,48 +320,176 @@ func printStacks(mod llvm.Module, executable string) { // Load the ELF binary. f, err := elf.Open(executable) if err != nil { - fmt.Fprintln(os.Stderr, "could not load executable for stack size analysis:", err) - return + return nil, nil, fmt.Errorf("could not load executable for stack size analysis: %w", err) } defer f.Close() // Determine the frame size of each function (if available) and the callgraph. 
functions, err := stacksize.CallGraph(f, callsIndirectFunction) if err != nil { - fmt.Fprintln(os.Stderr, "could not parse executable for stack size analysis:", err) - return + return nil, nil, fmt.Errorf("could not parse executable for stack size analysis: %w", err) } + // Goroutines need to be started and finished and take up some stack space + // that way. This can be measured by measuring the stack size of + // tinygo_startTask. + if numFuncs := len(functions["tinygo_startTask"]); numFuncs != 1 { + return nil, nil, fmt.Errorf("expected exactly one definition of tinygo_startTask, got %d", numFuncs) + } + baseStackSize, baseStackSizeType, baseStackSizeFailedAt := functions["tinygo_startTask"][0].StackSize() + + sizes := make(map[string]functionStackSize) + + // Add the reset handler function, for convenience. The reset handler runs + // startup code and the scheduler. The listed stack size is not the full + // stack size: interrupts are not counted. + var resetFunction string switch f.Machine { case elf.EM_ARM: - // Add the reset handler, which runs startup code and is the - // interrupt/scheduler stack with -scheduler=tasks. - // Note that because interrupts happen on this stack, the stack needed - // by just the Reset_Handler is not enough. Stacks needed by interrupt - // handlers should also be taken into account. - gowrappers = append([]string{"Reset_Handler"}, gowrappers...) - gowrapperNames["Reset_Handler"] = "Reset_Handler" + // Note: all interrupts happen on this stack so the real size is bigger. 
+ resetFunction = "Reset_Handler" + } + if resetFunction != "" { + funcs := functions[resetFunction] + if len(funcs) != 1 { + return nil, nil, fmt.Errorf("expected exactly one definition of %s in the callgraph, found %d", resetFunction, len(funcs)) + } + stackSize, stackSizeType, missingStackSize := funcs[0].StackSize() + sizes[resetFunction] = functionStackSize{ + stackSize: stackSize, + stackSizeType: stackSizeType, + missingStackSize: missingStackSize, + humanName: resetFunction, + } } + // Add all goroutine wrapper functions. + for _, name := range gowrappers { + funcs := functions[name] + if len(funcs) != 1 { + return nil, nil, fmt.Errorf("expected exactly one definition of %s in the callgraph, found %d", name, len(funcs)) + } + humanName := gowrapperNames[name] + if humanName == "" { + humanName = name // fallback + } + stackSize, stackSizeType, missingStackSize := funcs[0].StackSize() + if baseStackSizeType != stacksize.Bounded { + // It was not possible to determine the stack size at compile time + // because tinygo_startTask does not have a fixed stack size. This + // can happen when using -opt=1. + stackSizeType = baseStackSizeType + missingStackSize = baseStackSizeFailedAt + } else if stackSize < baseStackSize { + // This goroutine has a very small stack, but still needs to fit all + // registers to start and suspend the goroutine. Otherwise a stack + // overflow will occur even before the goroutine is started. + stackSize = baseStackSize + } + sizes[name] = functionStackSize{ + stackSize: stackSize, + stackSizeType: stackSizeType, + missingStackSize: missingStackSize, + humanName: humanName, + } + } + + if resetFunction != "" { + return append([]string{resetFunction}, gowrappers...), sizes, nil + } + return gowrappers, sizes, nil +} + +// modifyStackSizes modifies the .tinygo_stacksizes section with the updated +// stack size information. 
Before this modification, all stack sizes in the +// section assume the default stack size (which is relatively big). +func modifyStackSizes(executable string, stackSizeLoads []string, stackSizes map[string]functionStackSize) error { + fp, err := os.OpenFile(executable, os.O_RDWR, 0) + if err != nil { + return err + } + defer fp.Close() + + elfFile, err := elf.NewFile(fp) + if err != nil { + return err + } + + section := elfFile.Section(".tinygo_stacksizes") + if section == nil { + return errors.New("could not find .tinygo_stacksizes section") + } + + if section.Size != section.FileSize { + // Sanity check. + return fmt.Errorf("expected .tinygo_stacksizes to have identical size and file size, got %d and %d", section.Size, section.FileSize) + } + + // Read all goroutine stack sizes. + data := make([]byte, section.Size) + _, err = fp.ReadAt(data, int64(section.Offset)) + if err != nil { + return err + } + + if len(stackSizeLoads)*4 != len(data) { + // Note: while AVR should use 2 byte stack sizes, even 64-bit platforms + // should probably stick to 4 byte stack sizes as a larger than 4GB + // stack doesn't make much sense. + return errors.New("expected 4 byte stack sizes") + } + + // Modify goroutine stack sizes with a compile-time known worst case stack + // size. + for i, name := range stackSizeLoads { + fn, ok := stackSizes[name] + if !ok { + return fmt.Errorf("could not find symbol %s in ELF file", name) + } + if fn.stackSizeType == stacksize.Bounded { + // Note: adding 4 for the stack canary. Even though the size may be + // automatically determined, stack overflow checking is still + // important as the stack size cannot be determined for all + // goroutines. + binary.LittleEndian.PutUint32(data[i*4:], uint32(fn.stackSize)+4) + } + } + + // Write back the modified stack sizes. 
+ _, err = fp.WriteAt(data, int64(section.Offset)) + if err != nil { + return err + } + + return nil +} + +// printStacks prints the maximum stack depth for functions that are started as +// goroutines. Stack sizes cannot always be determined statically, in particular +// recursive functions and functions that call interface methods or function +// pointers may have an unknown stack depth (depending on what the optimizer +// manages to optimize away). +// +// It might print something like the following: +// +// function stack usage (in bytes) +// Reset_Handler 316 +// examples/blinky2.led1 92 +// runtime.run$1 300 +func printStacks(calculatedStacks []string, stackSizes map[string]functionStackSize) { // Print the sizes of all stacks. fmt.Printf("%-32s %s\n", "function", "stack usage (in bytes)") - for _, name := range gowrappers { - for _, fn := range functions[name] { - stackSize, stackSizeType, missingStackSize := fn.StackSize() - funcName := gowrapperNames[name] - if funcName == "" { - funcName = "" - } - switch stackSizeType { - case stacksize.Bounded: - fmt.Printf("%-32s %d\n", funcName, stackSize) - case stacksize.Unknown: - fmt.Printf("%-32s unknown, %s does not have stack frame information\n", funcName, missingStackSize) - case stacksize.Recursive: - fmt.Printf("%-32s recursive, %s may call itself\n", funcName, missingStackSize) - case stacksize.IndirectCall: - fmt.Printf("%-32s unknown, %s calls a function pointer\n", funcName, missingStackSize) - } + for _, name := range calculatedStacks { + fn := stackSizes[name] + switch fn.stackSizeType { + case stacksize.Bounded: + fmt.Printf("%-32s %d\n", fn.humanName, fn.stackSize) + case stacksize.Unknown: + fmt.Printf("%-32s unknown, %s does not have stack frame information\n", fn.humanName, fn.missingStackSize) + case stacksize.Recursive: + fmt.Printf("%-32s recursive, %s may call itself\n", fn.humanName, fn.missingStackSize) + case stacksize.IndirectCall: + fmt.Printf("%-32s unknown, %s calls a function 
pointer\n", fn.humanName, fn.missingStackSize) } } } diff --git a/compileopts/config.go b/compileopts/config.go index fdb26f01..fa82b72e 100644 --- a/compileopts/config.go +++ b/compileopts/config.go @@ -164,6 +164,16 @@ func (c *Config) PanicStrategy() string { return c.Options.PanicStrategy } +// AutomaticStackSize returns whether goroutine stack sizes should be determined +// automatically at compile time, if possible. If it is false, no attempt is +// made. +func (c *Config) AutomaticStackSize() bool { + if c.Target.AutoStackSize != nil && c.Scheduler() == "tasks" { + return *c.Target.AutoStackSize + } + return false +} + // CFlags returns the flags to pass to the C compiler. This is necessary for CGo // preprocessing. func (c *Config) CFlags() []string { diff --git a/compileopts/target.go b/compileopts/target.go index 256e7075..07625127 100644 --- a/compileopts/target.go +++ b/compileopts/target.go @@ -33,6 +33,8 @@ type TargetSpec struct { Linker string `json:"linker"` RTLib string `json:"rtlib"` // compiler runtime library (libgcc, compiler-rt) Libc string `json:"libc"` + AutoStackSize *bool `json:"automatic-stack-size"` // Determine stack size automatically at compile time. + DefaultStackSize uint64 `json:"default-stack-size"` // Default stack size if the size couldn't be determined at compile time. CFlags []string `json:"cflags"` LDFlags []string `json:"ldflags"` LinkerScript string `json:"linkerscript"` @@ -90,6 +92,12 @@ func (spec *TargetSpec) copyProperties(spec2 *TargetSpec) { if spec2.Libc != "" { spec.Libc = spec2.Libc } + if spec2.AutoStackSize != nil { + spec.AutoStackSize = spec2.AutoStackSize + } + if spec2.DefaultStackSize != 0 { + spec.DefaultStackSize = spec2.DefaultStackSize + } spec.CFlags = append(spec.CFlags, spec2.CFlags...) spec.LDFlags = append(spec.LDFlags, spec2.LDFlags...) 
if spec2.LinkerScript != "" { diff --git a/compiler/goroutine.go b/compiler/goroutine.go index dbb4bd7e..23978ccb 100644 --- a/compiler/goroutine.go +++ b/compiler/goroutine.go @@ -19,16 +19,30 @@ import ( // Because a go statement doesn't return anything, return undef. func (b *builder) createGoInstruction(funcPtr llvm.Value, params []llvm.Value, prefix string, pos token.Pos) llvm.Value { paramBundle := b.emitPointerPack(params) - var callee llvm.Value + var callee, stackSize llvm.Value switch b.Scheduler() { case "none", "tasks": callee = b.createGoroutineStartWrapper(funcPtr, prefix, pos) + if b.AutomaticStackSize() { + // The stack size is not known until after linking. Call a dummy + // function that will be replaced with a load from a special ELF + // section that contains the stack size (and is modified after + // linking). + stackSize = b.createCall(b.mod.NamedFunction("internal/task.getGoroutineStackSize"), []llvm.Value{callee, llvm.Undef(b.i8ptrType), llvm.Undef(b.i8ptrType)}, "stacksize") + } else { + // The stack size is fixed at compile time. By emitting it here as a + // constant, it can be optimized. + stackSize = llvm.ConstInt(b.uintptrType, b.Target.DefaultStackSize, false) + } case "coroutines": callee = b.CreatePtrToInt(funcPtr, b.uintptrType, "") + // There is no goroutine stack size: coroutines are used instead of + // stacks. 
+ stackSize = llvm.Undef(b.uintptrType) default: panic("unreachable") } - b.createCall(b.mod.NamedFunction("internal/task.start"), []llvm.Value{callee, paramBundle, llvm.Undef(b.i8ptrType), llvm.ConstPointerNull(b.i8ptrType)}, "") + b.createCall(b.mod.NamedFunction("internal/task.start"), []llvm.Value{callee, paramBundle, stackSize, llvm.Undef(b.i8ptrType), llvm.ConstPointerNull(b.i8ptrType)}, "") return llvm.Undef(funcPtr.Type().ElementType().ReturnType()) } diff --git a/main_test.go b/main_test.go index 8597de1f..f66117b9 100644 --- a/main_test.go +++ b/main_test.go @@ -157,7 +157,7 @@ func runTest(path, target string, t *testing.T) { PrintIR: false, DumpSSA: false, VerifyIR: true, - Debug: false, + Debug: true, PrintSizes: "", WasmAbi: "js", } diff --git a/src/device/arm/cortexm.s b/src/device/arm/cortexm.s index c9123021..e9b15aaf 100644 --- a/src/device/arm/cortexm.s +++ b/src/device/arm/cortexm.s @@ -1,9 +1,11 @@ .syntax unified +.cfi_sections .debug_frame .section .text.HardFault_Handler .global HardFault_Handler .type HardFault_Handler, %function HardFault_Handler: + .cfi_startproc // Put the old stack pointer in the first argument, for easy debugging. This // is especially useful on Cortex-M0, which supports far fewer debug // facilities. @@ -19,6 +21,7 @@ HardFault_Handler: // Continue handling this error in Go. bl handleHardFault + .cfi_endproc .size HardFault_Handler, .-HardFault_Handler // This is a convenience function for semihosting support. @@ -27,5 +30,8 @@ HardFault_Handler: .global SemihostingCall .type SemihostingCall, %function SemihostingCall: + .cfi_startproc bkpt 0xab bx lr + .cfi_endproc +.size SemihostingCall, .-SemihostingCall diff --git a/src/internal/task/task.go b/src/internal/task/task.go index 57b29eb3..489400df 100644 --- a/src/internal/task/task.go +++ b/src/internal/task/task.go @@ -18,3 +18,8 @@ type Task struct { // state is the underlying running state of the task. 
state state } + +// getGoroutineStackSize is a compiler intrinsic that returns the stack size for +// the given function and falls back to the default stack size. It is replaced +// with a load from a special section just before codegen. +func getGoroutineStackSize(fn uintptr) uintptr diff --git a/src/internal/task/task_coroutine.go b/src/internal/task/task_coroutine.go index a9a00c61..bfeedc30 100644 --- a/src/internal/task/task_coroutine.go +++ b/src/internal/task/task_coroutine.go @@ -67,7 +67,7 @@ func createTask() *Task { // start invokes a function in a new goroutine. Calls to this are inserted by the compiler. // The created goroutine starts running immediately. // This is implemented inside the compiler. -func start(fn uintptr, args unsafe.Pointer) +func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) // Current returns the current active task. // This is implemented inside the compiler. diff --git a/src/internal/task/task_none.go b/src/internal/task/task_none.go index d30578ac..79d02c5c 100644 --- a/src/internal/task/task_none.go +++ b/src/internal/task/task_none.go @@ -17,7 +17,7 @@ func Current() *Task { } //go:noinline -func start(fn uintptr, args unsafe.Pointer) { +func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) { // The compiler will error if this is reachable. runtimePanic("scheduler is disabled") } diff --git a/src/internal/task/task_stack.go b/src/internal/task/task_stack.go index 2d8762fb..214f2a62 100644 --- a/src/internal/task/task_stack.go +++ b/src/internal/task/task_stack.go @@ -54,7 +54,7 @@ func (t *Task) Resume() { } // initialize the state and prepare to call the specified function with the specified argument bundle. -func (s *state) initialize(fn uintptr, args unsafe.Pointer) { +func (s *state) initialize(fn uintptr, args unsafe.Pointer, stackSize uintptr) { // Create a stack. 
stack := make([]uintptr, stackSize/unsafe.Sizeof(uintptr(0))) @@ -67,9 +67,9 @@ func runqueuePushBack(*Task) // start creates and starts a new goroutine with the given function and arguments. // The new goroutine is scheduled to run later. -func start(fn uintptr, args unsafe.Pointer) { +func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) { t := &Task{} - t.state.initialize(fn, args) + t.state.initialize(fn, args, stackSize) runqueuePushBack(t) } diff --git a/src/internal/task/task_stack_avr.go b/src/internal/task/task_stack_avr.go index cbd5a988..08c0b63e 100644 --- a/src/internal/task/task_stack_avr.go +++ b/src/internal/task/task_stack_avr.go @@ -4,8 +4,6 @@ package task import "unsafe" -const stackSize = 256 - // calleeSavedRegs is the list of registers that must be saved and restored when // switching between tasks. Also see scheduler_avr.S that relies on the // exact layout of this struct. diff --git a/src/internal/task/task_stack_cortexm.go b/src/internal/task/task_stack_cortexm.go index a7927d04..0417b922 100644 --- a/src/internal/task/task_stack_cortexm.go +++ b/src/internal/task/task_stack_cortexm.go @@ -4,8 +4,6 @@ package task import "unsafe" -const stackSize = 1024 - // calleeSavedRegs is the list of registers that must be saved and restored when // switching between tasks. Also see scheduler_cortexm.S that relies on the // exact layout of this struct. diff --git a/src/runtime/scheduler_cortexm.S b/src/runtime/scheduler_cortexm.S index a0485b38..2053deca 100644 --- a/src/runtime/scheduler_cortexm.S +++ b/src/runtime/scheduler_cortexm.S @@ -41,11 +41,7 @@ tinygo_getSystemStackPointer: .cfi_endproc .size tinygo_getSystemStackPointer, .-tinygo_getSystemStackPointer - -// switchToScheduler and switchToTask are also in the same section, to make sure -// relative branches work. 
-.section .text.tinygo_swapTask - +.section .text.tinygo_switchToScheduler .global tinygo_switchToScheduler .type tinygo_switchToScheduler, %function tinygo_switchToScheduler: @@ -62,6 +58,7 @@ tinygo_switchToScheduler: .cfi_endproc .size tinygo_switchToScheduler, .-tinygo_switchToScheduler +.section .text.tinygo_switchToTask .global tinygo_switchToTask .type tinygo_switchToTask, %function tinygo_switchToTask: @@ -72,11 +69,11 @@ tinygo_switchToTask: // and then we can invoke swapTask. msr PSP, r0 - // Continue executing in the swapTask function, which swaps the stack - // pointer. + b.n tinygo_swapTask .cfi_endproc .size tinygo_switchToTask, .-tinygo_switchToTask +.section .text.tinygo_swapTask .global tinygo_swapTask .type tinygo_swapTask, %function tinygo_swapTask: diff --git a/stacksize/stacksize.go b/stacksize/stacksize.go index 4161dc2f..2b111ddc 100644 --- a/stacksize/stacksize.go +++ b/stacksize/stacksize.go @@ -14,19 +14,38 @@ import ( // set to true to print information useful for debugging const debugPrint = false -type sizeType uint8 +// SizeType indicates whether a stack or frame size could be determined and if +// not, why. +type SizeType uint8 // Results after trying to determine the stack size of a function in the call // graph. The goal is to find a maximum (bounded) stack size, but sometimes this // is not possible for some reasons such as recursion or indirect calls. const ( - Undefined sizeType = iota // not yet calculated + Undefined SizeType = iota // not yet calculated Unknown // child has unknown stack size Bounded // stack size is fixed at compile time (no recursion etc) Recursive IndirectCall ) +func (s SizeType) String() string { + switch s { + case Undefined: + return "undefined" + case Unknown: + return "unknown" + case Bounded: + return "bounded" + case Recursive: + return "recursive" + case IndirectCall: + return "indirect call" + default: + return "" + } +} + // CallNode is a node in the call graph (that is, a function). 
Because this is // determined after linking, there may be multiple names for a single function // (due to aliases). It is also possible multiple functions have the same name @@ -37,9 +56,9 @@ type CallNode struct { Size uint64 // symbol size, in bytes Children []*CallNode // functions this function calls FrameSize uint64 // frame size, if FrameSizeType is Bounded - FrameSizeType sizeType // can be Undefined or Bounded + FrameSizeType SizeType // can be Undefined or Bounded stackSize uint64 - stackSizeType sizeType + stackSizeType SizeType missingFrameInfo *CallNode // the child function that is the cause for not being able to determine the stack size } @@ -236,7 +255,7 @@ func findSymbol(symbolList []*CallNode, address uint64) *CallNode { // returns the maximum stack size, whether this size can be known at compile // time and the call node responsible for failing to determine the maximum stack // usage. The stack size is only valid if sizeType is Bounded. -func (node *CallNode) StackSize() (uint64, sizeType, *CallNode) { +func (node *CallNode) StackSize() (uint64, SizeType, *CallNode) { if node.stackSizeType == Undefined { node.determineStackSize(make(map[*CallNode]struct{})) } diff --git a/targets/arm.ld b/targets/arm.ld index 347ed16c..e4155b90 100644 --- a/targets/arm.ld +++ b/targets/arm.ld @@ -16,6 +16,11 @@ SECTIONS . = ALIGN(4); } >FLASH_TEXT + .tinygo_stacksizes : + { + *(.tinygo_stacksizes) + } > FLASH_TEXT + /* Put the stack at the bottom of RAM, so that the application will * crash on stack overflow instead of silently corrupting memory. 
* See: http://blog.japaric.io/stack-overflow-protection/ */ diff --git a/targets/avr.json b/targets/avr.json index ae7c6860..4aafbea9 100644 --- a/targets/avr.json +++ b/targets/avr.json @@ -7,6 +7,7 @@ "gc": "conservative", "linker": "avr-gcc", "scheduler": "none", + "default-stack-size": 256, "ldflags": [ "-T", "targets/avr.ld", "-Wl,--gc-sections" diff --git a/targets/cortex-m-qemu.s b/targets/cortex-m-qemu.s index 2e2fa010..fdbecc8f 100644 --- a/targets/cortex-m-qemu.s +++ b/targets/cortex-m-qemu.s @@ -1,6 +1,7 @@ // Generic Cortex-M interrupt vector. // This vector is used by the Cortex-M QEMU target. +.cfi_sections .debug_frame .syntax unified // This is the default handler for interrupts, if triggered but not defined. @@ -8,8 +9,11 @@ .global Default_Handler .type Default_Handler, %function Default_Handler: + .cfi_startproc wfe b Default_Handler + .cfi_endproc +.size Default_Handler, .-Default_Handler // Avoid the need for repeated .weak and .set instructions. .macro IRQ handler diff --git a/targets/cortex-m.json b/targets/cortex-m.json index 2a91f1cd..958d51ee 100644 --- a/targets/cortex-m.json +++ b/targets/cortex-m.json @@ -8,6 +8,8 @@ "linker": "ld.lld", "rtlib": "compiler-rt", "libc": "picolibc", + "automatic-stack-size": true, + "default-stack-size": 1024, "cflags": [ "-Oz", "-mthumb", diff --git a/transform/func-lowering.go b/transform/func-lowering.go index 03d1f440..89c9f692 100644 --- a/transform/func-lowering.go +++ b/transform/func-lowering.go @@ -171,7 +171,7 @@ func LowerFuncValues(mod llvm.Module) { i8ptrType := llvm.PointerType(ctx.Int8Type(), 0) calleeValue := builder.CreatePtrToInt(funcPtr, uintptrType, "") start := mod.NamedFunction("internal/task.start") - builder.CreateCall(start, []llvm.Value{calleeValue, callIntPtr.Operand(1), llvm.Undef(i8ptrType), llvm.ConstNull(i8ptrType)}, "") + builder.CreateCall(start, []llvm.Value{calleeValue, callIntPtr.Operand(1), llvm.Undef(uintptrType), llvm.Undef(i8ptrType), llvm.ConstNull(i8ptrType)}, "") 
return llvm.Value{} // void so no return value }, functions) callIntPtr.EraseFromParentAsInstruction() diff --git a/transform/stacksize.go b/transform/stacksize.go new file mode 100644 index 00000000..8443166e --- /dev/null +++ b/transform/stacksize.go @@ -0,0 +1,64 @@ +package transform + +import ( + "github.com/tinygo-org/tinygo/compileopts" + "tinygo.org/x/go-llvm" +) + +// CreateStackSizeLoads replaces internal/task.getGoroutineStackSize calls with +// loads from internal/task.stackSizes that will be updated after linking. This +// way the stack sizes are loaded from a separate section and can easily be +// modified after linking. +func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string { + functionMap := map[llvm.Value][]llvm.Value{} + var functions []llvm.Value + var functionNames []string + for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) { + if use.FirstUse().IsNil() { + // Apparently this stack size isn't used. + use.EraseFromParentAsInstruction() + continue + } + ptrtoint := use.Operand(0) + if _, ok := functionMap[ptrtoint]; !ok { + functions = append(functions, ptrtoint) + functionNames = append(functionNames, ptrtoint.Operand(0).Name()) + } + functionMap[ptrtoint] = append(functionMap[ptrtoint], use) + } + + if len(functions) == 0 { + // Nothing to do. + return nil + } + + // Create the new global with stack sizes, that will be put in a new section + // just for itself. 
+ stackSizesGlobalType := llvm.ArrayType(functions[0].Type(), len(functions)) + stackSizesGlobal := llvm.AddGlobal(mod, stackSizesGlobalType, "internal/task.stackSizes") + stackSizesGlobal.SetSection(".tinygo_stacksizes") + defaultStackSizes := make([]llvm.Value, len(functions)) + defaultStackSize := llvm.ConstInt(functions[0].Type(), config.Target.DefaultStackSize, false) + for i := range defaultStackSizes { + defaultStackSizes[i] = defaultStackSize + } + stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes)) + + // Replace the calls with loads from the new global with stack sizes. + irbuilder := mod.Context().NewBuilder() + defer irbuilder.Dispose() + for i, function := range functions { + for _, use := range functionMap[function] { + ptr := llvm.ConstGEP(stackSizesGlobal, []llvm.Value{ + llvm.ConstInt(mod.Context().Int32Type(), 0, false), + llvm.ConstInt(mod.Context().Int32Type(), uint64(i), false), + }) + irbuilder.SetInsertPointBefore(use) + stacksize := irbuilder.CreateLoad(ptr, "stacksize") + use.ReplaceAllUsesWith(stacksize) + use.EraseFromParentAsInstruction() + } + } + + return functionNames +} diff --git a/transform/stacksize_test.go b/transform/stacksize_test.go new file mode 100644 index 00000000..c7c6cdec --- /dev/null +++ b/transform/stacksize_test.go @@ -0,0 +1,20 @@ +package transform + +import ( + "testing" + + "github.com/tinygo-org/tinygo/compileopts" + "tinygo.org/x/go-llvm" +) + +func TestCreateStackSizeLoads(t *testing.T) { + t.Parallel() + testTransform(t, "testdata/stacksize", func(mod llvm.Module) { + // Run optimization pass. 
+ CreateStackSizeLoads(mod, &compileopts.Config{ + Target: &compileopts.TargetSpec{ + DefaultStackSize: 1024, + }, + }) + }) +} diff --git a/transform/testdata/coroutines.ll b/transform/testdata/coroutines.ll index f2ace5a0..b51abb5e 100644 --- a/transform/testdata/coroutines.ll +++ b/transform/testdata/coroutines.ll @@ -4,7 +4,7 @@ target triple = "armv7m-none-eabi" %"internal/task.state" = type { i8* } %"internal/task.Task" = type { %"internal/task.Task", i8*, i32, %"internal/task.state" } -declare void @"internal/task.start"(i32, i8*, i8*, i8*) +declare void @"internal/task.start"(i32, i8*, i32, i8*, i8*) declare void @"internal/task.Pause"(i8*, i8*) declare void @runtime.scheduler(i8*, i8*) @@ -102,9 +102,9 @@ define void @sleepGoroutine(i8*, i8* %parentHandle) { define void @progMain(i8*, i8* %parentHandle) { entry: ; Call a sync func in a goroutine. - call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @doNothing to i32), i8* undef, i8* undef, i8* null) + call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @doNothing to i32), i8* undef, i32 undef, i8* undef, i8* null) ; Call an async func in a goroutine. - call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @sleepGoroutine to i32), i8* undef, i8* undef, i8* null) + call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @sleepGoroutine to i32), i8* undef, i32 undef, i8* undef, i8* null) ; Sleep a bit. call void @sleep(i64 2000000, i8* undef, i8* null) ; Done. @@ -114,7 +114,7 @@ entry: ; Entrypoint of runtime. 
define void @main() { entry: - call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @progMain to i32), i8* undef, i8* undef, i8* null) + call void @"internal/task.start"(i32 ptrtoint (void (i8*, i8*)* @progMain to i32), i8* undef, i32 undef, i8* undef, i8* null) call void @runtime.scheduler(i8* undef, i8* null) ret void } diff --git a/transform/testdata/coroutines.out.ll b/transform/testdata/coroutines.out.ll index c7850a5b..af91beac 100644 --- a/transform/testdata/coroutines.out.ll +++ b/transform/testdata/coroutines.out.ll @@ -4,7 +4,7 @@ target triple = "armv7m-none-eabi" %"internal/task.Task" = type { %"internal/task.Task", i8*, i32, %"internal/task.state" } %"internal/task.state" = type { i8* } -declare void @"internal/task.start"(i32, i8*, i8*, i8*) +declare void @"internal/task.start"(i32, i8*, i32, i8*, i8*) declare void @"internal/task.Pause"(i8*, i8*) diff --git a/transform/testdata/func-lowering.ll b/transform/testdata/func-lowering.ll index 04241dc3..c00264d9 100644 --- a/transform/testdata/func-lowering.ll +++ b/transform/testdata/func-lowering.ll @@ -14,7 +14,7 @@ target triple = "wasm32-unknown-unknown-wasm" declare i32 @runtime.getFuncPtr(i8*, i32, %runtime.typecodeID*, i8*, i8*) -declare void @"internal/task.start"(i32, i8*, i8*, i8*) +declare void @"internal/task.start"(i32, i8*, i32, i8*, i8*) declare void @runtime.nilPanic(i8*, i8*) @@ -67,6 +67,6 @@ fpcall.next: define void @sleepFuncValue(i8*, i32, i8* nocapture readnone %context, i8* nocapture readnone %parentHandle) { entry: %2 = call i32 @runtime.getFuncPtr(i8* %0, i32 %1, %runtime.typecodeID* @"reflect/types.type:func:{basic:int}{}", i8* undef, i8* null) - call void @"internal/task.start"(i32 %2, i8* null, i8* undef, i8* null) + call void @"internal/task.start"(i32 %2, i8* null, i32 undef, i8* undef, i8* null) ret void } diff --git a/transform/testdata/func-lowering.out.ll b/transform/testdata/func-lowering.out.ll index 2f46baad..deba41b0 100644 --- 
a/transform/testdata/func-lowering.out.ll +++ b/transform/testdata/func-lowering.out.ll @@ -14,7 +14,7 @@ target triple = "wasm32-unknown-unknown-wasm" declare i32 @runtime.getFuncPtr(i8*, i32, %runtime.typecodeID*, i8*, i8*) -declare void @"internal/task.start"(i32, i8*, i8*, i8*) +declare void @"internal/task.start"(i32, i8*, i32, i8*, i8*) declare void @runtime.nilPanic(i8*, i8*) @@ -97,11 +97,11 @@ func.nil: ; preds = %entry unreachable func.call1: ; preds = %entry - call void @"internal/task.start"(i32 ptrtoint (void (i32, i8*, i8*)* @"main$1" to i32), i8* null, i8* undef, i8* null) + call void @"internal/task.start"(i32 ptrtoint (void (i32, i8*, i8*)* @"main$1" to i32), i8* null, i32 undef, i8* undef, i8* null) br label %func.next func.call2: ; preds = %entry - call void @"internal/task.start"(i32 ptrtoint (void (i32, i8*, i8*)* @"main$2" to i32), i8* null, i8* undef, i8* null) + call void @"internal/task.start"(i32 ptrtoint (void (i32, i8*, i8*)* @"main$2" to i32), i8* null, i32 undef, i8* undef, i8* null) br label %func.next func.next: ; preds = %func.call2, %func.call1 diff --git a/transform/testdata/stacksize.ll b/transform/testdata/stacksize.ll new file mode 100644 index 00000000..f80a7121 --- /dev/null +++ b/transform/testdata/stacksize.ll @@ -0,0 +1,15 @@ +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7m-none-eabi" + +declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*) + +declare void @"runtime.run$1$gowrapper"(i8*) + +declare void @"internal/task.start"(i32, i8*, i32) + +define void @Reset_Handler() { +entry: + %stacksize = call i32 @"internal/task.getGoroutineStackSize"(i32 ptrtoint (void (i8*)* @"runtime.run$1$gowrapper" to i32), i8* undef, i8* undef) + call void @"internal/task.start"(i32 ptrtoint (void (i8*)* @"runtime.run$1$gowrapper" to i32), i8* undef, i32 %stacksize) + ret void +} diff --git a/transform/testdata/stacksize.out.ll b/transform/testdata/stacksize.out.ll new file mode 
100644 index 00000000..08ad3022 --- /dev/null +++ b/transform/testdata/stacksize.out.ll @@ -0,0 +1,17 @@ +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7m-none-eabi" + +@"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes" + +declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*) + +declare void @"runtime.run$1$gowrapper"(i8*) + +declare void @"internal/task.start"(i32, i8*, i32) + +define void @Reset_Handler() { +entry: + %stacksize1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @"internal/task.stackSizes", i32 0, i32 0) + call void @"internal/task.start"(i32 ptrtoint (void (i8*)* @"runtime.run$1$gowrapper" to i32), i8* undef, i32 %stacksize1) + ret void +}