From 67de8b490d0fe09f329e46eff4f75812ef7ff748 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Fri, 25 Sep 2020 16:09:37 +0200 Subject: [PATCH] gc: use raw stack access whenever possible The only architecture that actually needs special support for scanning the stack is WebAssembly. All others allow raw access to the stack with a small bit of assembly. Therefore, don't manually keep track of all these objects on the stack and instead just use conservative stack scanning. This results in a massive code size decrease in the affected targets (only tested linux/amd64 for code size) - sometimes around 33%. It also allows for future improvements such as using proper stackful goroutines. --- compileopts/config.go | 6 +++--- compileopts/target.go | 3 +++ src/runtime/arch_386.go | 6 +++++- src/runtime/arch_amd64.go | 6 +++++- src/runtime/arch_arm64.go | 7 +++++-- src/runtime/gc_386.S | 22 ++++++++++++++++++++ src/runtime/gc_amd64.S | 29 +++++++++++++++++++++++++++ src/runtime/gc_arm64.S | 23 +++++++++++++++++++++ src/runtime/gc_stack_portable.go | 2 +- src/runtime/gc_stack_raw.go | 2 +- src/runtime/runtime_nintendoswitch.go | 15 +++++++++++++- src/runtime/runtime_unix.go | 14 ++++++++++++- targets/nintendoswitch.json | 1 + 13 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 src/runtime/gc_386.S create mode 100644 src/runtime/gc_amd64.S create mode 100644 src/runtime/gc_arm64.S diff --git a/compileopts/config.go b/compileopts/config.go index a08a9ba1..ae143297 100644 --- a/compileopts/config.go +++ b/compileopts/config.go @@ -118,12 +118,12 @@ func (c *Config) NeedsStackObjects() bool { switch c.GC() { case "conservative", "extalloc": for _, tag := range c.BuildTags() { - if tag == "baremetal" { - return false + if tag == "wasm" { + return true } } - return true + return false default: return false } diff --git a/compileopts/target.go b/compileopts/target.go index f95c4855..5fbf1640 100644 --- a/compileopts/target.go +++ 
b/compileopts/target.go @@ -246,6 +246,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) { } else { spec.LDFlags = append(spec.LDFlags, "-no-pie", "-Wl,--gc-sections") // WARNING: clang < 5.0 requires -nopie } + if goarch != "wasm" { + spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/gc_"+goarch+".S") + } if goarch != runtime.GOARCH { // Some educated guesses as to how to invoke helper programs. spec.GDB = "gdb-multiarch" diff --git a/src/runtime/arch_386.go b/src/runtime/arch_386.go index 46b061c8..2dd7c9ff 100644 --- a/src/runtime/arch_386.go +++ b/src/runtime/arch_386.go @@ -1,5 +1,7 @@ package runtime +import "device" + const GOARCH = "386" // The bitness of the CPU (e.g. 8, 32, 64). @@ -10,4 +12,6 @@ func align(ptr uintptr) uintptr { return (ptr + 3) &^ 3 } -func getCurrentStackPointer() uintptr +func getCurrentStackPointer() uintptr { + return device.AsmFull("movl %esp, {}", nil) +} diff --git a/src/runtime/arch_amd64.go b/src/runtime/arch_amd64.go index c24f2540..640e0330 100644 --- a/src/runtime/arch_amd64.go +++ b/src/runtime/arch_amd64.go @@ -1,5 +1,7 @@ package runtime +import "device" + const GOARCH = "amd64" // The bitness of the CPU (e.g. 8, 32, 64). @@ -12,4 +14,6 @@ func align(ptr uintptr) uintptr { return (ptr + 15) &^ 15 } -func getCurrentStackPointer() uintptr +func getCurrentStackPointer() uintptr { + return device.AsmFull("movq %rsp, {}", nil) +} diff --git a/src/runtime/arch_arm64.go b/src/runtime/arch_arm64.go index 15c5ca49..c5cced3b 100644 --- a/src/runtime/arch_arm64.go +++ b/src/runtime/arch_arm64.go @@ -1,5 +1,7 @@ package runtime +import "device/arm" + const GOARCH = "arm64" // The bitness of the CPU (e.g. 8, 32, 64). 
@@ -9,5 +11,6 @@ const TargetBits = 64 func align(ptr uintptr) uintptr { return (ptr + 7) &^ 7 } - -func getCurrentStackPointer() uintptr +func getCurrentStackPointer() uintptr { + return arm.AsmFull("mov {}, sp", nil) +} diff --git a/src/runtime/gc_386.S b/src/runtime/gc_386.S new file mode 100644 index 00000000..3ca80151 --- /dev/null +++ b/src/runtime/gc_386.S @@ -0,0 +1,22 @@ +.section .text.tinygo_scanCurrentStack +.global tinygo_scanCurrentStack +.type tinygo_scanCurrentStack, %function +tinygo_scanCurrentStack: + // Sources: + // * https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call + // * https://godbolt.org/z/q7e8dn + + // Save callee-saved registers. + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + + // Scan the stack. + pushl %esp + calll tinygo_scanstack + + // Restore the stack pointer. Registers do not need to be restored as they + // were only pushed to be discoverable by the GC. + addl $20, %esp + retl diff --git a/src/runtime/gc_amd64.S b/src/runtime/gc_amd64.S new file mode 100644 index 00000000..fa89479f --- /dev/null +++ b/src/runtime/gc_amd64.S @@ -0,0 +1,29 @@ +#ifdef __ELF__ +.section .text.tinygo_scanCurrentStack +.global tinygo_scanCurrentStack +tinygo_scanCurrentStack: +#else // Darwin +.global _tinygo_scanCurrentStack +_tinygo_scanCurrentStack: +#endif + // Save callee-saved registers. + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + // Scan the stack. + subq $8, %rsp // adjust the stack before the call to maintain 16-byte alignment + movq %rsp, %rdi +#ifdef __ELF__ + callq tinygo_scanstack +#else + callq _tinygo_scanstack // Darwin +#endif + + // Restore the stack pointer. Registers do not need to be restored as they + // were only pushed to be discoverable by the GC. 
+ addq $56, %rsp + retq diff --git a/src/runtime/gc_arm64.S b/src/runtime/gc_arm64.S new file mode 100644 index 00000000..8a7f5335 --- /dev/null +++ b/src/runtime/gc_arm64.S @@ -0,0 +1,23 @@ +.section .text.tinygo_scanCurrentStack +.global tinygo_scanCurrentStack +.type tinygo_scanCurrentStack, %function +tinygo_scanCurrentStack: + // Sources: + // * https://developer.arm.com/architectures/learn-the-architecture/armv8-a-instruction-set-architecture/procedure-call-standard + // * https://godbolt.org/z/qrvrEh + + // Save callee-saved registers. + stp x29, x30, [sp, #-96]! + stp x28, x27, [sp, #16] + stp x26, x25, [sp, #32] + stp x24, x23, [sp, #48] + stp x22, x21, [sp, #64] + stp x20, x19, [sp, #80] + + // Scan the stack. + mov x0, sp + bl tinygo_scanstack + + // Restore stack state and return. + ldp x29, x30, [sp], #96 + ret diff --git a/src/runtime/gc_stack_portable.go b/src/runtime/gc_stack_portable.go index 865bdc8a..d48b2497 100644 --- a/src/runtime/gc_stack_portable.go +++ b/src/runtime/gc_stack_portable.go @@ -1,5 +1,5 @@ // +build gc.conservative gc.extalloc -// +build !baremetal +// +build wasm package runtime diff --git a/src/runtime/gc_stack_raw.go b/src/runtime/gc_stack_raw.go index c223a1c7..74be7fe8 100644 --- a/src/runtime/gc_stack_raw.go +++ b/src/runtime/gc_stack_raw.go @@ -1,5 +1,5 @@ // +build gc.conservative gc.extalloc -// +build baremetal +// +build !wasm package runtime diff --git a/src/runtime/runtime_nintendoswitch.go b/src/runtime/runtime_nintendoswitch.go index 97148b67..f5107fa8 100644 --- a/src/runtime/runtime_nintendoswitch.go +++ b/src/runtime/runtime_nintendoswitch.go @@ -8,19 +8,32 @@ type timeUnit int64 const asyncScheduler = false +var stackTop uintptr + func postinit() {} // Entry point for Go. Initialize all packages and call main.main(). //export main func main() int { preinit() - run() + + // Obtain the initial stack pointer right before calling the run() function. 
+ // The run function has been moved to a separate (non-inlined) function so + // that the correct stack pointer is read. + stackTop = getCurrentStackPointer() + runMain() // Call exit to correctly finish the program // Without this, the application crashes at start, not sure why return exit(0) } +// Must be a separate function to get the correct stack pointer. +//go:noinline +func runMain() { + run() +} + // sleepTicks sleeps for the specified system ticks func sleepTicks(d timeUnit) { sleepThread(uint64(ticksToNanoseconds(d))) diff --git a/src/runtime/runtime_unix.go b/src/runtime/runtime_unix.go index ef4b8258..17940f69 100644 --- a/src/runtime/runtime_unix.go +++ b/src/runtime/runtime_unix.go @@ -37,6 +37,8 @@ type timespec struct { const CLOCK_MONOTONIC_RAW = 4 +var stackTop uintptr + func postinit() {} // Entry point for Go. Initialize all packages and call main.main(). @@ -44,12 +46,22 @@ func postinit() {} func main() int { preinit() - run() + // Obtain the initial stack pointer right before calling the run() function. + // The run function has been moved to a separate (non-inlined) function so + // that the correct stack pointer is read. + stackTop = getCurrentStackPointer() + runMain() // For libc compatibility. return 0 } +// Must be a separate function to get the correct stack pointer. +//go:noinline +func runMain() { + run() +} + func putchar(c byte) { _putchar(int(c)) } diff --git a/targets/nintendoswitch.json b/targets/nintendoswitch.json index 3a0e40bd..aa8567b0 100644 --- a/targets/nintendoswitch.json +++ b/targets/nintendoswitch.json @@ -27,6 +27,7 @@ "linkerscript": "targets/nintendoswitch.ld", "extra-files": [ "targets/nintendoswitch.s", + "src/runtime/gc_arm64.S", "src/runtime/runtime_nintendoswitch.s" ] }