gc: use raw stack access whenever possible
The only architecture that actually needs special support for scanning the stack is WebAssembly. All others allow raw access to the stack with a small bit of assembly. Therefore, don't manually keep track of all these objects on the stack and instead just use conservative stack scanning. This results in a massive code size decrease on the affected targets (only linux/amd64 was tested for code size) - sometimes around 33%. It also allows for future improvements such as using proper stackful goroutines.
Этот коммит содержится в:
родитель
bfa29f17da
коммит
67de8b490d
13 изменённых файлов: 125 добавлений и 11 удалений
|
@ -118,12 +118,12 @@ func (c *Config) NeedsStackObjects() bool {
|
|||
switch c.GC() {
|
||||
case "conservative", "extalloc":
|
||||
for _, tag := range c.BuildTags() {
|
||||
if tag == "baremetal" {
|
||||
return false
|
||||
if tag == "wasm" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
return false
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -246,6 +246,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) {
|
|||
} else {
|
||||
spec.LDFlags = append(spec.LDFlags, "-no-pie", "-Wl,--gc-sections") // WARNING: clang < 5.0 requires -nopie
|
||||
}
|
||||
if goarch != "wasm" {
|
||||
spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/gc_"+goarch+".S")
|
||||
}
|
||||
if goarch != runtime.GOARCH {
|
||||
// Some educated guesses as to how to invoke helper programs.
|
||||
spec.GDB = "gdb-multiarch"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package runtime
|
||||
|
||||
import "device"
|
||||
|
||||
const GOARCH = "386"
|
||||
|
||||
// The bitness of the CPU (e.g. 8, 32, 64).
|
||||
|
@ -10,4 +12,6 @@ func align(ptr uintptr) uintptr {
|
|||
return (ptr + 3) &^ 3
|
||||
}
|
||||
|
||||
func getCurrentStackPointer() uintptr
|
||||
// getCurrentStackPointer returns the current value of the %esp register,
// obtained with a single inline-assembly move into the result operand.
func getCurrentStackPointer() uintptr {
|
||||
return device.AsmFull("movl %esp, {}", nil)
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package runtime
|
||||
|
||||
import "device"
|
||||
|
||||
const GOARCH = "amd64"
|
||||
|
||||
// The bitness of the CPU (e.g. 8, 32, 64).
|
||||
|
@ -12,4 +14,6 @@ func align(ptr uintptr) uintptr {
|
|||
return (ptr + 15) &^ 15
|
||||
}
|
||||
|
||||
func getCurrentStackPointer() uintptr
|
||||
// getCurrentStackPointer returns the current value of the %rsp register,
// obtained with a single inline-assembly move into the result operand.
func getCurrentStackPointer() uintptr {
|
||||
return device.AsmFull("movq %rsp, {}", nil)
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package runtime
|
||||
|
||||
import "device/arm"
|
||||
|
||||
const GOARCH = "arm64"
|
||||
|
||||
// The bitness of the CPU (e.g. 8, 32, 64).
|
||||
|
@ -9,5 +11,6 @@ const TargetBits = 64
|
|||
func align(ptr uintptr) uintptr {
|
||||
return (ptr + 7) &^ 7
|
||||
}
|
||||
|
||||
func getCurrentStackPointer() uintptr
|
||||
// getCurrentStackPointer returns the current value of the sp register,
// obtained with a single inline-assembly move into the result operand.
func getCurrentStackPointer() uintptr {
|
||||
return arm.AsmFull("mov {}, sp", nil)
|
||||
}
|
||||
|
|
22
src/runtime/gc_386.S
Обычный файл
22
src/runtime/gc_386.S
Обычный файл
|
@ -0,0 +1,22 @@
|
|||
.section .text.tinygo_scanCurrentStack
|
||||
.global tinygo_scanCurrentStack
|
||||
.type tinygo_scanCurrentStack, %function
|
||||
tinygo_scanCurrentStack:
|
||||
// Sources:
|
||||
// * https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call
|
||||
// * https://godbolt.org/z/q7e8dn
|
||||
|
||||
// Save callee-saved registers.
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebp
|
||||
|
||||
// Scan the stack.
|
||||
pushl %esp
|
||||
calll tinygo_scanstack
|
||||
|
||||
// Restore the stack pointer. Registers do not need to be restored as they
|
||||
// were only pushed to be discoverable by the GC.
|
||||
addl $20, %esp
|
||||
retl
|
29
src/runtime/gc_amd64.S
Обычный файл
29
src/runtime/gc_amd64.S
Обычный файл
|
@ -0,0 +1,29 @@
|
|||
#ifdef __ELF__
|
||||
.section .text.tinygo_scanCurrentStack
|
||||
.global tinygo_scanCurrentStack
|
||||
tinygo_scanCurrentStack:
|
||||
#else // Darwin
|
||||
.global _tinygo_scanCurrentStack
|
||||
_tinygo_scanCurrentStack:
|
||||
#endif
|
||||
// Save callee-saved registers.
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
pushq %r14
|
||||
pushq %r15
|
||||
|
||||
// Scan the stack.
|
||||
subq $8, %rsp // adjust the stack before the call to maintain 16-byte alignment
|
||||
movq %rsp, %rdi
|
||||
#ifdef __ELF__
|
||||
callq tinygo_scanstack
|
||||
#else
|
||||
callq _tinygo_scanstack // Darwin
|
||||
#endif
|
||||
|
||||
// Restore the stack pointer. Registers do not need to be restored as they
|
||||
// were only pushed to be discoverable by the GC.
|
||||
addq $56, %rsp
|
||||
retq
|
23
src/runtime/gc_arm64.S
Обычный файл
23
src/runtime/gc_arm64.S
Обычный файл
|
@ -0,0 +1,23 @@
|
|||
.section .text.tinygo_scanCurrentStack
|
||||
.global tinygo_scanCurrentStack
|
||||
.type tinygo_scanCurrentStack, %function
|
||||
tinygo_scanCurrentStack:
|
||||
// Sources:
|
||||
// * https://developer.arm.com/architectures/learn-the-architecture/armv8-a-instruction-set-architecture/procedure-call-standard
|
||||
// * https://godbolt.org/z/qrvrEh
|
||||
|
||||
// Save callee-saved registers.
|
||||
stp x29, x30, [sp, #-96]!
|
||||
stp x28, x27, [sp, #16]
|
||||
stp x26, x25, [sp, #32]
|
||||
stp x24, x23, [sp, #48]
|
||||
stp x22, x21, [sp, #64]
|
||||
stp x20, x19, [sp, #80]
|
||||
|
||||
// Scan the stack.
|
||||
mov x0, sp
|
||||
bl tinygo_scanstack
|
||||
|
||||
// Restore stack state and return.
|
||||
ldp x29, x30, [sp], #96
|
||||
ret
|
|
@ -1,5 +1,5 @@
|
|||
// +build gc.conservative gc.extalloc
|
||||
// +build !baremetal
|
||||
// +build wasm
|
||||
|
||||
package runtime
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// +build gc.conservative gc.extalloc
|
||||
// +build baremetal
|
||||
// +build !wasm
|
||||
|
||||
package runtime
|
||||
|
||||
|
|
|
@ -8,19 +8,32 @@ type timeUnit int64
|
|||
|
||||
const asyncScheduler = false
|
||||
|
||||
var stackTop uintptr
|
||||
|
||||
func postinit() {}
|
||||
|
||||
// Entry point for Go. Initialize all packages and call main.main().
|
||||
//export main
|
||||
func main() int {
|
||||
preinit()
|
||||
run()
|
||||
|
||||
// Obtain the initial stack pointer right before calling the run() function.
|
||||
// The run function has been moved to a separate (non-inlined) function so
|
||||
// that the correct stack pointer is read.
|
||||
stackTop = getCurrentStackPointer()
|
||||
runMain()
|
||||
|
||||
// Call exit to correctly finish the program
|
||||
// Without this, the application crashes at start, not sure why
|
||||
return exit(0)
|
||||
}
|
||||
|
||||
// runMain only calls run(). It must be a separate, non-inlined function:
// main() stores the stack pointer in stackTop right before calling it, and
// keeping this call out of line is what makes that the correct stack pointer.
|
||||
//go:noinline
|
||||
func runMain() {
|
||||
run()
|
||||
}
|
||||
|
||||
// sleepTicks sleeps for the specified system ticks
|
||||
func sleepTicks(d timeUnit) {
|
||||
sleepThread(uint64(ticksToNanoseconds(d)))
|
||||
|
|
|
@ -37,6 +37,8 @@ type timespec struct {
|
|||
|
||||
const CLOCK_MONOTONIC_RAW = 4
|
||||
|
||||
var stackTop uintptr
|
||||
|
||||
func postinit() {}
|
||||
|
||||
// Entry point for Go. Initialize all packages and call main.main().
|
||||
|
@ -44,12 +46,22 @@ func postinit() {}
|
|||
func main() int {
|
||||
preinit()
|
||||
|
||||
run()
|
||||
// Obtain the initial stack pointer right before calling the run() function.
|
||||
// The run function has been moved to a separate (non-inlined) function so
|
||||
// that the correct stack pointer is read.
|
||||
stackTop = getCurrentStackPointer()
|
||||
runMain()
|
||||
|
||||
// For libc compatibility.
|
||||
return 0
|
||||
}
|
||||
|
||||
// runMain only calls run(). It must be a separate, non-inlined function:
// main() stores the stack pointer in stackTop right before calling it, and
// keeping this call out of line is what makes that the correct stack pointer.
|
||||
//go:noinline
|
||||
func runMain() {
|
||||
run()
|
||||
}
|
||||
|
||||
func putchar(c byte) {
|
||||
_putchar(int(c))
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
"linkerscript": "targets/nintendoswitch.ld",
|
||||
"extra-files": [
|
||||
"targets/nintendoswitch.s",
|
||||
"src/runtime/gc_arm64.S",
|
||||
"src/runtime/runtime_nintendoswitch.s"
|
||||
]
|
||||
}
|
||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче