From cce9c6d5a12ad7ba91310e501cb56b78d4eb6df0 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 19 Feb 2023 17:07:40 +0100 Subject: [PATCH] arm64: fix register save/restore to include vector registers Some vector registers must be preserved across calls, but this wasn't happening on Linux and MacOS. When I added support for windows/arm64, I saw that it required these vector registers to be preserved and assumed this was Windows deviating from the standard calling convention. But actually, Windows was just implementing the standard calling convention and the bug was on Linux and MacOS. This commit fixes the bug on Linux and MacOS and at the same time merges the Go and assembly files as they no longer need to be separate. --- compileopts/target.go | 6 +- src/internal/task/task_stack_arm64.S | 15 ++-- src/internal/task/task_stack_arm64.go | 12 +++- src/internal/task/task_stack_arm64_windows.S | 65 ----------------- src/internal/task/task_stack_arm64_windows.go | 72 ------------------- src/runtime/asm_arm64.S | 10 +-- src/runtime/asm_arm64_windows.S | 36 ---------- 7 files changed, 29 insertions(+), 187 deletions(-) delete mode 100644 src/internal/task/task_stack_arm64_windows.S delete mode 100644 src/internal/task/task_stack_arm64_windows.go delete mode 100644 src/runtime/asm_arm64_windows.S diff --git a/compileopts/target.go b/compileopts/target.go index 92e9315e..30573a86 100644 --- a/compileopts/target.go +++ b/compileopts/target.go @@ -326,9 +326,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) { } if goarch != "wasm" { suffix := "" - if goos == "windows" { - // Windows uses a different calling convention from other operating - // systems so we need separate assembly files. + if goos == "windows" && goarch == "amd64" { + // Windows uses a different calling convention on amd64 from other + // operating systems so we need separate assembly files. suffix = "_windows" } spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/asm_"+goarch+suffix+".S") diff --git a/src/internal/task/task_stack_arm64.S b/src/internal/task/task_stack_arm64.S index 93f0027a..1baacb49 100644 --- a/src/internal/task/task_stack_arm64.S +++ b/src/internal/task/task_stack_arm64.S @@ -4,7 +4,6 @@ _tinygo_startTask: #else .section .text.tinygo_startTask .global tinygo_startTask -.type tinygo_startTask, %function tinygo_startTask: #endif .cfi_startproc @@ -35,7 +34,6 @@ tinygo_startTask: #endif .cfi_endproc #ifndef __MACH__ -.size tinygo_startTask, .-tinygo_startTask #endif @@ -44,7 +42,6 @@ tinygo_startTask: _tinygo_swapTask: #else .global tinygo_swapTask -.type tinygo_swapTask, %function tinygo_swapTask: #endif // This function gets the following parameters: @@ -52,12 +49,16 @@ tinygo_swapTask: // x1 = oldStack *uintptr // Save all callee-saved registers: - stp x19, x20, [sp, #-96]! + stp x19, x20, [sp, #-160]! stp x21, x22, [sp, #16] stp x23, x24, [sp, #32] stp x25, x26, [sp, #48] stp x27, x28, [sp, #64] stp x29, x30, [sp, #80] + stp d8, d9, [sp, #96] + stp d10, d11, [sp, #112] + stp d12, d13, [sp, #128] + stp d14, d15, [sp, #144] // Save the current stack pointer in oldStack. mov x8, sp @@ -67,10 +68,14 @@ tinygo_swapTask: mov sp, x0 // Restore stack state and return. + ldp d14, d15, [sp, #144] + ldp d12, d13, [sp, #128] + ldp d10, d11, [sp, #112] + ldp d8, d9, [sp, #96] ldp x29, x30, [sp, #80] ldp x27, x28, [sp, #64] ldp x25, x26, [sp, #48] ldp x23, x24, [sp, #32] ldp x21, x22, [sp, #16] - ldp x19, x20, [sp], #96 + ldp x19, x20, [sp], #160 ret diff --git a/src/internal/task/task_stack_arm64.go b/src/internal/task/task_stack_arm64.go index 4cf500bd..164d62f1 100644 --- a/src/internal/task/task_stack_arm64.go +++ b/src/internal/task/task_stack_arm64.go @@ -1,4 +1,4 @@ -//go:build scheduler.tasks && arm64 && !windows +//go:build scheduler.tasks && arm64 package task @@ -21,8 +21,16 @@ type calleeSavedRegs struct { x27 uintptr x28 uintptr x29 uintptr + pc uintptr // aka x30 aka LR - pc uintptr // aka x30 aka LR + d8 uintptr + d9 uintptr + d10 uintptr + d11 uintptr + d12 uintptr + d13 uintptr + d14 uintptr + d15 uintptr } // archInit runs architecture-specific setup for the goroutine startup. diff --git a/src/internal/task/task_stack_arm64_windows.S b/src/internal/task/task_stack_arm64_windows.S deleted file mode 100644 index 45c1ec4d..00000000 --- a/src/internal/task/task_stack_arm64_windows.S +++ /dev/null @@ -1,65 +0,0 @@ -.section .text.tinygo_startTask,"ax" -.global tinygo_startTask -tinygo_startTask: - .cfi_startproc - // Small assembly stub for starting a goroutine. This is already run on the - // new stack, with the callee-saved registers already loaded. - // Most importantly, x19 contains the pc of the to-be-started function and - // x20 contains the only argument it is given. Multiple arguments are packed - // into one by storing them in a new allocation. - - // Indicate to the unwinder that there is nothing to unwind, this is the - // root frame. It avoids the following (bogus) error message in GDB: - // Backtrace stopped: previous frame identical to this frame (corrupt stack?) - .cfi_undefined lr - - // Set the first argument of the goroutine start wrapper, which contains all - // the arguments. - mov x0, x20 - - // Branch to the "goroutine start" function. By using blx instead of bx, - // we'll return here instead of tail calling. - blr x19 - - // After return, exit this goroutine. This is a tail call. - b tinygo_pause - .cfi_endproc - - -.global tinygo_swapTask -tinygo_swapTask: - // This function gets the following parameters: - // x0 = newStack uintptr - // x1 = oldStack *uintptr - - // Save all callee-saved registers: - stp x19, x20, [sp, #-160]! - stp x21, x22, [sp, #16] - stp x23, x24, [sp, #32] - stp x25, x26, [sp, #48] - stp x27, x28, [sp, #64] - stp x29, x30, [sp, #80] - stp d8, d9, [sp, #96] - stp d10, d11, [sp, #112] - stp d12, d13, [sp, #128] - stp d14, d15, [sp, #144] - - // Save the current stack pointer in oldStack. - mov x8, sp - str x8, [x1] - - // Switch to the new stack pointer. - mov sp, x0 - - // Restore stack state and return. - ldp d14, d15, [sp, #144] - ldp d12, d13, [sp, #128] - ldp d10, d11, [sp, #112] - ldp d8, d9, [sp, #96] - ldp x29, x30, [sp, #80] - ldp x27, x28, [sp, #64] - ldp x25, x26, [sp, #48] - ldp x23, x24, [sp, #32] - ldp x21, x22, [sp, #16] - ldp x19, x20, [sp], #160 - ret diff --git a/src/internal/task/task_stack_arm64_windows.go b/src/internal/task/task_stack_arm64_windows.go deleted file mode 100644 index c3c6e8f0..00000000 --- a/src/internal/task/task_stack_arm64_windows.go +++ /dev/null @@ -1,72 +0,0 @@ -//go:build scheduler.tasks && arm64 && windows - -package task - -import "unsafe" - -var systemStack uintptr - -// calleeSavedRegs is the list of registers that must be saved and restored -// when switching between tasks. Also see task_stack_arm64_windows.S that -// relies on the exact layout of this struct. -type calleeSavedRegs struct { - x19 uintptr - x20 uintptr - x21 uintptr - x22 uintptr - x23 uintptr - x24 uintptr - x25 uintptr - x26 uintptr - x27 uintptr - x28 uintptr - x29 uintptr - pc uintptr // aka x30 aka LR - - d8 uintptr - d9 uintptr - d10 uintptr - d11 uintptr - d12 uintptr - d13 uintptr - d14 uintptr - d15 uintptr -} - -// archInit runs architecture-specific setup for the goroutine startup. -func (s *state) archInit(r *calleeSavedRegs, fn uintptr, args unsafe.Pointer) { - // Store the initial sp for the startTask function (implemented in assembly). - s.sp = uintptr(unsafe.Pointer(r)) - - // Initialize the registers. - // These will be popped off of the stack on the first resume of the goroutine. - - // Start the function at tinygo_startTask (defined in src/internal/task/task_stack_arm64_windows.S). - // This assembly code calls a function (passed in x19) with a single argument - // (passed in x20). After the function returns, it calls Pause(). - r.pc = uintptr(unsafe.Pointer(&startTask)) - - // Pass the function to call in x19. - // This function is a compiler-generated wrapper which loads arguments out of a struct pointer. - // See createGoroutineStartWrapper (defined in compiler/goroutine.go) for more information. - r.x19 = fn - - // Pass the pointer to the arguments struct in x20. - r.x20 = uintptr(args) -} - -func (s *state) resume() { - swapTask(s.sp, &systemStack) -} - -func (s *state) pause() { - newStack := systemStack - systemStack = 0 - swapTask(newStack, &s.sp) -} - -// SystemStack returns the system stack pointer when called from a task stack. -// When called from the system stack, it returns 0. -func SystemStack() uintptr { - return systemStack -} diff --git a/src/runtime/asm_arm64.S b/src/runtime/asm_arm64.S index 679f55fa..267b6399 100644 --- a/src/runtime/asm_arm64.S +++ b/src/runtime/asm_arm64.S @@ -4,7 +4,6 @@ _tinygo_scanCurrentStack: #else .section .text.tinygo_scanCurrentStack .global tinygo_scanCurrentStack -.type tinygo_scanCurrentStack, %function tinygo_scanCurrentStack: #endif // Sources: @@ -12,12 +11,16 @@ tinygo_scanCurrentStack: // * https://godbolt.org/z/qrvrEh // Save callee-saved registers. - stp x29, x30, [sp, #-96]! + stp x29, x30, [sp, #-160]! stp x28, x27, [sp, #16] stp x26, x25, [sp, #32] stp x24, x23, [sp, #48] stp x22, x21, [sp, #64] stp x20, x19, [sp, #80] + stp d8, d9, [sp, #96] + stp d10, d11, [sp, #112] + stp d12, d13, [sp, #128] + stp d14, d15, [sp, #144] // Scan the stack. mov x0, sp @@ -28,7 +31,7 @@ tinygo_scanCurrentStack: #endif // Restore stack state and return. - ldp x29, x30, [sp], #96 + ldp x29, x30, [sp], #160 ret @@ -38,7 +41,6 @@ _tinygo_longjmp: #else .section .text.tinygo_longjmp .global tinygo_longjmp -.type tinygo_longjmp, %function tinygo_longjmp: #endif // Note: the code we jump to assumes x0 is set to a non-zero value if we diff --git a/src/runtime/asm_arm64_windows.S b/src/runtime/asm_arm64_windows.S deleted file mode 100644 index 24377899..00000000 --- a/src/runtime/asm_arm64_windows.S +++ /dev/null @@ -1,36 +0,0 @@ -.section .text.tinygo_scanCurrentStack,"ax" -.global tinygo_scanCurrentStack -tinygo_scanCurrentStack: - // Sources: - // * https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170 - // * https://godbolt.org/z/foc1xncvb - - // Save callee-saved registers. - stp x29, x30, [sp, #-160]! - stp x28, x27, [sp, #16] - stp x26, x25, [sp, #32] - stp x24, x23, [sp, #48] - stp x22, x21, [sp, #64] - stp x20, x19, [sp, #80] - stp d8, d9, [sp, #96] - stp d10, d11, [sp, #112] - stp d12, d13, [sp, #128] - stp d14, d15, [sp, #144] - - // Scan the stack. - mov x0, sp - bl tinygo_scanstack - - // Restore stack state and return. - ldp x29, x30, [sp], #160 - ret - - -.section .text.tinygo_longjmp,"ax" -.global tinygo_longjmp -tinygo_longjmp: - // Note: the code we jump to assumes x0 is set to a non-zero value if we - // jump from here (which is conveniently already the case). - ldp x1, x2, [x0] // jumpSP, jumpPC - mov sp, x1 - br x2