arm64: fix register save/restore to include vector registers
Some vector registers must be preserved across calls, but this wasn't happening on Linux and MacOS. When I added support for windows/arm64, I saw that it required these vector registers to be preserved and assumed this was Windows deviating from the standard calling convention. But actually, Windows was just implementing the standard calling convention and the bug was on Linux and MacOS. This commit fixes the bug on Linux and MacOS and at the same time merges the Go and assembly files as they no longer need to be separate.
Этот коммит содержится в:
родитель
f41b6a3b96
коммит
cce9c6d5a1
7 изменённых файлов: 29 добавлений и 187 удалений
|
@ -326,9 +326,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) {
|
|||
}
|
||||
if goarch != "wasm" {
|
||||
suffix := ""
|
||||
if goos == "windows" {
|
||||
// Windows uses a different calling convention from other operating
|
||||
// systems so we need separate assembly files.
|
||||
if goos == "windows" && goarch == "amd64" {
|
||||
// Windows uses a different calling convention on amd64 from other
|
||||
// operating systems so we need separate assembly files.
|
||||
suffix = "_windows"
|
||||
}
|
||||
spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/asm_"+goarch+suffix+".S")
|
||||
|
|
|
@ -4,7 +4,6 @@ _tinygo_startTask:
|
|||
#else
|
||||
.section .text.tinygo_startTask
|
||||
.global tinygo_startTask
|
||||
.type tinygo_startTask, %function
|
||||
tinygo_startTask:
|
||||
#endif
|
||||
.cfi_startproc
|
||||
|
@ -35,7 +34,6 @@ tinygo_startTask:
|
|||
#endif
|
||||
.cfi_endproc
|
||||
#ifndef __MACH__
|
||||
.size tinygo_startTask, .-tinygo_startTask
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -44,7 +42,6 @@ tinygo_startTask:
|
|||
_tinygo_swapTask:
|
||||
#else
|
||||
.global tinygo_swapTask
|
||||
.type tinygo_swapTask, %function
|
||||
tinygo_swapTask:
|
||||
#endif
|
||||
// This function gets the following parameters:
|
||||
|
@ -52,12 +49,16 @@ tinygo_swapTask:
|
|||
// x1 = oldStack *uintptr
|
||||
|
||||
// Save all callee-saved registers:
|
||||
stp x19, x20, [sp, #-96]!
|
||||
stp x19, x20, [sp, #-160]!
|
||||
stp x21, x22, [sp, #16]
|
||||
stp x23, x24, [sp, #32]
|
||||
stp x25, x26, [sp, #48]
|
||||
stp x27, x28, [sp, #64]
|
||||
stp x29, x30, [sp, #80]
|
||||
stp d8, d9, [sp, #96]
|
||||
stp d10, d11, [sp, #112]
|
||||
stp d12, d13, [sp, #128]
|
||||
stp d14, d15, [sp, #144]
|
||||
|
||||
// Save the current stack pointer in oldStack.
|
||||
mov x8, sp
|
||||
|
@ -67,10 +68,14 @@ tinygo_swapTask:
|
|||
mov sp, x0
|
||||
|
||||
// Restore stack state and return.
|
||||
ldp d14, d15, [sp, #144]
|
||||
ldp d12, d13, [sp, #128]
|
||||
ldp d10, d11, [sp, #112]
|
||||
ldp d8, d9, [sp, #96]
|
||||
ldp x29, x30, [sp, #80]
|
||||
ldp x27, x28, [sp, #64]
|
||||
ldp x25, x26, [sp, #48]
|
||||
ldp x23, x24, [sp, #32]
|
||||
ldp x21, x22, [sp, #16]
|
||||
ldp x19, x20, [sp], #96
|
||||
ldp x19, x20, [sp], #160
|
||||
ret
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//go:build scheduler.tasks && arm64 && !windows
|
||||
//go:build scheduler.tasks && arm64
|
||||
|
||||
package task
|
||||
|
||||
|
@ -21,8 +21,16 @@ type calleeSavedRegs struct {
|
|||
x27 uintptr
|
||||
x28 uintptr
|
||||
x29 uintptr
|
||||
|
||||
pc uintptr // aka x30 aka LR
|
||||
|
||||
d8 uintptr
|
||||
d9 uintptr
|
||||
d10 uintptr
|
||||
d11 uintptr
|
||||
d12 uintptr
|
||||
d13 uintptr
|
||||
d14 uintptr
|
||||
d15 uintptr
|
||||
}
|
||||
|
||||
// archInit runs architecture-specific setup for the goroutine startup.
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
.section .text.tinygo_startTask,"ax"
|
||||
.global tinygo_startTask
|
||||
tinygo_startTask:
|
||||
.cfi_startproc
|
||||
// Small assembly stub for starting a goroutine. This is already run on the
|
||||
// new stack, with the callee-saved registers already loaded.
|
||||
// Most importantly, x19 contains the pc of the to-be-started function and
|
||||
// x20 contains the only argument it is given. Multiple arguments are packed
|
||||
// into one by storing them in a new allocation.
|
||||
|
||||
// Indicate to the unwinder that there is nothing to unwind, this is the
|
||||
// root frame. It avoids the following (bogus) error message in GDB:
|
||||
// Backtrace stopped: previous frame identical to this frame (corrupt stack?)
|
||||
.cfi_undefined lr
|
||||
|
||||
// Set the first argument of the goroutine start wrapper, which contains all
|
||||
// the arguments.
|
||||
mov x0, x20
|
||||
|
||||
// Branch to the "goroutine start" function. By using blx instead of bx,
|
||||
// we'll return here instead of tail calling.
|
||||
blr x19
|
||||
|
||||
// After return, exit this goroutine. This is a tail call.
|
||||
b tinygo_pause
|
||||
.cfi_endproc
|
||||
|
||||
|
||||
.global tinygo_swapTask
|
||||
tinygo_swapTask:
|
||||
// This function gets the following parameters:
|
||||
// x0 = newStack uintptr
|
||||
// x1 = oldStack *uintptr
|
||||
|
||||
// Save all callee-saved registers:
|
||||
stp x19, x20, [sp, #-160]!
|
||||
stp x21, x22, [sp, #16]
|
||||
stp x23, x24, [sp, #32]
|
||||
stp x25, x26, [sp, #48]
|
||||
stp x27, x28, [sp, #64]
|
||||
stp x29, x30, [sp, #80]
|
||||
stp d8, d9, [sp, #96]
|
||||
stp d10, d11, [sp, #112]
|
||||
stp d12, d13, [sp, #128]
|
||||
stp d14, d15, [sp, #144]
|
||||
|
||||
// Save the current stack pointer in oldStack.
|
||||
mov x8, sp
|
||||
str x8, [x1]
|
||||
|
||||
// Switch to the new stack pointer.
|
||||
mov sp, x0
|
||||
|
||||
// Restore stack state and return.
|
||||
ldp d14, d15, [sp, #144]
|
||||
ldp d12, d13, [sp, #128]
|
||||
ldp d10, d11, [sp, #112]
|
||||
ldp d8, d9, [sp, #96]
|
||||
ldp x29, x30, [sp, #80]
|
||||
ldp x27, x28, [sp, #64]
|
||||
ldp x25, x26, [sp, #48]
|
||||
ldp x23, x24, [sp, #32]
|
||||
ldp x21, x22, [sp, #16]
|
||||
ldp x19, x20, [sp], #160
|
||||
ret
|
|
@ -1,72 +0,0 @@
|
|||
//go:build scheduler.tasks && arm64 && windows
|
||||
|
||||
package task
|
||||
|
||||
import "unsafe"
|
||||
|
||||
var systemStack uintptr
|
||||
|
||||
// calleeSavedRegs is the list of registers that must be saved and restored
|
||||
// when switching between tasks. Also see task_stack_arm64_windows.S that
|
||||
// relies on the exact layout of this struct.
|
||||
type calleeSavedRegs struct {
|
||||
x19 uintptr
|
||||
x20 uintptr
|
||||
x21 uintptr
|
||||
x22 uintptr
|
||||
x23 uintptr
|
||||
x24 uintptr
|
||||
x25 uintptr
|
||||
x26 uintptr
|
||||
x27 uintptr
|
||||
x28 uintptr
|
||||
x29 uintptr
|
||||
pc uintptr // aka x30 aka LR
|
||||
|
||||
d8 uintptr
|
||||
d9 uintptr
|
||||
d10 uintptr
|
||||
d11 uintptr
|
||||
d12 uintptr
|
||||
d13 uintptr
|
||||
d14 uintptr
|
||||
d15 uintptr
|
||||
}
|
||||
|
||||
// archInit runs architecture-specific setup for the goroutine startup.
|
||||
func (s *state) archInit(r *calleeSavedRegs, fn uintptr, args unsafe.Pointer) {
|
||||
// Store the initial sp for the startTask function (implemented in assembly).
|
||||
s.sp = uintptr(unsafe.Pointer(r))
|
||||
|
||||
// Initialize the registers.
|
||||
// These will be popped off of the stack on the first resume of the goroutine.
|
||||
|
||||
// Start the function at tinygo_startTask (defined in src/internal/task/task_stack_arm64_windows.S).
|
||||
// This assembly code calls a function (passed in x19) with a single argument
|
||||
// (passed in x20). After the function returns, it calls Pause().
|
||||
r.pc = uintptr(unsafe.Pointer(&startTask))
|
||||
|
||||
// Pass the function to call in x19.
|
||||
// This function is a compiler-generated wrapper which loads arguments out of a struct pointer.
|
||||
// See createGoroutineStartWrapper (defined in compiler/goroutine.go) for more information.
|
||||
r.x19 = fn
|
||||
|
||||
// Pass the pointer to the arguments struct in x20.
|
||||
r.x20 = uintptr(args)
|
||||
}
|
||||
|
||||
func (s *state) resume() {
|
||||
swapTask(s.sp, &systemStack)
|
||||
}
|
||||
|
||||
func (s *state) pause() {
|
||||
newStack := systemStack
|
||||
systemStack = 0
|
||||
swapTask(newStack, &s.sp)
|
||||
}
|
||||
|
||||
// SystemStack returns the system stack pointer when called from a task stack.
|
||||
// When called from the system stack, it returns 0.
|
||||
func SystemStack() uintptr {
|
||||
return systemStack
|
||||
}
|
|
@ -4,7 +4,6 @@ _tinygo_scanCurrentStack:
|
|||
#else
|
||||
.section .text.tinygo_scanCurrentStack
|
||||
.global tinygo_scanCurrentStack
|
||||
.type tinygo_scanCurrentStack, %function
|
||||
tinygo_scanCurrentStack:
|
||||
#endif
|
||||
// Sources:
|
||||
|
@ -12,12 +11,16 @@ tinygo_scanCurrentStack:
|
|||
// * https://godbolt.org/z/qrvrEh
|
||||
|
||||
// Save callee-saved registers.
|
||||
stp x29, x30, [sp, #-96]!
|
||||
stp x29, x30, [sp, #-160]!
|
||||
stp x28, x27, [sp, #16]
|
||||
stp x26, x25, [sp, #32]
|
||||
stp x24, x23, [sp, #48]
|
||||
stp x22, x21, [sp, #64]
|
||||
stp x20, x19, [sp, #80]
|
||||
stp d8, d9, [sp, #96]
|
||||
stp d10, d11, [sp, #112]
|
||||
stp d12, d13, [sp, #128]
|
||||
stp d14, d15, [sp, #144]
|
||||
|
||||
// Scan the stack.
|
||||
mov x0, sp
|
||||
|
@ -28,7 +31,7 @@ tinygo_scanCurrentStack:
|
|||
#endif
|
||||
|
||||
// Restore stack state and return.
|
||||
ldp x29, x30, [sp], #96
|
||||
ldp x29, x30, [sp], #160
|
||||
ret
|
||||
|
||||
|
||||
|
@ -38,7 +41,6 @@ _tinygo_longjmp:
|
|||
#else
|
||||
.section .text.tinygo_longjmp
|
||||
.global tinygo_longjmp
|
||||
.type tinygo_longjmp, %function
|
||||
tinygo_longjmp:
|
||||
#endif
|
||||
// Note: the code we jump to assumes x0 is set to a non-zero value if we
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
.section .text.tinygo_scanCurrentStack,"ax"
|
||||
.global tinygo_scanCurrentStack
|
||||
tinygo_scanCurrentStack:
|
||||
// Sources:
|
||||
// * https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170
|
||||
// * https://godbolt.org/z/foc1xncvb
|
||||
|
||||
// Save callee-saved registers.
|
||||
stp x29, x30, [sp, #-160]!
|
||||
stp x28, x27, [sp, #16]
|
||||
stp x26, x25, [sp, #32]
|
||||
stp x24, x23, [sp, #48]
|
||||
stp x22, x21, [sp, #64]
|
||||
stp x20, x19, [sp, #80]
|
||||
stp d8, d9, [sp, #96]
|
||||
stp d10, d11, [sp, #112]
|
||||
stp d12, d13, [sp, #128]
|
||||
stp d14, d15, [sp, #144]
|
||||
|
||||
// Scan the stack.
|
||||
mov x0, sp
|
||||
bl tinygo_scanstack
|
||||
|
||||
// Restore stack state and return.
|
||||
ldp x29, x30, [sp], #160
|
||||
ret
|
||||
|
||||
|
||||
.section .text.tinygo_longjmp,"ax"
|
||||
.global tinygo_longjmp
|
||||
tinygo_longjmp:
|
||||
// Note: the code we jump to assumes x0 is set to a non-zero value if we
|
||||
// jump from here (which is conveniently already the case).
|
||||
ldp x1, x2, [x0] // jumpSP, jumpPC
|
||||
mov sp, x1
|
||||
br x2
|
Загрузка…
Создание таблицы
Сослаться в новой задаче