arm64: fix register save/restore to include vector registers

Some vector registers must be preserved across calls, but this wasn't
happening on Linux and MacOS. When I added support for windows/arm64, I
saw that it required these vector registers to be preserved and assumed
this was Windows deviating from the standard calling convention. But
actually, Windows was just implementing the standard calling convention
and the bug was on Linux and MacOS.

This commit fixes the bug on Linux and MacOS and at the same time merges
the Go and assembly files as they no longer need to be separate.
Этот коммит содержится в:
Ayke van Laethem 2023-02-19 17:07:40 +01:00 коммит произвёл Ron Evans
родитель f41b6a3b96
коммит cce9c6d5a1
7 изменённых файлов: 29 добавлений и 187 удалений

Просмотреть файл

@ -326,9 +326,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) {
}
if goarch != "wasm" {
suffix := ""
if goos == "windows" {
// Windows uses a different calling convention from other operating
// systems so we need separate assembly files.
if goos == "windows" && goarch == "amd64" {
// Windows uses a different calling convention on amd64 from other
// operating systems so we need separate assembly files.
suffix = "_windows"
}
spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/asm_"+goarch+suffix+".S")

Просмотреть файл

@ -4,7 +4,6 @@ _tinygo_startTask:
#else
.section .text.tinygo_startTask
.global tinygo_startTask
.type tinygo_startTask, %function
tinygo_startTask:
#endif
.cfi_startproc
@ -35,7 +34,6 @@ tinygo_startTask:
#endif
.cfi_endproc
#ifndef __MACH__
.size tinygo_startTask, .-tinygo_startTask
#endif
@ -44,7 +42,6 @@ tinygo_startTask:
_tinygo_swapTask:
#else
.global tinygo_swapTask
.type tinygo_swapTask, %function
tinygo_swapTask:
#endif
// This function gets the following parameters:
@ -52,12 +49,16 @@ tinygo_swapTask:
// x1 = oldStack *uintptr
// Save all callee-saved registers:
stp x19, x20, [sp, #-96]!
stp x19, x20, [sp, #-160]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
stp d8, d9, [sp, #96]
stp d10, d11, [sp, #112]
stp d12, d13, [sp, #128]
stp d14, d15, [sp, #144]
// Save the current stack pointer in oldStack.
mov x8, sp
@ -67,10 +68,14 @@ tinygo_swapTask:
mov sp, x0
// Restore stack state and return.
ldp d14, d15, [sp, #144]
ldp d12, d13, [sp, #128]
ldp d10, d11, [sp, #112]
ldp d8, d9, [sp, #96]
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp], #96
ldp x19, x20, [sp], #160
ret

Просмотреть файл

@ -1,4 +1,4 @@
//go:build scheduler.tasks && arm64 && !windows
//go:build scheduler.tasks && arm64
package task
@ -21,8 +21,16 @@ type calleeSavedRegs struct {
x27 uintptr
x28 uintptr
x29 uintptr
pc uintptr // aka x30 aka LR
pc uintptr // aka x30 aka LR
d8 uintptr
d9 uintptr
d10 uintptr
d11 uintptr
d12 uintptr
d13 uintptr
d14 uintptr
d15 uintptr
}
// archInit runs architecture-specific setup for the goroutine startup.

Просмотреть файл

@ -1,65 +0,0 @@
.section .text.tinygo_startTask,"ax"
.global tinygo_startTask
tinygo_startTask:
.cfi_startproc
// Small assembly stub for starting a goroutine. This is already run on the
// new stack, with the callee-saved registers already loaded.
// Most importantly, x19 contains the pc of the to-be-started function and
// x20 contains the only argument it is given. Multiple arguments are packed
// into one by storing them in a new allocation.
// Indicate to the unwinder that there is nothing to unwind, this is the
// root frame. It avoids the following (bogus) error message in GDB:
// Backtrace stopped: previous frame identical to this frame (corrupt stack?)
.cfi_undefined lr
// Set the first argument of the goroutine start wrapper, which contains all
// the arguments.
mov x0, x20
// Branch to the "goroutine start" function. By using blx instead of bx,
// we'll return here instead of tail calling.
blr x19
// After return, exit this goroutine. This is a tail call.
b tinygo_pause
.cfi_endproc
.global tinygo_swapTask
tinygo_swapTask:
// This function gets the following parameters:
// x0 = newStack uintptr
// x1 = oldStack *uintptr
// Save all callee-saved registers:
stp x19, x20, [sp, #-160]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
stp d8, d9, [sp, #96]
stp d10, d11, [sp, #112]
stp d12, d13, [sp, #128]
stp d14, d15, [sp, #144]
// Save the current stack pointer in oldStack.
mov x8, sp
str x8, [x1]
// Switch to the new stack pointer.
mov sp, x0
// Restore stack state and return.
ldp d14, d15, [sp, #144]
ldp d12, d13, [sp, #128]
ldp d10, d11, [sp, #112]
ldp d8, d9, [sp, #96]
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp], #160
ret

Просмотреть файл

@ -1,72 +0,0 @@
//go:build scheduler.tasks && arm64 && windows
package task
import "unsafe"
var systemStack uintptr
// calleeSavedRegs is the list of registers that must be saved and restored
// when switching between tasks. Also see task_stack_arm64_windows.S that
// relies on the exact layout of this struct.
type calleeSavedRegs struct {
x19 uintptr
x20 uintptr
x21 uintptr
x22 uintptr
x23 uintptr
x24 uintptr
x25 uintptr
x26 uintptr
x27 uintptr
x28 uintptr
x29 uintptr
pc uintptr // aka x30 aka LR
d8 uintptr
d9 uintptr
d10 uintptr
d11 uintptr
d12 uintptr
d13 uintptr
d14 uintptr
d15 uintptr
}
// archInit runs architecture-specific setup for the goroutine startup.
func (s *state) archInit(r *calleeSavedRegs, fn uintptr, args unsafe.Pointer) {
// Store the initial sp for the startTask function (implemented in assembly).
s.sp = uintptr(unsafe.Pointer(r))
// Initialize the registers.
// These will be popped off of the stack on the first resume of the goroutine.
// Start the function at tinygo_startTask (defined in src/internal/task/task_stack_arm64_windows.S).
// This assembly code calls a function (passed in x19) with a single argument
// (passed in x20). After the function returns, it calls Pause().
r.pc = uintptr(unsafe.Pointer(&startTask))
// Pass the function to call in x19.
// This function is a compiler-generated wrapper which loads arguments out of a struct pointer.
// See createGoroutineStartWrapper (defined in compiler/goroutine.go) for more information.
r.x19 = fn
// Pass the pointer to the arguments struct in x20.
r.x20 = uintptr(args)
}
func (s *state) resume() {
swapTask(s.sp, &systemStack)
}
func (s *state) pause() {
newStack := systemStack
systemStack = 0
swapTask(newStack, &s.sp)
}
// SystemStack returns the system stack pointer when called from a task stack.
// When called from the system stack, it returns 0.
func SystemStack() uintptr {
return systemStack
}

Просмотреть файл

@ -4,7 +4,6 @@ _tinygo_scanCurrentStack:
#else
.section .text.tinygo_scanCurrentStack
.global tinygo_scanCurrentStack
.type tinygo_scanCurrentStack, %function
tinygo_scanCurrentStack:
#endif
// Sources:
@ -12,12 +11,16 @@ tinygo_scanCurrentStack:
// * https://godbolt.org/z/qrvrEh
// Save callee-saved registers.
stp x29, x30, [sp, #-96]!
stp x29, x30, [sp, #-160]!
stp x28, x27, [sp, #16]
stp x26, x25, [sp, #32]
stp x24, x23, [sp, #48]
stp x22, x21, [sp, #64]
stp x20, x19, [sp, #80]
stp d8, d9, [sp, #96]
stp d10, d11, [sp, #112]
stp d12, d13, [sp, #128]
stp d14, d15, [sp, #144]
// Scan the stack.
mov x0, sp
@ -28,7 +31,7 @@ tinygo_scanCurrentStack:
#endif
// Restore stack state and return.
ldp x29, x30, [sp], #96
ldp x29, x30, [sp], #160
ret
@ -38,7 +41,6 @@ _tinygo_longjmp:
#else
.section .text.tinygo_longjmp
.global tinygo_longjmp
.type tinygo_longjmp, %function
tinygo_longjmp:
#endif
// Note: the code we jump to assumes x0 is set to a non-zero value if we

Просмотреть файл

@ -1,36 +0,0 @@
.section .text.tinygo_scanCurrentStack,"ax"
.global tinygo_scanCurrentStack
tinygo_scanCurrentStack:
// Sources:
// * https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170
// * https://godbolt.org/z/foc1xncvb
// Save callee-saved registers.
stp x29, x30, [sp, #-160]!
stp x28, x27, [sp, #16]
stp x26, x25, [sp, #32]
stp x24, x23, [sp, #48]
stp x22, x21, [sp, #64]
stp x20, x19, [sp, #80]
stp d8, d9, [sp, #96]
stp d10, d11, [sp, #112]
stp d12, d13, [sp, #128]
stp d14, d15, [sp, #144]
// Scan the stack.
mov x0, sp
bl tinygo_scanstack
// Restore stack state and return.
ldp x29, x30, [sp], #160
ret
.section .text.tinygo_longjmp,"ax"
.global tinygo_longjmp
tinygo_longjmp:
// Note: the code we jump to assumes x0 is set to a non-zero value if we
// jump from here (which is conveniently already the case).
ldp x1, x2, [x0] // jumpSP, jumpPC
mov sp, x1
br x2