diff --git a/src/internal/task/task_stack_amd64_windows.S b/src/internal/task/task_stack_amd64_windows.S
index bdbcfca6..30654e50 100644
--- a/src/internal/task/task_stack_amd64_windows.S
+++ b/src/internal/task/task_stack_amd64_windows.S
@@ -1,5 +1,8 @@
 // Windows on amd64 has a slightly different ABI than other (*nix) systems.
 // Therefore, assembly functions need to be tweaked slightly.
+//
+// The calling convention is described here:
+// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170
 
 .section .text.tinygo_startTask,"ax"
 .global tinygo_startTask
@@ -17,8 +20,9 @@ tinygo_startTask:
     // Branch to the "goroutine start" function.
     callq *%r12
 
-    // After return, exit this goroutine. This is a tail call.
-    jmp tinygo_pause
+    // After return, exit this goroutine.
+    // This has to be a call, not a jump, to keep the stack correctly aligned.
+    callq tinygo_pause
 
 .global tinygo_swapTask
 .section .text.tinygo_swapTask,"ax"
@@ -35,6 +39,17 @@ tinygo_swapTask:
     pushq %rsi
     pushq %rdi
     pushq %rbp
+    sub $160, %rsp
+    movaps %xmm6, 144(%rsp)
+    movaps %xmm7, 128(%rsp)
+    movaps %xmm8, 112(%rsp)
+    movaps %xmm9, 96(%rsp)
+    movaps %xmm10, 80(%rsp)
+    movaps %xmm11, 64(%rsp)
+    movaps %xmm12, 48(%rsp)
+    movaps %xmm13, 32(%rsp)
+    movaps %xmm14, 16(%rsp)
+    movaps %xmm15, 0(%rsp)
     pushq %rbx
 
     // Save the current stack pointer in oldStack.
@@ -45,6 +60,17 @@ tinygo_swapTask:
 
     // Load saved register from the new stack.
     popq %rbx
+    movaps 0(%rsp), %xmm15
+    movaps 16(%rsp), %xmm14
+    movaps 32(%rsp), %xmm13
+    movaps 48(%rsp), %xmm12
+    movaps 64(%rsp), %xmm11
+    movaps 80(%rsp), %xmm10
+    movaps 96(%rsp), %xmm9
+    movaps 112(%rsp), %xmm8
+    movaps 128(%rsp), %xmm7
+    movaps 144(%rsp), %xmm6
+    add $160, %rsp
     popq %rbp
     popq %rdi
     popq %rsi
diff --git a/src/internal/task/task_stack_amd64_windows.go b/src/internal/task/task_stack_amd64_windows.go
index 41ddbe6c..ee90ec9c 100644
--- a/src/internal/task/task_stack_amd64_windows.go
+++ b/src/internal/task/task_stack_amd64_windows.go
@@ -13,15 +13,34 @@ var systemStack uintptr
 // calleeSavedRegs is the list of registers that must be saved and restored when
 // switching between tasks. Also see task_stack_amd64_windows.S that relies on
 // the exact layout of this struct.
+// The calling convention is described here:
+// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170
+// Most importantly, these are the registers we need to save/restore:
+//
+// > The x64 ABI considers registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14,
+// > R15, and XMM6-XMM15 nonvolatile. They must be saved and restored by a
+// > function that uses them.
 type calleeSavedRegs struct {
-	rbx uintptr
-	rbp uintptr
-	rdi uintptr
-	rsi uintptr
-	r12 uintptr
-	r13 uintptr
-	r14 uintptr
-	r15 uintptr
+	// Note: rbx is placed here so that the stack is correctly aligned when
+	// loading/storing the xmm registers.
+	rbx   uintptr
+	xmm15 [2]uint64
+	xmm14 [2]uint64
+	xmm13 [2]uint64
+	xmm12 [2]uint64
+	xmm11 [2]uint64
+	xmm10 [2]uint64
+	xmm9  [2]uint64
+	xmm8  [2]uint64
+	xmm7  [2]uint64
+	xmm6  [2]uint64
+	rbp   uintptr
+	rdi   uintptr
+	rsi   uintptr
+	r12   uintptr
+	r13   uintptr
+	r14   uintptr
+	r15   uintptr
 
 	pc uintptr
 }