diff --git a/src/device/esp/esp32.S b/src/device/esp/esp32.S index af8e9d66..f8fc6f93 100644 --- a/src/device/esp/esp32.S +++ b/src/device/esp/esp32.S @@ -24,7 +24,7 @@ call_start_cpu0: wsr.ps a2 rsync - // Set WINDOWBASE to 1 << WINDOWSTART. + // Set WINDOWSTART to 1 << WINDOWBASE. rsr.windowbase a2 ssl a2 movi a2, 1 @@ -43,7 +43,7 @@ call_start_cpu0: rsync // Jump to the runtime start function written in Go. - j main + call4 main .section .text.tinygo_scanCurrentStack .global tinygo_scanCurrentStack diff --git a/src/internal/task/task_stack_esp32.S b/src/internal/task/task_stack_esp32.S new file mode 100644 index 00000000..364759b1 --- /dev/null +++ b/src/internal/task/task_stack_esp32.S @@ -0,0 +1,86 @@ +.section .text.tinygo_startTask,"ax",@progbits +.global tinygo_startTask +.type tinygo_startTask, %function +tinygo_startTask: + // Small assembly stub for starting a goroutine. This already runs on the + // new stack, control reaches this function after returning from the initial + // tinygo_swapTask below (the retw.n instruction). + // + // The stack was set up in such a way that it looks as if this function was + // paused using tinygo_swapTask by setting up the parent register window and + // return pointer as a call4 instruction - except such a call never took + // place. Instead, the stack pointer is switched to the new stack after all + // live-but-invisible registers have been flushed to the stack. This means + // that all registers as present in tinygo_swapTask are moved four up (a2 in + // tinygo_swapTask is a6 in this function). We don't use any of those + // registers however. Instead, the retw.n instruction will load them through + // an underflow exception from the stack which means we get a0-a3 as defined + // in task_stack_esp32.go. + + // Branch to the "goroutine start" function. 
The first (and only) parameter + // is stored in a2, but has to be moved to a6 to make it appear as a2 in the + // goroutine start function (due to changing the register window by four + // with callx4). + mov.n a6, a2 + callx4 a3 + + // After return, exit this goroutine. This call never returns. + call4 tinygo_pause + +.section .text.tinygo_swapTask,"ax",@progbits +.global tinygo_swapTask +.type tinygo_swapTask, %function +tinygo_swapTask: + // This function gets the following parameters: + // a2 = newStack uintptr + // a3 = oldStack *uintptr + + // Reserve 32 bytes on the stack. It really needs to be 32 bytes, with 16 + // extra at the bottom to adhere to the ABI. + entry sp, 32 + + // Disable interrupts while flushing registers. This is necessary because + // interrupts might want to use the stack pointer (at a1) which will be some + // arbitrary register while registers are flushed. + rsil a4, 3 // XCHAL_EXCM_LEVEL + + // Flush all unsaved registers to the stack. + // This trick has been borrowed from the Zephyr project: + // https://github.com/zephyrproject-rtos/zephyr/blob/d79b003758/arch/xtensa/include/xtensa-asm2-s.h#L17 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 + + // Restore interrupts. + wsr.ps a4 + + // At this point, the following is true: + // WindowStart == 1 << WindowBase + // Therefore, we don't need to do this manually. + // It also means that the stack pointer can now be safely modified. + + // Save a0, which stores the return address and the parent register window + // in the upper two bits. + s32i.n a0, sp, 0 + + // Save the current stack pointer in oldStack. + s32i.n sp, a3, 0 + + // Switch to the new stack pointer (newStack). + mov.n sp, a2 + + // Load a0, which is the previous return address from before the previous + // switch or the constructed return address to tinygo_startTask. This + // register also stores the parent register window. 
+ l32i.n a0, sp, 0 + + // Return into the new stack. This instruction will trigger a window + // underflow, reloading the saved registers from the stack. + retw.n diff --git a/src/internal/task/task_stack_esp32.go b/src/internal/task/task_stack_esp32.go new file mode 100644 index 00000000..ee1046b0 --- /dev/null +++ b/src/internal/task/task_stack_esp32.go @@ -0,0 +1,76 @@ +// +build scheduler.tasks,esp32 + +package task + +// The windowed ABI (used on the ESP32) is as follows: +// a0: return address (link register) +// a1: stack pointer (must be 16-byte aligned) +// a2-a7: incoming arguments +// a7: stack frame pointer (optional, normally unused in TinyGo) +// Sources: +// http://cholla.mmto.org/esp8266/xtensa.html +// https://0x04.net/~mwk/doc/xtensa.pdf + +import ( + "unsafe" +) + +var systemStack uintptr + +// calleeSavedRegs is the list of registers that must be saved and restored when +// switching between tasks. Also see task_stack_esp32.S that relies on the +// exact layout of this struct. +type calleeSavedRegs struct { + // Registers in the register window of tinygo_startTask. + a0 uintptr + a1 uintptr + a2 uintptr + a3 uintptr + + // Locals that can be used by tinygo_swapTask. + // The first field is the a0 loaded in tinygo_swapTask, the rest is unused. + locals [4]uintptr +} + +// archInit runs architecture-specific setup for the goroutine startup. +func (s *state) archInit(r *calleeSavedRegs, fn uintptr, args unsafe.Pointer) { + // Store the stack pointer for the tinygo_swapTask function (implemented in + // assembly). It needs to point to the locals field instead of a0 so that + // the retw.n at the end of tinygo_swapTask will return into + // tinygo_startTask with a0-a3 loaded (using the register window mechanism). + s.sp = uintptr(unsafe.Pointer(&r.locals[0])) + + // Start the goroutine at tinygo_startTask (defined in + // src/internal/task/task_stack_esp32.S). 
The topmost two bits are not part + // of the address but instead store the register window of the caller. + // In this case there is no caller, instead we set up the return address as + // if tinygo_startTask called tinygo_swapTask with a call4 instruction. + r.locals[0] = uintptr(unsafe.Pointer(&startTask))&^(3<<30) | (1 << 30) + + // Set up the stack pointer inside tinygo_startTask. + // Unlike most calling conventions, the windowed ABI actually saves the + // stack pointer on the stack to make register windowing work. + r.a1 = uintptr(unsafe.Pointer(r)) + 32 + + // Store the function pointer and the (only) parameter on the stack in a + // location that will be reloaded into registers when doing the + // pseudo-return to tinygo_startTask using the register window mechanism. + r.a3 = fn + r.a2 = uintptr(args) +} + +func (s *state) resume() { + swapTask(s.sp, &systemStack) +} + +func (s *state) pause() { + newStack := systemStack + systemStack = 0 + swapTask(newStack, &s.sp) +} + +// SystemStack returns the system stack pointer when called from a task stack. +// When called from the system stack, it returns 0. +func SystemStack() uintptr { + return systemStack +} diff --git a/src/runtime/arch_xtensa.go b/src/runtime/arch_xtensa.go index 440521b6..a9854af0 100644 --- a/src/runtime/arch_xtensa.go +++ b/src/runtime/arch_xtensa.go @@ -2,6 +2,8 @@ package runtime +import "device" + const GOARCH = "arm" // xtensa pretends to be arm // The bitness of the CPU (e.g. 8, 32, 64). @@ -12,4 +14,7 @@ func align(ptr uintptr) uintptr { return (ptr + 3) &^ 3 } -func getCurrentStackPointer() uintptr +func getCurrentStackPointer() uintptr { + // The stack pointer (sp) is a1. 
+ return device.AsmFull("mov {}, sp", nil) +} diff --git a/targets/esp32.json b/targets/esp32.json index 9e7b69b9..b83ab4f1 100644 --- a/targets/esp32.json +++ b/targets/esp32.json @@ -2,13 +2,16 @@ "inherits": ["xtensa"], "cpu": "esp32", "build-tags": ["esp32", "esp"], + "scheduler": "tasks", "linker": "xtensa-esp32-elf-ld", + "default-stack-size": 2048, "cflags": [ "-mcpu=esp32" ], "linkerscript": "targets/esp32.ld", "extra-files": [ - "src/device/esp/esp32.S" + "src/device/esp/esp32.S", + "src/internal/task/task_stack_esp32.S" ], "binary-format": "esp32", "flash-command": "esptool.py --chip=esp32 --port {port} write_flash 0x1000 {bin} -ff 80m -fm dout"