runtime: use MSP/PSP registers for scheduling on Cortex-M

The Cortex-M architecture contains two stack pointers, designed to be
used by RTOSes: MSP and PSP (where MSP is the default at reset). In
fact, the ARM documentation recommends using the PSP for tasks in an
RTOS.

This commit switches to using the PSP for goroutine stacks. Aside from
being the recommended operation, this has the big advantage that the
NVIC automatically switches to the MSP when handling interrupts. This
avoids having to make every goroutine stack big enough that interrupts
can be handled on it.

Additionally, I've optimized the assembly code to save/restore registers
(made possible by this change). For Cortex-M3 and up, saving all
registers is just a single push instruction and restoring+branching is a
single pop instruction. For Cortex-M0 it's a bit more work because the
push/pop instructions there don't support most high registers.

Sidenote: the fact that you can pop a number of registers and branch at
the same time makes ARM not exactly a true RISC system. However, it's
very useful in this case.
Этот коммит содержится в:
Ayke van Laethem 2019-11-19 21:39:43 +01:00 коммит произвёл Ron Evans
родитель ea5df0f214
коммит 3d3e48179e
2 изменённых файлов: 106 добавлений и 82 удалений

Просмотреть файл

@ -19,76 +19,96 @@ tinygo_startTask:
// After return, exit this goroutine. This is a tail call.
bl runtime.yield
// tinygo_getSystemStackPointer returns the value of the main stack pointer
// (MSP) in r0. It takes no arguments.
.section .text.tinygo_getSystemStackPointer
.global tinygo_getSystemStackPointer
.type tinygo_getSystemStackPointer, %function
tinygo_getSystemStackPointer:
// The system stack pointer is always stored in the MSP register. It is read
// by name here, so the result is the MSP value rather than whatever sp
// currently refers to.
mrs r0, MSP
// Return to the caller with the result in r0.
bx lr
// switchToScheduler and switchToTask are also in the same section, to make sure
// relative branches work.
.section .text.tinygo_swapTask
.global tinygo_switchToScheduler
.type tinygo_switchToScheduler, %function
tinygo_switchToScheduler:
// r0 = oldTask *task
// Currently on the task stack (SP=PSP). We need to store the position on
// the stack where the in-use registers will be stored.
mov r1, sp
// tinygo_swapTask pushes nine words (r4-r11 and lr), i.e. 36 bytes, so the
// saved registers will start at sp-36.
subs r1, #36
// Record that address at offset 36 of the task. This is presumably the sp
// field of the Go task struct; the offset must be kept in sync with it.
str r1, [r0, #36]
// Tail-branch to the common code that saves registers and switches stacks.
b tinygo_swapTask
.global tinygo_switchToTask
.type tinygo_switchToTask, %function
tinygo_switchToTask:
// r0 = newTask *task
// Currently on the scheduler stack (SP=MSP). We'll have to update the PSP,
// and then we can invoke swapTask.
// Load the saved stack position from offset 36 of the task (the same slot
// that tinygo_switchToScheduler writes) and install it as the PSP. Note that
// this overwrites r0, so the task pointer is not available afterwards.
ldr r0, [r0, #36]
msr PSP, r0
// Continue executing in the swapTask function, which swaps the stack
// pointer. No branch is needed: the tinygo_swapTask label follows
// immediately, so execution falls through into it.
.global tinygo_swapTask
.type tinygo_swapTask, %function
tinygo_swapTask:
// r0 = oldTask *task
// r1 = newTask *task
// This function stores the current register state to a task struct and
// loads the state of another task to replace the current state. Apart from
// saving and restoring all relevant callee-saved registers, it also ends
// with branching to the last program counter (saved as the lr register, to
// follow the ARM calling convention).
// This function stores the current register state to the stack, switches to
// the other stack (MSP/PSP), and loads the register state from the other
// stack. Apart from saving and restoring all relevant callee-saved
// registers, it also ends with branching to the last program counter (saved
// as the lr register, to follow the ARM calling convention).
// On pre-Thumb2 CPUs (Cortex-M0 in particular), registers r8-r15 cannot be
// used directly. Only very few operations work on them, such as mov. That's
// why the higher register values are first stored in the temporary register
// r3 when loading/storing them.
// It is possible to reduce the swapTask by two instructions (~2 cycles) on
// Cortex-M0 by reordering the layout of the pushed registers from {r4-r11,
// lr} to {r8-r11, r4-r7, lr}. However, that also requires a change on the
// Go side (depending on thumb1/thumb2!) and so is not really worth the
// complexity.
// Store state to old task. It saves the lr instead of the pc, because that
// will be the pc after returning back to the old task (in a different
// invocation of swapTask).
str r4, [r0, #0]
str r5, [r0, #4]
str r6, [r0, #8]
str r7, [r0, #12]
#if defined(__thumb2__)
str r8, [r0, #16]
str r9, [r0, #20]
str r10, [r0, #24]
str r11, [r0, #28]
str sp, [r0, #32]
str lr, [r0, #36]
push {r4-r11, lr}
#else
mov r3, r8
str r3, [r0, #16]
mov r3, r9
str r3, [r0, #20]
mov r3, r10
str r3, [r0, #24]
mov r0, r8
mov r1, r9
mov r2, r10
mov r3, r11
str r3, [r0, #28]
mov r3, sp
str r3, [r0, #32]
mov r3, lr
str r3, [r0, #36]
push {r0-r3, lr}
push {r4-r7}
#endif
// Switch the stack. This could either switch from PSP to MSP, or from MSP
// to PSP. By using an XOR (eor), it will just switch to the other stack.
mrs r0, CONTROL // load CONTROL register
movs r3, #2
eors r0, r0, r3 // flip the SPSEL (active stack pointer) bit
msr CONTROL, r0 // store CONTROL register
isb // required to flush the pipeline
// Load state from new task and branch to the previous position in the
// program.
ldr r4, [r1, #0]
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
#if defined(__thumb2__)
ldr r8, [r1, #16]
ldr r9, [r1, #20]
ldr r10, [r1, #24]
ldr r11, [r1, #28]
ldr sp, [r1, #32]
pop {r4-r11, pc}
#else
ldr r3, [r1, #16]
mov r8, r3
ldr r3, [r1, #20]
mov r9, r3
ldr r3, [r1, #24]
mov r10, r3
ldr r3, [r1, #28]
pop {r4-r7}
pop {r0-r3}
mov r8, r0
mov r9, r1
mov r10, r2
mov r11, r3
ldr r3, [r1, #32]
mov sp, r3
pop {pc}
#endif
ldr r3, [r1, #36]
bx r3

Просмотреть файл

@ -12,8 +12,7 @@ const stackSize = 1024
const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0)))
var (
schedulerState = task{canary: stackCanary}
currentTask *task // currently running goroutine, or nil
currentTask *task // currently running goroutine, or nil
)
// This type points to the bottom of the goroutine stack and contains some state
@ -22,10 +21,10 @@ var (
type task struct {
// The order of fields in this structs must be kept in sync with assembly!
calleeSavedRegs
sp uintptr
pc uintptr
sp uintptr
taskState
canary uintptr // used to detect stack overflows
canaryPtr *uintptr // used to detect stack overflows
}
// getCoroutine returns the currently executing goroutine. It is used as an
@ -47,26 +46,24 @@ func (t *task) state() *taskState {
// to the scheduler.
func (t *task) resume() {
currentTask = t
swapTask(&schedulerState, t)
switchToTask(t)
currentTask = nil
}
// swapTask saves the current state to oldTask (which must contain the current
// task state) and switches to newTask. Note that this function usually does
// return, when another task (perhaps newTask) switches back to the current
// task.
//
// As an additional protection, before switching tasks, it checks whether this
// goroutine has overflowed the stack.
func swapTask(oldTask, newTask *task) {
if oldTask.canary != stackCanary {
runtimePanic("goroutine stack overflow")
}
swapTaskLower(oldTask, newTask)
}
// switchToScheduler saves the current state on the stack, saves the current
// stack pointer in the task, and switches to the scheduler. It must only be
// called when actually running on this task.
//
// When it returns, the scheduler has switched back to this task (for example,
// after a blocking operation completed).
//
// The function has no Go body; it is bound to the external symbol
// tinygo_switchToScheduler via the export directive below.
//
//export tinygo_switchToScheduler
func switchToScheduler(t *task)
//go:linkname swapTaskLower tinygo_swapTask
func swapTaskLower(oldTask, newTask *task)
// switchToTask switches from the scheduler to the task. It must only be called
// from the scheduler.
//
// When this function returns, the task just yielded control back to the
// scheduler.
//
// The function has no Go body; it is bound to the external symbol
// tinygo_switchToTask via the export directive below.
//
//export tinygo_switchToTask
func switchToTask(t *task)
// startTask is a small wrapper function that sets up the first (and only)
// argument to the new goroutine and makes sure it is exited when the goroutine
@ -79,11 +76,20 @@ var startTask [0]uint8
// adds it to the runqueue.
func startGoroutine(fn, args uintptr) {
stack := alloc(stackSize)
t := (*task)(stack)
t.sp = uintptr(stack) + stackSize
t := (*task)(unsafe.Pointer(uintptr(stack) + stackSize - unsafe.Sizeof(task{})))
// Set up the stack canary, a random number that should be checked when
// switching from the task back to the scheduler. The stack canary pointer
// points to the first word of the stack. If it has changed between now and
// the next stack switch, there was a stack overflow.
t.canaryPtr = (*uintptr)(unsafe.Pointer(stack))
*t.canaryPtr = stackCanary
// Store the initial sp/pc for the startTask function (implemented in
// assembly).
t.sp = uintptr(stack) + stackSize - unsafe.Sizeof(task{})
t.pc = uintptr(unsafe.Pointer(&startTask))
t.prepareStartTask(fn, args)
t.canary = stackCanary
scheduleLogTask(" start goroutine:", t)
runqueuePushBack(t)
}
@ -92,17 +98,15 @@ func startGoroutine(fn, args uintptr) {
// any wakeups must be configured before calling yield
//export runtime.yield
func yield() {
swapTask(currentTask, &schedulerState)
// Check whether the canary (the lowest address of the stack) is still
// valid. If it is not, a stack overflow has occurred.
if *currentTask.canaryPtr != stackCanary {
runtimePanic("goroutine stack overflow")
}
switchToScheduler(currentTask)
}
// getSystemStackPointer returns the current stack pointer of the system stack.
// This is not necessarily the same as the current stack pointer.
func getSystemStackPointer() uintptr {
if currentTask == nil {
// Currently on the system stack.
return getCurrentStackPointer()
} else {
// Currently in a goroutine.
return schedulerState.sp
}
}
// getSystemStackPointer returns the stack pointer of the system stack, which
// is not necessarily the stack pointer currently in use.
//
// The function has no Go body; it is bound to the external symbol
// tinygo_getSystemStackPointer via the export directive below.
//
//export tinygo_getSystemStackPointer
func getSystemStackPointer() uintptr