From 9fa667ce6341fe653f2cac571dd0a262fb390cdf Mon Sep 17 00:00:00 2001
From: Nia Waldvogel
Date: Mon, 22 Nov 2021 18:01:50 -0500
Subject: [PATCH] runtime: implement __sync libcalls as critical sections

This change implements the __sync atomic polyfill libcalls by disabling
interrupts. This was previously done in a limited capacity on some
targets, but this change uses a go:generate directive to emit all of
the calls on all microcontroller targets.
---
 src/runtime/arch_cortexm.go                   |  68 +----
 src/runtime/arch_tinygoriscv.go               | 105 +------
 src/runtime/atomics_critical.go               | 286 ++++++++++++++++++
 src/runtime/runtime.go                        |   2 +
 .../gen-critical-atomics.go                   | 182 +++++++++++
 5 files changed, 472 insertions(+), 171 deletions(-)
 create mode 100644 src/runtime/atomics_critical.go
 create mode 100644 tools/gen-critical-atomics/gen-critical-atomics.go

diff --git a/src/runtime/arch_cortexm.go b/src/runtime/arch_cortexm.go
index e4c88609..fbc4427c 100644
--- a/src/runtime/arch_cortexm.go
+++ b/src/runtime/arch_cortexm.go
@@ -1,3 +1,4 @@
+//go:build cortexm
 // +build cortexm
 
 package runtime
@@ -20,73 +21,6 @@ func getCurrentStackPointer() uintptr {
 	return uintptr(stacksave())
 }
 
-// Documentation:
-// * https://llvm.org/docs/Atomics.html
-// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html
-//
-// In the case of Cortex-M, some atomic operations are emitted inline while
-// others are emitted as libcalls. How many are emitted as libcalls depends on
-// the MCU core variant (M3 and higher support some 32-bit atomic operations
-// while M0 and M0+ do not).
-
-//export __sync_fetch_and_add_4
-func __sync_fetch_and_add_4(ptr *uint32, value uint32) uint32 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	*ptr = oldValue + value
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
-//export __sync_fetch_and_add_8
-func __sync_fetch_and_add_8(ptr *uint64, value uint64) uint64 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	*ptr = oldValue + value
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
-//export __sync_lock_test_and_set_4
-func __sync_lock_test_and_set_4(ptr *uint32, value uint32) uint32 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	*ptr = value
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
-//export __sync_lock_test_and_set_8
-func __sync_lock_test_and_set_8(ptr *uint64, value uint64) uint64 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	*ptr = value
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
-//export __sync_val_compare_and_swap_4
-func __sync_val_compare_and_swap_4(ptr *uint32, expected, desired uint32) uint32 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	if oldValue == expected {
-		*ptr = desired
-	}
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
-//export __sync_val_compare_and_swap_8
-func __sync_val_compare_and_swap_8(ptr *uint64, expected, desired uint64) uint64 {
-	mask := arm.DisableInterrupts()
-	oldValue := *ptr
-	if oldValue == expected {
-		*ptr = desired
-	}
-	arm.EnableInterrupts(mask)
-	return oldValue
-}
-
 // The safest thing to do here would just be to disable interrupts for
 // procPin/procUnpin. Note that a global variable is safe in this case, as any
 // access to procPinnedMask will happen with interrupts disabled.
diff --git a/src/runtime/arch_tinygoriscv.go b/src/runtime/arch_tinygoriscv.go index 3b4ff798..d4ec961b 100644 --- a/src/runtime/arch_tinygoriscv.go +++ b/src/runtime/arch_tinygoriscv.go @@ -1,3 +1,4 @@ +//go:build tinygo.riscv // +build tinygo.riscv package runtime @@ -8,110 +9,6 @@ func getCurrentStackPointer() uintptr { return uintptr(stacksave()) } -// Documentation: -// * https://llvm.org/docs/Atomics.html -// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html -// -// In the case of RISC-V, some operations may be implemented with libcalls if -// the operation is too big to be handled by assembly. Officially, these calls -// should be implemented with a lock-free algorithm but as (as of this time) all -// supported RISC-V chips have a single hart, we can simply disable interrupts -// to get the same behavior. - -//export __atomic_load_4 -func __atomic_load_4(ptr *uint32, ordering int32) uint32 { - mask := riscv.DisableInterrupts() - value := *ptr - riscv.EnableInterrupts(mask) - return value -} - -//export __atomic_store_4 -func __atomic_store_4(ptr *uint32, value uint32, ordering int32) { - mask := riscv.DisableInterrupts() - *ptr = value - riscv.EnableInterrupts(mask) -} - -//export __atomic_exchange_4 -func __atomic_exchange_4(ptr *uint32, value uint32, ordering int32) uint32 { - mask := riscv.DisableInterrupts() - oldValue := *ptr - *ptr = value - riscv.EnableInterrupts(mask) - return oldValue -} - -//export __atomic_compare_exchange_4 -func __atomic_compare_exchange_4(ptr, expected *uint32, desired uint32, success_ordering, failure_ordering int32) bool { - mask := riscv.DisableInterrupts() - oldValue := *ptr - success := oldValue == *expected - if success { - *ptr = desired - } else { - *expected = oldValue - } - riscv.EnableInterrupts(mask) - return success -} - -//export __atomic_fetch_add_4 -func __atomic_fetch_add_4(ptr *uint32, value uint32, ordering int32) uint32 { - mask := riscv.DisableInterrupts() - oldValue := *ptr - *ptr = oldValue + value - riscv.EnableInterrupts(mask) - return oldValue -} - -//export __atomic_load_8 -func __atomic_load_8(ptr *uint64, ordering int32) uint64 { - mask := riscv.DisableInterrupts() - value := *ptr - riscv.EnableInterrupts(mask) - return value -} - -//export __atomic_store_8 -func __atomic_store_8(ptr *uint64, value uint64, ordering int32) { - mask := riscv.DisableInterrupts() - *ptr = value - riscv.EnableInterrupts(mask) -} - -//export __atomic_exchange_8 -func __atomic_exchange_8(ptr *uint64, value uint64, ordering int32) uint64 { - mask := riscv.DisableInterrupts() - oldValue := *ptr - *ptr = value - riscv.EnableInterrupts(mask) - return oldValue -} - -//export __atomic_compare_exchange_8 -func __atomic_compare_exchange_8(ptr, expected *uint64, desired uint64, success_ordering, failure_ordering int32) bool { - mask := riscv.DisableInterrupts() - oldValue := *ptr - success := oldValue == *expected - if success { - *ptr = desired - } else { - *expected = oldValue - } - riscv.EnableInterrupts(mask) - return success -} - -//export __atomic_fetch_add_8 -func __atomic_fetch_add_8(ptr *uint64, value uint64, ordering int32) uint64 { - mask := riscv.DisableInterrupts() - oldValue := *ptr - *ptr = oldValue + value - riscv.EnableInterrupts(mask) - return oldValue -} - // The safest thing to do here would just be to disable interrupts for // procPin/procUnpin. Note that a global variable is safe in this case, as any // access to procPinnedMask will happen with interrupts disabled. 
diff --git a/src/runtime/atomics_critical.go b/src/runtime/atomics_critical.go new file mode 100644 index 00000000..615426ae --- /dev/null +++ b/src/runtime/atomics_critical.go @@ -0,0 +1,286 @@ +//go:build baremetal && !tinygo.wasm +// +build baremetal,!tinygo.wasm + +// Automatically generated file. DO NOT EDIT. +// This file implements standins for non-native atomics using critical sections. + +package runtime + +import ( + "runtime/interrupt" + _ "unsafe" +) + +// Documentation: +// * https://llvm.org/docs/Atomics.html +// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html +// +// Some atomic operations are emitted inline while others are emitted as libcalls. +// How many are emitted as libcalls depends on the MCU arch and core variant. + +// 16-bit atomics. + +//export __atomic_load_2 +func __atomic_load_2(ptr *uint16, ordering uintptr) uint16 { + // The LLVM docs for this say that there is a val argument after the pointer. + // That is a typo, and the GCC docs omit it. + mask := interrupt.Disable() + val := *ptr + interrupt.Restore(mask) + return val +} + +//export __atomic_store_2 +func __atomic_store_2(ptr *uint16, val uint16, ordering uintptr) { + mask := interrupt.Disable() + *ptr = val + interrupt.Restore(mask) +} + +//go:inline +func doAtomicCAS16(ptr *uint16, expected, desired uint16) uint16 { + mask := interrupt.Disable() + old := *ptr + if old == expected { + *ptr = desired + } + interrupt.Restore(mask) + return old +} + +//export __sync_val_compare_and_swap_2 +func __sync_val_compare_and_swap_2(ptr *uint16, expected, desired uint16) uint16 { + return doAtomicCAS16(ptr, expected, desired) +} + +//export __atomic_compare_exchange_2 +func __atomic_compare_exchange_2(ptr, expected *uint16, desired uint16, successOrder, failureOrder uintptr) bool { + exp := *expected + old := doAtomicCAS16(ptr, exp, desired) + return old == exp +} + +//go:inline +func doAtomicSwap16(ptr *uint16, new uint16) uint16 { + mask := interrupt.Disable() + old := *ptr + *ptr = new + interrupt.Restore(mask) + return old +} + +//export __sync_lock_test_and_set_2 +func __sync_lock_test_and_set_2(ptr *uint16, new uint16) uint16 { + return doAtomicSwap16(ptr, new) +} + +//export __atomic_exchange_2 +func __atomic_exchange_2(ptr *uint16, new uint16, ordering uintptr) uint16 { + return doAtomicSwap16(ptr, new) +} + +//go:inline +func doAtomicAdd16(ptr *uint16, value uint16) (old, new uint16) { + mask := interrupt.Disable() + old = *ptr + new = old + value + *ptr = new + interrupt.Restore(mask) + return old, new +} + +//export __atomic_fetch_add_2 +func __atomic_fetch_add_2(ptr *uint16, value uint16, ordering uintptr) uint16 { + old, _ := doAtomicAdd16(ptr, value) + return old +} + +//export __sync_fetch_and_add_2 +func __sync_fetch_and_add_2(ptr *uint16, value uint16) uint16 { + old, _ := doAtomicAdd16(ptr, value) + return old +} + +//export __atomic_add_fetch_2 +func __atomic_add_fetch_2(ptr *uint16, value uint16, ordering uintptr) uint16 { + _, new := doAtomicAdd16(ptr, value) + return new +} + +// 32-bit atomics. + +//export __atomic_load_4 +func __atomic_load_4(ptr *uint32, ordering uintptr) uint32 { + // The LLVM docs for this say that there is a val argument after the pointer. + // That is a typo, and the GCC docs omit it. 
+ mask := interrupt.Disable() + val := *ptr + interrupt.Restore(mask) + return val +} + +//export __atomic_store_4 +func __atomic_store_4(ptr *uint32, val uint32, ordering uintptr) { + mask := interrupt.Disable() + *ptr = val + interrupt.Restore(mask) +} + +//go:inline +func doAtomicCAS32(ptr *uint32, expected, desired uint32) uint32 { + mask := interrupt.Disable() + old := *ptr + if old == expected { + *ptr = desired + } + interrupt.Restore(mask) + return old +} + +//export __sync_val_compare_and_swap_4 +func __sync_val_compare_and_swap_4(ptr *uint32, expected, desired uint32) uint32 { + return doAtomicCAS32(ptr, expected, desired) +} + +//export __atomic_compare_exchange_4 +func __atomic_compare_exchange_4(ptr, expected *uint32, desired uint32, successOrder, failureOrder uintptr) bool { + exp := *expected + old := doAtomicCAS32(ptr, exp, desired) + return old == exp +} + +//go:inline +func doAtomicSwap32(ptr *uint32, new uint32) uint32 { + mask := interrupt.Disable() + old := *ptr + *ptr = new + interrupt.Restore(mask) + return old +} + +//export __sync_lock_test_and_set_4 +func __sync_lock_test_and_set_4(ptr *uint32, new uint32) uint32 { + return doAtomicSwap32(ptr, new) +} + +//export __atomic_exchange_4 +func __atomic_exchange_4(ptr *uint32, new uint32, ordering uintptr) uint32 { + return doAtomicSwap32(ptr, new) +} + +//go:inline +func doAtomicAdd32(ptr *uint32, value uint32) (old, new uint32) { + mask := interrupt.Disable() + old = *ptr + new = old + value + *ptr = new + interrupt.Restore(mask) + return old, new +} + +//export __atomic_fetch_add_4 +func __atomic_fetch_add_4(ptr *uint32, value uint32, ordering uintptr) uint32 { + old, _ := doAtomicAdd32(ptr, value) + return old +} + +//export __sync_fetch_and_add_4 +func __sync_fetch_and_add_4(ptr *uint32, value uint32) uint32 { + old, _ := doAtomicAdd32(ptr, value) + return old +} + +//export __atomic_add_fetch_4 +func __atomic_add_fetch_4(ptr *uint32, value uint32, ordering uintptr) uint32 { + _, new := doAtomicAdd32(ptr, value) + return new +} + +// 64-bit atomics. + +//export __atomic_load_8 +func __atomic_load_8(ptr *uint64, ordering uintptr) uint64 { + // The LLVM docs for this say that there is a val argument after the pointer. + // That is a typo, and the GCC docs omit it. 
+ mask := interrupt.Disable() + val := *ptr + interrupt.Restore(mask) + return val +} + +//export __atomic_store_8 +func __atomic_store_8(ptr *uint64, val uint64, ordering uintptr) { + mask := interrupt.Disable() + *ptr = val + interrupt.Restore(mask) +} + +//go:inline +func doAtomicCAS64(ptr *uint64, expected, desired uint64) uint64 { + mask := interrupt.Disable() + old := *ptr + if old == expected { + *ptr = desired + } + interrupt.Restore(mask) + return old +} + +//export __sync_val_compare_and_swap_8 +func __sync_val_compare_and_swap_8(ptr *uint64, expected, desired uint64) uint64 { + return doAtomicCAS64(ptr, expected, desired) +} + +//export __atomic_compare_exchange_8 +func __atomic_compare_exchange_8(ptr, expected *uint64, desired uint64, successOrder, failureOrder uintptr) bool { + exp := *expected + old := doAtomicCAS64(ptr, exp, desired) + return old == exp +} + +//go:inline +func doAtomicSwap64(ptr *uint64, new uint64) uint64 { + mask := interrupt.Disable() + old := *ptr + *ptr = new + interrupt.Restore(mask) + return old +} + +//export __sync_lock_test_and_set_8 +func __sync_lock_test_and_set_8(ptr *uint64, new uint64) uint64 { + return doAtomicSwap64(ptr, new) +} + +//export __atomic_exchange_8 +func __atomic_exchange_8(ptr *uint64, new uint64, ordering uintptr) uint64 { + return doAtomicSwap64(ptr, new) +} + +//go:inline +func doAtomicAdd64(ptr *uint64, value uint64) (old, new uint64) { + mask := interrupt.Disable() + old = *ptr + new = old + value + *ptr = new + interrupt.Restore(mask) + return old, new +} + +//export __atomic_fetch_add_8 +func __atomic_fetch_add_8(ptr *uint64, value uint64, ordering uintptr) uint64 { + old, _ := doAtomicAdd64(ptr, value) + return old +} + +//export __sync_fetch_and_add_8 +func __sync_fetch_and_add_8(ptr *uint64, value uint64) uint64 { + old, _ := doAtomicAdd64(ptr, value) + return old +} + +//export __atomic_add_fetch_8 +func __atomic_add_fetch_8(ptr *uint64, value uint64, ordering uintptr) uint64 { + _, new := doAtomicAdd64(ptr, value) + return new +} diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go index dc4c9f87..f22e2cdd 100644 --- a/src/runtime/runtime.go +++ b/src/runtime/runtime.go @@ -4,6 +4,8 @@ import ( "unsafe" ) +//go:generate go run ../../tools/gen-critical-atomics -out ./atomics_critical.go + const Compiler = "tinygo" // The compiler will fill this with calls to the initialization function of each diff --git a/tools/gen-critical-atomics/gen-critical-atomics.go b/tools/gen-critical-atomics/gen-critical-atomics.go new file mode 100644 index 00000000..f444c1c3 --- /dev/null +++ b/tools/gen-critical-atomics/gen-critical-atomics.go @@ -0,0 +1,182 @@ +package main + +import ( + "bytes" + "flag" + "os" + "os/exec" + "strings" + "text/template" +) + +var tmpl = template.Must(template.New("go").Funcs(template.FuncMap{ + "mul": func(x, y int) int { + return x * y + }, + "tuple": func(v ...interface{}) []interface{} { + return v + }, + "title": strings.Title, +}).Parse(`//+build baremetal,!tinygo.wasm + +// Automatically generated file. DO NOT EDIT. +// This file implements standins for non-native atomics using critical sections. + +package runtime + +import ( + _ "unsafe" + "runtime/interrupt" +) + +// Documentation: +// * https://llvm.org/docs/Atomics.html +// * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html +// +// Some atomic operations are emitted inline while others are emitted as libcalls. +// How many are emitted as libcalls depends on the MCU arch and core variant. 
+ +{{- define "load"}}{{$bits := mul . 8 -}} +//export __atomic_load_{{.}} +func __atomic_load_{{.}}(ptr *uint{{$bits}}, ordering uintptr) uint{{$bits}} { + // The LLVM docs for this say that there is a val argument after the pointer. + // That is a typo, and the GCC docs omit it. + mask := interrupt.Disable() + val := *ptr + interrupt.Restore(mask) + return val +} +{{end}} +{{- define "store"}}{{$bits := mul . 8 -}} +//export __atomic_store_{{.}} +func __atomic_store_{{.}}(ptr *uint{{$bits}}, val uint{{$bits}}, ordering uintptr) { + mask := interrupt.Disable() + *ptr = val + interrupt.Restore(mask) +} +{{end}} +{{- define "cas"}}{{$bits := mul . 8 -}} +//go:inline +func doAtomicCAS{{$bits}}(ptr *uint{{$bits}}, expected, desired uint{{$bits}}) uint{{$bits}} { + mask := interrupt.Disable() + old := *ptr + if old == expected { + *ptr = desired + } + interrupt.Restore(mask) + return old +} + +//export __sync_val_compare_and_swap_{{.}} +func __sync_val_compare_and_swap_{{.}}(ptr *uint{{$bits}}, expected, desired uint{{$bits}}) uint{{$bits}} { + return doAtomicCAS{{$bits}}(ptr, expected, desired) +} + +//export __atomic_compare_exchange_{{.}} +func __atomic_compare_exchange_{{.}}(ptr, expected *uint{{$bits}}, desired uint{{$bits}}, successOrder, failureOrder uintptr) bool { + exp := *expected + old := doAtomicCAS{{$bits}}(ptr, exp, desired) + return old == exp +} +{{end}} +{{- define "swap"}}{{$bits := mul . 8 -}} +//go:inline +func doAtomicSwap{{$bits}}(ptr *uint{{$bits}}, new uint{{$bits}}) uint{{$bits}} { + mask := interrupt.Disable() + old := *ptr + *ptr = new + interrupt.Restore(mask) + return old +} + +//export __sync_lock_test_and_set_{{.}} +func __sync_lock_test_and_set_{{.}}(ptr *uint{{$bits}}, new uint{{$bits}}) uint{{$bits}} { + return doAtomicSwap{{$bits}}(ptr, new) +} + +//export __atomic_exchange_{{.}} +func __atomic_exchange_{{.}}(ptr *uint{{$bits}}, new uint{{$bits}}, ordering uintptr) uint{{$bits}} { + return doAtomicSwap{{$bits}}(ptr, new) +} +{{end}} +{{- define "rmw"}} + {{- $opname := index . 0}} + {{- $bytes := index . 1}}{{$bits := mul $bytes 8}} + {{- $signed := index . 2}} + {{- $opdef := index . 3}} + +{{- $type := printf "int%d" $bits}} +{{- if not $signed}}{{$type = printf "u%s" $type}}{{end -}} +{{- $opfn := printf "doAtomic%s%d" (title $opname) $bits}} + +//go:inline +func {{$opfn}}(ptr *{{$type}}, value {{$type}}) (old, new {{$type}}) { + mask := interrupt.Disable() + old = *ptr + {{$opdef}} + *ptr = new + interrupt.Restore(mask) + return old, new +} + +//export __atomic_fetch_{{$opname}}_{{$bytes}} +func __atomic_fetch_{{$opname}}_{{$bytes}}(ptr *{{$type}}, value {{$type}}, ordering uintptr) {{$type}} { + old, _ := {{$opfn}}(ptr, value) + return old +} + +//export __sync_fetch_and_{{$opname}}_{{$bytes}} +func __sync_fetch_and_{{$opname}}_{{$bytes}}(ptr *{{$type}}, value {{$type}}) {{$type}} { + old, _ := {{$opfn}}(ptr, value) + return old +} + +//export __atomic_{{$opname}}_fetch_{{$bytes}} +func __atomic_{{$opname}}_fetch_{{$bytes}}(ptr *{{$type}}, value {{$type}}, ordering uintptr) {{$type}} { + _, new := {{$opfn}}(ptr, value) + return new +} +{{end}} +{{- define "atomics"}} +// {{mul . 8}}-bit atomics. + +{{/* These atomics are accessible directly from sync/atomic. */ -}} +{{template "load" .}} +{{template "store" .}} +{{template "cas" .}} +{{template "swap" .}} +{{template "rmw" (tuple "add" . 
false "new = old + value")}} + +{{- end}} +{{template "atomics" 2 -}} +{{template "atomics" 4 -}} +{{template "atomics" 8}} +`)) + +func main() { + var out string + flag.StringVar(&out, "out", "-", "output path") + flag.Parse() + f := os.Stdout + if out != "-" { + var err error + f, err = os.Create(out) + if err != nil { + panic(err) + } + defer f.Close() + } + var buf bytes.Buffer + err := tmpl.Execute(&buf, nil) + if err != nil { + panic(err) + } + cmd := exec.Command("gofmt") + cmd.Stdin = &buf + cmd.Stdout = f + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + panic(err) + } +}