diff --git a/Makefile b/Makefile index 507075dc..8af76c44 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ LLVM := $(shell go env GOPATH)/src/github.com/aykevl/llvm/bindings/go/llvm/workd LINK = $(LLVM)llvm-link LLC = $(LLVM)llc LLAS = $(LLVM)llvm-as +OPT = $(LLVM)opt CFLAGS = -Wall -Werror -Os -g -fno-exceptions -flto -ffunction-sections -fdata-sections $(LLFLAGS) @@ -72,10 +73,15 @@ build/tgo: *.go @mkdir -p build go build -o build/tgo -i . -# Build textual IR with the Go compiler. -build/%.o: src/examples/% src/examples/%/*.go build/tgo src/runtime/*.go build/runtime-$(TARGET)-combined.bc +# Build IR with the Go compiler. +build/%.bc: src/examples/% src/examples/%/*.go build/tgo src/runtime/*.go build/runtime-$(TARGET)-combined.bc ./build/tgo $(TGOFLAGS) -printir -runtime build/runtime-$(TARGET)-combined.bc -o $@ $(subst src/,,$<) +# Compile and optimize bitcode file. +build/%.o: build/%.bc + $(OPT) -coro-early -coro-split -coro-elide -O1 -coro-cleanup -o $< $< + $(LLC) -filetype=obj -o $@ $< + # Compile C sources for the runtime. build/%.bc: src/runtime/%.c src/runtime/*.h @mkdir -p build diff --git a/analysis.go b/analysis.go new file mode 100644 index 00000000..8042aedd --- /dev/null +++ b/analysis.go @@ -0,0 +1,161 @@ + +package main + +import ( + "golang.org/x/tools/go/ssa" +) + +// Analysis results over a whole program. +type Analysis struct { + functions map[*ssa.Function]*FuncMeta + needsScheduler bool + goCalls []*ssa.Go +} + +// Some analysis results of a single function. +type FuncMeta struct { + f *ssa.Function + blocking bool + parents []*ssa.Function // calculated by AnalyseCallgraph + children []*ssa.Function +} + +// Return a new Analysis object. +func NewAnalysis() *Analysis { + return &Analysis{ + functions: make(map[*ssa.Function]*FuncMeta), + } +} + +// Add a given package to the analyzer, to be analyzed later. 
+func (a *Analysis) AddPackage(pkg *ssa.Package) { + for _, member := range pkg.Members { + switch member := member.(type) { + case *ssa.Function: + a.addFunction(member) + case *ssa.Type: + ms := pkg.Prog.MethodSets.MethodSet(member.Type()) + for i := 0; i < ms.Len(); i++ { + a.addFunction(pkg.Prog.MethodValue(ms.At(i))) + } + } + } +} + +// Analyze the given function quickly without any recursion (direct callees, go +// statements, direct runtime.Sleep calls) and add it to the analyzer. +func (a *Analysis) addFunction(f *ssa.Function) { + fm := &FuncMeta{f: f} + for _, block := range f.Blocks { + for _, instr := range block.Instrs { + switch instr := instr.(type) { + case *ssa.Call: + switch call := instr.Call.Value.(type) { + case *ssa.Function: + name := getFunctionName(call, false) + if name == "runtime.Sleep" { + fm.blocking = true + } + fm.children = append(fm.children, call) + } + case *ssa.Go: + a.goCalls = append(a.goCalls, instr) + } + } + } + a.functions[f] = fm + + for _, child := range f.AnonFuncs { + a.addFunction(child) + } +} + +// Fill in parents of all functions. +// +// All packages need to be added before this pass can run, or it will produce +// incorrect results. +func (a *Analysis) AnalyseCallgraph() { + for f, fm := range a.functions { + for _, child := range fm.children { + childRes, ok := a.functions[child] + if !ok { + println("child not found: " + child.String() + ", function: " + f.String()) // String() also works when Pkg is nil + continue + } + childRes.parents = append(childRes.parents, f) + } + } +} + +// Analyse which functions are recursively blocking. +// +// Depends on AnalyseCallgraph. +func (a *Analysis) AnalyseBlockingRecursive() { + worklist := make([]*FuncMeta, 0) + + // Fill worklist with directly blocking functions. + for _, fm := range a.functions { + if fm.blocking { + worklist = append(worklist, fm) + } + } + + // Keep reducing this worklist by marking a function as recursively blocking + // from the worklist and pushing all its parents that are non-blocking.
+ // This is somewhat similar to a worklist in a mark-sweep garbage collector. + // The work items are then grey objects. + for len(worklist) != 0 { + // Pick the topmost. + fm := worklist[len(worklist)-1] + worklist = worklist[:len(worklist)-1] + for _, parent := range fm.parents { + parentfm := a.functions[parent] + if !parentfm.blocking { + parentfm.blocking = true + worklist = append(worklist, parentfm) + } + } + } +} + +// Check whether we need a scheduler. This is only necessary when there are go +// calls that start blocking functions (if they're not blocking, the go function +// can be turned into a regular function call). +// +// Depends on AnalyseBlockingRecursive. +func (a *Analysis) AnalyseGoCalls() { + for _, instr := range a.goCalls { + if a.isBlocking(instr.Call.Value) { + a.needsScheduler = true + } + } +} + +// Whether the program needs a scheduler. +// +// Depends on AnalyseGoCalls. +func (a *Analysis) NeedsScheduler() bool { + return a.needsScheduler +} + +// Whether this function blocks. Builtins are also accepted for convenience. +// They will always be non-blocking. +// +// Depends on AnalyseBlockingRecursive and AnalyseGoCalls.
+func (a *Analysis) IsBlocking(f ssa.Value) bool { + if !a.needsScheduler { + return false + } + return a.isBlocking(f) +} + +func (a *Analysis) isBlocking(f ssa.Value) bool { + switch f := f.(type) { + case *ssa.Builtin: + return false + case *ssa.Function: + return a.functions[f].blocking + default: + panic("Analysis.IsBlocking on unknown type") + } +} diff --git a/src/examples/blinky/blinky.go b/src/examples/blinky/blinky.go index 10aaeb79..b282806e 100644 --- a/src/examples/blinky/blinky.go +++ b/src/examples/blinky/blinky.go @@ -7,15 +7,34 @@ import ( ) func main() { - led := machine.GPIO{17} // LED 1 on the PCA10040 + go led1() + led2() +} + +func led1() { + led := machine.GPIO{machine.LED} led.Configure(machine.GPIOConfig{Mode: machine.GPIO_OUTPUT}) for { - println("LED on") - led.Set(false) - runtime.Sleep(runtime.Millisecond * 500) + println("+") + led.Low() + runtime.Sleep(runtime.Millisecond * 1000) - println("LED off") - led.Set(true) - runtime.Sleep(runtime.Millisecond * 500) + println("-") + led.High() + runtime.Sleep(runtime.Millisecond * 1000) + } +} + +func led2() { + led := machine.GPIO{machine.LED2} + led.Configure(machine.GPIOConfig{Mode: machine.GPIO_OUTPUT}) + for { + println(" +") + led.Low() + runtime.Sleep(runtime.Millisecond * 420) + + println(" -") + led.High() + runtime.Sleep(runtime.Millisecond * 420) } } diff --git a/src/examples/hello/hello.go b/src/examples/hello/hello.go index 607f3922..a9762d65 100644 --- a/src/examples/hello/hello.go +++ b/src/examples/hello/hello.go @@ -25,6 +25,16 @@ func main() { printItf(5) printItf(byte('x')) printItf("foo") + + runFunc(hello) // must be indirect to avoid obvious inlining +} + +func runFunc(f func()) { + f() +} + +func hello() { + println("hello from function pointer!") } func strlen(s string) int { diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go index ac8ee4ca..81c73560 100644 --- a/src/runtime/runtime.go +++ b/src/runtime/runtime.go @@ -6,6 +6,13 @@ const Compiler = "tgo" // 
The bitness of the CPU (e.g. 8, 32, 64). Set by the compiler as a constant. var TargetBits uint8 +func Sleep(d Duration) { + // This function is treated specially by the compiler: when goroutines are + // used, it is transformed into a llvm.coro.suspend() call. + // When goroutines are not used this function behaves as normal. + sleep(d) +} + func _panic(message interface{}) { printstring("panic: ") printitf(message) diff --git a/src/runtime/runtime.ll b/src/runtime/runtime.ll index fa271f16..677e5152 100644 --- a/src/runtime/runtime.ll +++ b/src/runtime/runtime.ll @@ -2,9 +2,26 @@ source_filename = "runtime/runtime.ll" declare void @runtime.initAll() declare void @main.main() +declare i8* @main.main$async(i8*) +declare void @runtime.scheduler(i8*) + +; Will be changed to true if there are 'go' statements in the compiled program. +@.has_scheduler = private unnamed_addr constant i1 false define i32 @main() { call void @runtime.initAll() + %has_scheduler = load i1, i1* @.has_scheduler + ; This branch will be optimized away. Only one of the targets will remain. + br i1 %has_scheduler, label %with_scheduler, label %without_scheduler + +with_scheduler: + ; Initialize main and run the scheduler. + %main = call i8* @main.main$async(i8* null) + call void @runtime.scheduler(i8* %main) + ret i32 0 + +without_scheduler: + ; No scheduler is necessary. Call main directly. call void @main.main() ret i32 0 } diff --git a/src/runtime/runtime_nrf.go b/src/runtime/runtime_nrf.go index de0e2b3e..da740adf 100644 --- a/src/runtime/runtime_nrf.go +++ b/src/runtime/runtime_nrf.go @@ -45,8 +45,32 @@ func putchar(c byte) { nrf.UART0.EVENTS_TXDRDY = 0 } -func Sleep(d Duration) { - C.rtc_sleep(C.uint32_t(d / 32)) // TODO: not accurate (must be d / 30.5175...) 
+func sleep(d Duration) { + ticks64 := d / 32 + for ticks64 > 0 { + monotime() // update timestamp + ticks := uint32((ticks64-1)&0x7fffff) + 1 // 1..0x800000 ticks per step: never zero, never more than remains + C.rtc_sleep(C.uint32_t(ticks)) // TODO: not accurate (must be d / 30.5175...) + ticks64 -= Duration(ticks) + } +} + +var ( + timestamp uint64 // microseconds since boottime + rtcLastCounter uint32 // 24 bits ticks +) + +// Monotonically increasing number of microseconds since start. +// +// Note: very long pauses between measurements (more than 8 minutes) may +// overflow the counter, leading to incorrect results. This might be fixed by +// handling the overflow event. +func monotime() uint64 { + rtcCounter := uint32(nrf.RTC0.COUNTER) + offset := (rtcCounter - rtcLastCounter) & 0xffffff // change since last measurement (mod 2^24) + rtcLastCounter = rtcCounter + timestamp += uint64(offset) * 32 // TODO: not precise + return timestamp } func abort() { diff --git a/src/runtime/runtime_unix.go b/src/runtime/runtime_unix.go index 5ba18fef..1cb8256a 100644 --- a/src/runtime/runtime_unix.go +++ b/src/runtime/runtime_unix.go @@ -10,6 +10,7 @@ import ( // #include // #include // #include +// #include import "C" const Microsecond = 1 @@ -18,10 +19,20 @@ func putchar(c byte) { C.putchar(C.int(c)) } -func Sleep(d Duration) { +func sleep(d Duration) { C.usleep(C.useconds_t(d)) } +// Return monotonic time in microseconds. +// +// TODO: use nanoseconds?
+// TODO: noescape +func monotime() uint64 { + var ts C.struct_timespec + C.clock_gettime(C.CLOCK_MONOTONIC, &ts) + return uint64(ts.tv_sec) * 1000 * 1000 + uint64(ts.tv_nsec) / 1000 +} + func abort() { C.abort() } @@ -35,5 +46,5 @@ func alloc(size uintptr) unsafe.Pointer { } func free(ptr unsafe.Pointer) { - C.free(ptr) + //C.free(ptr) // TODO } diff --git a/src/runtime/scheduler.go b/src/runtime/scheduler.go new file mode 100644 index 00000000..3af75017 --- /dev/null +++ b/src/runtime/scheduler.go @@ -0,0 +1,249 @@ + +package runtime + +// This file implements the Go scheduler using coroutines. +// A goroutine contains a whole stack. A coroutine is just a single function. +// How do we use coroutines for goroutines, then? +// * Every function that contains a blocking call (like sleep) is marked +// blocking, and all its parents (callers) are marked blocking as well +// transitively until the root (main.main or a go statement). +// * A blocking function that calls a non-blocking function is called as +// usual. +// * A blocking function that calls a blocking function passes its own +// coroutine handle as a parameter to the subroutine and will make sure its +// own coroutine is removed from the scheduler. When the subroutine returns, +// it will re-insert the parent into the scheduler. +// Note that a goroutine is generally called a 'task' for brevity and because +// that's the more common term among RTOSes. But a goroutine and a task are +// basically the same thing. Although, the code often uses the word 'task' to +// refer to both a coroutine and a goroutine, as most of the scheduler isn't +// aware of the difference. +// +// For more background on coroutines in LLVM: +// https://llvm.org/docs/Coroutines.html + +import ( + "unsafe" +) + +// State/promise of a task. Internally represented as: +// +// {i8 state, i32 data, i8* next} +type taskState struct { + state uint8 + data uint32 + next taskInstance +} + +// Pointer to a task.
Wrap unsafe.Pointer to provide some sort of type safety. +type taskInstance unsafe.Pointer + +// Various states a task can be in. Not always updated (especially +// TASK_STATE_RUNNABLE). +const ( + TASK_STATE_RUNNABLE = iota + TASK_STATE_SLEEP + TASK_STATE_CALL // waiting for a sub-coroutine +) + +// Queues used by the scheduler. +// +// TODO: runqueueFront can be removed by making the run queue a circular linked +// list. The runqueueBack will simply refer to the front in the 'next' pointer. +var ( + runqueueFront taskInstance + runqueueBack taskInstance + sleepQueue taskInstance + sleepQueueBaseTime uint64 +) + +// Translated to void @llvm.coro.resume(i8*). +func _llvm_coro_resume(taskInstance) + +// Translated to void @llvm.coro.destroy(i8*). +func _llvm_coro_destroy(taskInstance) + +// Translated to i1 @llvm.coro.done(i8*). +func _llvm_coro_done(taskInstance) bool + +// Translated to i8* @llvm.coro.promise(i8*, i32, i1). +func _llvm_coro_promise(taskInstance, int32, bool) unsafe.Pointer + +// Get the promise belonging to a task. +func taskPromise(t taskInstance) *taskState { + return (*taskState)(_llvm_coro_promise(t, 4, false)) +} + +// Simple logging, for debugging. +func scheduleLog(msg string) { + //println(msg) +} + +// Simple logging with a task pointer, for debugging. +func scheduleLogTask(msg string, t taskInstance) { + //println(msg, t) +} + +// Set the task state to sleep for a given time. +// +// This is a compiler intrinsic. +func sleepTask(caller taskInstance, duration Duration) { + promise := taskPromise(caller) + promise.state = TASK_STATE_SLEEP + promise.data = uint32(duration) // TODO: longer durations +} + +// Wait for the result of an async call. This means that the parent goroutine +// will be removed from the runqueue and be rescheduled by the callee. +// +// This is a compiler intrinsic. 
+func waitForAsyncCall(caller taskInstance) { + promise := taskPromise(caller) + promise.state = TASK_STATE_CALL +} + +// Add a task to the runnable or sleep queue, depending on the state. +// +// This is a compiler intrinsic. +func scheduleTask(t taskInstance) { + if t == nil { + return + } + scheduleLogTask(" schedule task:", t) + // See what we should do with this task: try to execute it directly + // again or let it sleep for a bit. + promise := taskPromise(t) + if promise.state == TASK_STATE_CALL { + return // calling an async task, the subroutine will re-activate the parent + } else if promise.state == TASK_STATE_SLEEP && promise.data != 0 { + addSleepTask(t) + } else { + pushTask(t) + } +} + +// Add this task to the end of the run queue. May also destroy the task if it's +// done. +func pushTask(t taskInstance) { + if _llvm_coro_done(t) { + scheduleLogTask(" destroy task:", t) + _llvm_coro_destroy(t) + return + } + if runqueueBack == nil { // empty runqueue + runqueueBack = t + runqueueFront = t + } else { + lastTaskPromise := taskPromise(runqueueBack) + lastTaskPromise.next = t + runqueueBack = t + } +} + +// Get a task from the front of the run queue. May return nil if there is none. +func popTask() taskInstance { + t := runqueueFront + if t == nil { + return nil + } + scheduleLogTask(" popTask:", t) + promise := taskPromise(t) + runqueueFront = promise.next + if runqueueFront == nil { + runqueueBack = nil + } + promise.next = nil + return t +} + +// Add this task to the sleep queue, assuming its state is set to sleeping. The head's data is relative to sleepQueueBaseTime, every later entry's data to its predecessor. +func addSleepTask(t taskInstance) { + now := monotime() + if sleepQueue == nil { + scheduleLog(" -> sleep new queue") + // Create new linked list for the sleep queue. + sleepQueue = t + sleepQueueBaseTime = now + return + } + + // Make sure promise.data is relative to the queue time base. + promise := taskPromise(t) + promise.data += uint32(now - sleepQueueBaseTime) + // Insert at front of sleep queue.
+ if promise.data < taskPromise(sleepQueue).data { + scheduleLog(" -> sleep at start") + taskPromise(sleepQueue).data -= promise.data + promise.next = sleepQueue + sleepQueue = t + return + } + + // Add to sleep queue (in the middle or at the end). + queueIndex := sleepQueue + for { + promise.data -= taskPromise(queueIndex).data + if taskPromise(queueIndex).next == nil || taskPromise(taskPromise(queueIndex).next).data > promise.data { + if taskPromise(queueIndex).next == nil { + scheduleLog(" -> sleep at end") + promise.next = nil + } else { + scheduleLog(" -> sleep in middle") + promise.next = taskPromise(queueIndex).next + taskPromise(promise.next).data -= promise.data + } + taskPromise(queueIndex).next = t + break + } + queueIndex = taskPromise(queueIndex).next + } +} + +// Run the scheduler until all tasks have finished. +// It takes an initial task (main.main) to bootstrap. +func scheduler(main taskInstance) { + // Initial task. + scheduleTask(main) + + // Main scheduler loop. + for { + scheduleLog("\n schedule") + now := monotime() + + // Add tasks that are done sleeping to the end of the runqueue so they + // will be executed soon. + if sleepQueue != nil && now - sleepQueueBaseTime >= uint64(taskPromise(sleepQueue).data) { + scheduleLog(" run <- sleep") + t := sleepQueue + promise := taskPromise(t) + sleepQueueBaseTime += uint64(promise.data) + sleepQueue = promise.next + promise.next = nil + pushTask(t) + } + + scheduleLog(" <- popTask") + t := popTask() + if t == nil { + if sleepQueue == nil { + // No more tasks to execute. + // It would be nice if we could detect deadlocks here, because + // there might still be functions waiting on each other in a + // deadlock. + scheduleLog(" no tasks left!") + return + } + scheduleLog(" sleeping...") + timeLeft := uint64(taskPromise(sleepQueue).data) - (now - sleepQueueBaseTime) + sleep(Duration(timeLeft)) + continue + } + + // Run the given task.
+ scheduleLogTask(" run:", t) + _llvm_coro_resume(t) + + // Add the just resumed task to the run queue or the sleep queue. + scheduleTask(t) + } +} diff --git a/tgo.go b/tgo.go index d72e4dcd..266462ec 100644 --- a/tgo.go +++ b/tgo.go @@ -41,19 +41,33 @@ type Compiler struct { stringType llvm.Type interfaceType llvm.Type typeassertType llvm.Type + taskDataType llvm.Type allocFunc llvm.Value freeFunc llvm.Value + coroIdFunc llvm.Value + coroSizeFunc llvm.Value + coroBeginFunc llvm.Value + coroSuspendFunc llvm.Value + coroEndFunc llvm.Value + coroFreeFunc llvm.Value itfTypeNumbers map[types.Type]uint64 itfTypes []types.Type initFuncs []llvm.Value + analysis *Analysis } type Frame struct { - llvmFn llvm.Value - params map[*ssa.Parameter]int // arguments to the function - locals map[ssa.Value]llvm.Value // local variables - blocks map[*ssa.BasicBlock]llvm.BasicBlock - phis []Phi + fn *ssa.Function + llvmFn llvm.Value + params map[*ssa.Parameter]int // arguments to the function + locals map[ssa.Value]llvm.Value // local variables + blocks map[*ssa.BasicBlock]llvm.BasicBlock + phis []Phi + blocking bool + taskState llvm.Value + taskHandle llvm.Value + cleanupBlock llvm.BasicBlock + suspendBlock llvm.BasicBlock } func pkgPrefix(pkg *ssa.Package) string { @@ -72,6 +86,7 @@ func NewCompiler(pkgName, triple string) (*Compiler, error) { c := &Compiler{ triple: triple, itfTypeNumbers: make(map[types.Type]uint64), + analysis: NewAnalysis(), } target, err := llvm.GetTargetFromTriple(triple) @@ -100,12 +115,33 @@ func NewCompiler(pkgName, triple string) (*Compiler, error) { // Go typeassert result: tuple of (ptr, bool) c.typeassertType = llvm.StructType([]llvm.Type{c.i8ptrType, llvm.Int1Type()}, false) + // Goroutine / task data: {i8 state, i32 data, i8* next} + c.taskDataType = llvm.StructType([]llvm.Type{llvm.Int8Type(), llvm.Int32Type(), c.i8ptrType}, false) + allocType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.uintptrType}, false) c.allocFunc = llvm.AddFunction(c.mod, 
"runtime.alloc", allocType) freeType := llvm.FunctionType(llvm.VoidType(), []llvm.Type{c.i8ptrType}, false) c.freeFunc = llvm.AddFunction(c.mod, "runtime.free", freeType) + coroIdType := llvm.FunctionType(c.ctx.TokenType(), []llvm.Type{llvm.Int32Type(), c.i8ptrType, c.i8ptrType, c.i8ptrType}, false) + c.coroIdFunc = llvm.AddFunction(c.mod, "llvm.coro.id", coroIdType) + + coroSizeType := llvm.FunctionType(llvm.Int32Type(), nil, false) + c.coroSizeFunc = llvm.AddFunction(c.mod, "llvm.coro.size.i32", coroSizeType) + + coroBeginType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.ctx.TokenType(), c.i8ptrType}, false) + c.coroBeginFunc = llvm.AddFunction(c.mod, "llvm.coro.begin", coroBeginType) + + coroSuspendType := llvm.FunctionType(llvm.Int8Type(), []llvm.Type{c.ctx.TokenType(), llvm.Int1Type()}, false) + c.coroSuspendFunc = llvm.AddFunction(c.mod, "llvm.coro.suspend", coroSuspendType) + + coroEndType := llvm.FunctionType(llvm.Int1Type(), []llvm.Type{c.i8ptrType, llvm.Int1Type()}, false) + c.coroEndFunc = llvm.AddFunction(c.mod, "llvm.coro.end", coroEndType) + + coroFreeType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.ctx.TokenType(), c.i8ptrType}, false) + c.coroFreeFunc = llvm.AddFunction(c.mod, "llvm.coro.free", coroFreeType) + return c, nil } @@ -188,6 +224,13 @@ func (c *Compiler) Parse(mainPath string, buildTags []string) error { } } + for _, pkg := range packageList { + c.analysis.AddPackage(pkg) + } + c.analysis.AnalyseCallgraph() // set up callgraph + c.analysis.AnalyseBlockingRecursive() // make all parents of blocking calls blocking (transitively) + c.analysis.AnalyseGoCalls() // check whether we need a scheduler + // Transform each package into LLVM IR. for _, pkg := range packageList { err := c.parsePackage(program, pkg) @@ -214,7 +257,19 @@ func (c *Compiler) Parse(mainPath string, buildTags []string) error { // Set functions referenced in runtime.ll to internal linkage, to improve // optimization (hopefully). 
main := c.mod.NamedFunction("main.main") - main.SetLinkage(llvm.PrivateLinkage) + if !main.IsDeclaration() { + main.SetLinkage(llvm.PrivateLinkage) + } + mainAsync := c.mod.NamedFunction("main.main$async") + if !mainAsync.IsDeclaration() { + mainAsync.SetLinkage(llvm.PrivateLinkage) + } + c.mod.NamedFunction("runtime.scheduler").SetLinkage(llvm.PrivateLinkage) + + if c.analysis.NeedsScheduler() { + // Enable the scheduler. + c.mod.NamedGlobal(".has_scheduler").SetInitializer(llvm.ConstInt(llvm.Int1Type(), 1, false)) + } return nil } @@ -260,6 +315,32 @@ func (c *Compiler) getLLVMType(goType types.Type) (llvm.Type, error) { return llvm.Type{}, err } return llvm.PointerType(ptrTo, 0), nil + case *types.Signature: // function pointer + // return value + var err error + var returnType llvm.Type + if typ.Results().Len() == 0 { + returnType = llvm.VoidType() + } else if typ.Results().Len() == 1 { + returnType, err = c.getLLVMType(typ.Results().At(0).Type()) + if err != nil { + return llvm.Type{}, err + } + } else { + return llvm.Type{}, errors.New("todo: multiple return values in function pointer") + } + // param values + var paramTypes []llvm.Type + params := typ.Params() + for i := 0; i < params.Len(); i++ { + subType, err := c.getLLVMType(params.At(i).Type()) + if err != nil { + return llvm.Type{}, err + } + paramTypes = append(paramTypes, subType) + } + // make a function pointer of it + return llvm.PointerType(llvm.FunctionType(returnType, paramTypes, false), 0), nil case *types.Struct: members := make([]llvm.Type, typ.NumFields()) for i := 0; i < typ.NumFields(); i++ { @@ -327,18 +408,22 @@ func (c *Compiler) isPointer(typ types.Type) bool { } } -func getFunctionName(fn *ssa.Function) string { +func getFunctionName(fn *ssa.Function, blocking bool) string { + suffix := "" + if blocking { + suffix = "$async" + } if fn.Signature.Recv() != nil { // Method on a defined type. typeName := fn.Params[0].Type().(*types.Named).Obj().Name() - return pkgPrefix(fn.Pkg) + "." 
+ typeName + "." + fn.Name() + return pkgPrefix(fn.Pkg) + "." + typeName + "." + fn.Name() + suffix } else { // Bare function. if strings.HasPrefix(fn.Name(), "_Cfunc_") { // Name CGo functions directly. return fn.Name()[len("_Cfunc_"):] } else { - name := pkgPrefix(fn.Pkg) + "." + fn.Name() + name := pkgPrefix(fn.Pkg) + "." + fn.Name() + suffix if fn.Pkg.Pkg.Path() == "runtime" && strings.HasPrefix(fn.Name(), "_llvm_") { // Special case for LLVM intrinsics in the runtime. name = "llvm." + strings.Replace(fn.Name()[len("_llvm_"):], "_", ".", -1) @@ -488,13 +573,20 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) { f.WriteTo(os.Stdout) frame := &Frame{ - params: make(map[*ssa.Parameter]int), - locals: make(map[ssa.Value]llvm.Value), - blocks: make(map[*ssa.BasicBlock]llvm.BasicBlock), + fn: f, + params: make(map[*ssa.Parameter]int), + locals: make(map[ssa.Value]llvm.Value), + blocks: make(map[*ssa.BasicBlock]llvm.BasicBlock), + blocking: c.analysis.IsBlocking(f), } var retType llvm.Type - if f.Signature.Results() == nil { + if frame.blocking { + if f.Signature.Results() != nil { + return nil, errors.New("todo: return values in blocking function") + } + retType = c.i8ptrType + } else if f.Signature.Results() == nil { retType = llvm.VoidType() } else if f.Signature.Results().Len() == 1 { var err error @@ -507,6 +599,9 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) { } var paramTypes []llvm.Type + if frame.blocking { + paramTypes = append(paramTypes, c.i8ptrType) // parent coroutine + } for i, param := range f.Params { paramType, err := c.getLLVMType(param.Type()) if err != nil { @@ -518,7 +613,7 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) { fnType := llvm.FunctionType(retType, paramTypes, false) - name := getFunctionName(f) + name := getFunctionName(f, frame.blocking) frame.llvmFn = c.mod.NamedFunction(name) if frame.llvmFn.IsNil() { frame.llvmFn = llvm.AddFunction(c.mod, name, fnType) @@ -624,6 
+719,10 @@ func (c *Compiler) parseFunc(frame *Frame, f *ssa.Function) error { llvmBlock := c.ctx.AddBasicBlock(frame.llvmFn, block.Comment) frame.blocks[block] = llvmBlock } + if frame.blocking { + frame.cleanupBlock = c.ctx.AddBasicBlock(frame.llvmFn, "task.cleanup") + frame.suspendBlock = c.ctx.AddBasicBlock(frame.llvmFn, "task.suspend") + } // Load function parameters for _, param := range f.Params { @@ -631,7 +730,41 @@ func (c *Compiler) parseFunc(frame *Frame, f *ssa.Function) error { frame.locals[param] = llvmParam } - // Fill those blocks with instructions. + if frame.blocking { + // Coroutine initialization. + c.builder.SetInsertPointAtEnd(frame.blocks[f.Blocks[0]]) + frame.taskState = c.builder.CreateAlloca(c.taskDataType, "task.state") + stateI8 := c.builder.CreateBitCast(frame.taskState, c.i8ptrType, "task.state.i8") + id := c.builder.CreateCall(c.coroIdFunc, []llvm.Value{ + llvm.ConstInt(llvm.Int32Type(), 0, false), + stateI8, + llvm.ConstNull(c.i8ptrType), + llvm.ConstNull(c.i8ptrType), + }, "task.token") + size := c.builder.CreateCall(c.coroSizeFunc, nil, "task.size") + if c.targetData.TypeAllocSize(size.Type()) > c.targetData.TypeAllocSize(c.uintptrType) { + size = c.builder.CreateTrunc(size, c.uintptrType, "task.size.uintptr") + } else if c.targetData.TypeAllocSize(size.Type()) < c.targetData.TypeAllocSize(c.uintptrType) { + size = c.builder.CreateZExt(size, c.uintptrType, "task.size.uintptr") + } + data := c.builder.CreateCall(c.allocFunc, []llvm.Value{size}, "task.data") + frame.taskHandle = c.builder.CreateCall(c.coroBeginFunc, []llvm.Value{id, data}, "task.handle") + + // Coroutine cleanup. Free resources associated with this coroutine. 
+ c.builder.SetInsertPointAtEnd(frame.cleanupBlock) + mem := c.builder.CreateCall(c.coroFreeFunc, []llvm.Value{id, frame.taskHandle}, "task.data.free") + c.builder.CreateCall(c.freeFunc, []llvm.Value{mem}, "") + // re-insert parent coroutine + c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{frame.llvmFn.FirstParam()}, "") + c.builder.CreateBr(frame.suspendBlock) + + // Coroutine suspend. A call to llvm.coro.suspend() will branch here. + c.builder.SetInsertPointAtEnd(frame.suspendBlock) + c.builder.CreateCall(c.coroEndFunc, []llvm.Value{frame.taskHandle, llvm.ConstInt(llvm.Int1Type(), 0, false)}, "unused") + c.builder.CreateRet(frame.taskHandle) + } + + // Fill blocks with instructions. for _, block := range f.DomPreorder() { c.builder.SetInsertPointAtEnd(frame.blocks[block]) for _, instr := range block.Instrs { @@ -664,6 +797,27 @@ func (c *Compiler) parseInstr(frame *Frame, instr ssa.Instruction) error { value, err := c.parseExpr(frame, instr) frame.locals[instr] = value return err + case *ssa.Go: + if instr.Common().Method != nil { + return errors.New("todo: go on method receiver") + } + + // Execute non-blocking calls (including builtins) directly. + // parentHandle param is ignored. + if !c.analysis.IsBlocking(instr.Common().Value) { + _, err := c.parseCall(frame, instr.Common(), llvm.Value{}) + return err // probably nil + } + + // Start this goroutine. + // parentHandle is nil, as the goroutine has no parent frame (it's a new + // stack). 
+ handle, err := c.parseCall(frame, instr.Common(), llvm.Value{}) + if err != nil { + return err + } + c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{handle}, "") + return nil case *ssa.If: cond, err := c.parseExpr(frame, instr.Cond) if err != nil { @@ -687,18 +841,32 @@ func (c *Compiler) parseInstr(frame *Frame, instr ssa.Instruction) error { c.builder.CreateUnreachable() return nil case *ssa.Return: - if len(instr.Results) == 0 { - c.builder.CreateRetVoid() - return nil - } else if len(instr.Results) == 1 { - val, err := c.parseExpr(frame, instr.Results[0]) - if err != nil { - return err + if frame.blocking { + if len(instr.Results) != 0 { + return errors.New("todo: return values from blocking function") } - c.builder.CreateRet(val) + // Final suspend. + continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{ + llvm.ConstNull(c.ctx.TokenType()), + llvm.ConstInt(llvm.Int1Type(), 1, false), // final=true + }, "") + sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2) + sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock) return nil } else { - return errors.New("todo: return value") + if len(instr.Results) == 0 { + c.builder.CreateRetVoid() + return nil + } else if len(instr.Results) == 1 { + val, err := c.parseExpr(frame, instr.Results[0]) + if err != nil { + return err + } + c.builder.CreateRet(val) + return nil + } else { + return errors.New("todo: return value") + } } case *ssa.Store: llvmAddr, err := c.parseExpr(frame, instr.Addr) @@ -797,16 +965,17 @@ func (c *Compiler) parseBuiltin(frame *Frame, args []ssa.Value, callName string) } } -func (c *Compiler) parseFunctionCall(frame *Frame, call *ssa.CallCommon, fn *ssa.Function) (llvm.Value, error) { - fmt.Printf(" function: %s\n", fn) - - name := getFunctionName(fn) - target := c.mod.NamedFunction(name) - if target.IsNil() { - return llvm.Value{}, errors.New("undefined function: " + name) - } - +func (c *Compiler) 
parseFunctionCall(frame *Frame, call *ssa.CallCommon, llvmFn llvm.Value, blocking bool, parentHandle llvm.Value) (llvm.Value, error) { var params []llvm.Value + if blocking { + if parentHandle.IsNil() { + // Started from 'go' statement. + params = append(params, llvm.ConstNull(c.i8ptrType)) + } else { + // Blocking function calls another blocking function. + params = append(params, parentHandle) + } + } for _, param := range call.Args { val, err := c.parseExpr(frame, param) if err != nil { @@ -815,19 +984,75 @@ func (c *Compiler) parseFunctionCall(frame *Frame, call *ssa.CallCommon, fn *ssa params = append(params, val) } - return c.builder.CreateCall(target, params, ""), nil + if frame.blocking && llvmFn.Name() == "runtime.Sleep" { + // Set task state to TASK_STATE_SLEEP and set the duration. + c.builder.CreateCall(c.mod.NamedFunction("runtime.sleepTask"), []llvm.Value{frame.taskHandle, params[0]}, "") + + // Yield to scheduler. + continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{ + llvm.ConstNull(c.ctx.TokenType()), + llvm.ConstInt(llvm.Int1Type(), 0, false), + }, "") + wakeup := c.ctx.InsertBasicBlock(llvm.NextBasicBlock(c.builder.GetInsertBlock()), "task.wakeup") + sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2) + sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 0, false), wakeup) + sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock) + c.builder.SetInsertPointAtEnd(wakeup) + + return llvm.Value{}, nil + } + + result := c.builder.CreateCall(llvmFn, params, "") + if blocking && !parentHandle.IsNil() { + // Calling a blocking function as a regular function call. + // This is done by passing the current coroutine as a parameter to the + // new coroutine and dropping the current coroutine from the scheduler + // (with the TASK_STATE_CALL state). When the subroutine is finished, it + // will reactivate the parent (this frame) in its destroy function.
+ + c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{result}, "") + + // Set task state to TASK_STATE_CALL. + c.builder.CreateCall(c.mod.NamedFunction("runtime.waitForAsyncCall"), []llvm.Value{frame.taskHandle}, "") + + // Yield to the scheduler. + continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{ + llvm.ConstNull(c.ctx.TokenType()), + llvm.ConstInt(llvm.Int1Type(), 0, false), + }, "") + resume := c.ctx.InsertBasicBlock(llvm.NextBasicBlock(c.builder.GetInsertBlock()), "task.callComplete") + sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2) + sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 0, false), resume) + sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock) + c.builder.SetInsertPointAtEnd(resume) + } + return result, nil } -func (c *Compiler) parseCall(frame *Frame, instr *ssa.Call) (llvm.Value, error) { - fmt.Printf(" call: %s\n", instr) - - switch call := instr.Common().Value.(type) { +func (c *Compiler) parseCall(frame *Frame, instr *ssa.CallCommon, parentHandle llvm.Value) (llvm.Value, error) { + switch call := instr.Value.(type) { case *ssa.Builtin: - return c.parseBuiltin(frame, instr.Common().Args, call.Name()) + return c.parseBuiltin(frame, instr.Args, call.Name()) case *ssa.Function: - return c.parseFunctionCall(frame, instr.Common(), call) - default: - return llvm.Value{}, errors.New("todo: unknown call type: " + fmt.Sprintf("%#v", call)) + targetBlocks := false + name := getFunctionName(call, targetBlocks) + llvmFn := c.mod.NamedFunction(name) + if llvmFn.IsNil() { + targetBlocks = true + nameAsync := getFunctionName(call, targetBlocks) + llvmFn = c.mod.NamedFunction(nameAsync) + if llvmFn.IsNil() { + return llvm.Value{}, errors.New("undefined function: " + name) + } + } + return c.parseFunctionCall(frame, instr, llvmFn, targetBlocks, parentHandle) + default: // function pointer + value, err := c.parseExpr(frame, instr.Value) + if err != nil { + return llvm.Value{}, err 
+ } + // TODO: blocking function pointers (needs analysis) + return c.parseFunctionCall(frame, instr, value, false, parentHandle) } } @@ -866,7 +1091,9 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) { case *ssa.BinOp: return c.parseBinOp(frame, expr) case *ssa.Call: - return c.parseCall(frame, expr) + // Passing the current task here to the subroutine. It is only used when + // the subroutine is blocking. + return c.parseCall(frame, expr.Common(), frame.taskHandle) case *ssa.ChangeType: return c.parseConvert(frame, expr.Type(), expr.X) case *ssa.Const: @@ -890,6 +1117,8 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) { llvm.ConstInt(llvm.Int32Type(), uint64(expr.Field), false), } return c.builder.CreateGEP(val, indices, ""), nil + case *ssa.Function: + return c.mod.NamedFunction(getFunctionName(expr, false)), nil case *ssa.Global: fullName := getGlobalName(expr) value := c.mod.NamedGlobal(fullName) @@ -1274,11 +1503,15 @@ func (c *Compiler) LinkModule(mod llvm.Module) error { func (c *Compiler) ApplyFunctionSections() { // Put every function in a separate section. This makes it possible for the - // linker to remove dead code (--gc-sections). + // linker to remove dead code (-ffunction-sections). llvmFn := c.mod.FirstFunction() for !llvmFn.IsNil() { if !llvmFn.IsDeclaration() { - llvmFn.SetSection(".text." + llvmFn.Name()) + name := llvmFn.Name() + if strings.HasSuffix(name, "$async") { + name = name[:len(name)-len("$async")] + } + llvmFn.SetSection(".text." + name) } llvmFn = llvm.NextFunction(llvmFn) } @@ -1367,7 +1600,7 @@ func Compile(pkgName, runtimePath, outpath, target string, printIR bool) error { if err := c.Verify(); err != nil { return err } - c.Optimize(2, 1) // -O2 -Os + //c.Optimize(2, 1) // -O2 -Os if err := c.Verify(); err != nil { return err }