runtime: implement precise GC

This implements the block-based GC as a partially precise GC. This means that for most heap allocations it is known which words contain a pointer and which don't. This should in theory make the GC faster (because it can skip non-pointer object) and have fewer false positives in a GC cycle. It does however use a bit more RAM to store the layout of each object. Right now this GC seems to be slower than the conservative GC, but should be less likely to run out of memory as a result of false positives.
2022-12-15 21:24:17 +01:00 · 2022-12-15 21:24:17 +01:00 · 655075e5e0
--- a/compileopts/config.go
+++ b/compileopts/config.go
@ -90,7 +90,7 @@ func (c *Config) CgoEnabled() bool {
 }

 // GC returns the garbage collection strategy in use on this platform. Valid
-// values are "none", "leaking", and "conservative".
+// values are "none", "leaking", "conservative" and "precise".
 func (c *Config) GC() string {
 	if c.Options.GC != "" {
 		return c.Options.GC
@ -105,7 +105,7 @@ func (c *Config) GC() string {
 // that can be traced by the garbage collector.
 func (c *Config) NeedsStackObjects() bool {
 	switch c.GC() {
-	case "conservative":
+	case "conservative", "precise":
 		for _, tag := range c.BuildTags() {
 			if tag == "tinygo.wasm" {
 				return true
--- a/compileopts/options.go
+++ b/compileopts/options.go
@ -8,7 +8,7 @@ import (
 )

 var (
-	validGCOptions            = []string{"none", "leaking", "conservative"}
+	validGCOptions            = []string{"none", "leaking", "conservative", "precise"}
 	validSchedulerOptions     = []string{"none", "tasks", "asyncify"}
 	validSerialOptions        = []string{"none", "uart", "usb"}
 	validPrintSizeOptions     = []string{"none", "short", "full"}
--- a/compileopts/options_test.go
+++ b/compileopts/options_test.go
@ -9,7 +9,7 @@ import (

 func TestVerifyOptions(t *testing.T) {

-	expectedGCError := errors.New(`invalid gc option 'incorrect': valid values are none, leaking, conservative`)
+	expectedGCError := errors.New(`invalid gc option 'incorrect': valid values are none, leaking, conservative, precise`)
 	expectedSchedulerError := errors.New(`invalid scheduler option 'incorrect': valid values are none, tasks, asyncify`)
 	expectedPrintSizeError := errors.New(`invalid size option 'incorrect': valid values are none, short, full`)
 	expectedPanicStrategyError := errors.New(`invalid panic option 'incorrect': valid values are print, trap`)
--- a/compileopts/target.go
+++ b/compileopts/target.go
@ -250,6 +250,7 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) {
 		GOOS:             goos,
 		GOARCH:           goarch,
 		BuildTags:        []string{goos, goarch},
+		GC:               "precise",
 		Scheduler:        "tasks",
 		Linker:           "cc",
 		DefaultStackSize: 1024 * 64, // 64kB
--- a/compiler/llvm.go
+++ b/compiler/llvm.go
@ -84,6 +84,8 @@ func (c *compilerContext) makeGlobalArray(buf []byte, name string, elementType l
 // which words contain a pointer (indicated by setting the given bit to 1). For
 // arrays, only the element is stored. This works because the GC knows the
 // object size and can therefore know how this value is repeated in the object.
+//
+// For details on what's in this value, see src/runtime/gc_precise.go.
 func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Value {
 	// Use the element type for arrays. This works even for nested arrays.
 	for {
--- a/interp/memory.go
+++ b/interp/memory.go
@ -1236,6 +1236,8 @@ func (r *runner) getValue(llvmValue llvm.Value) value {

 // readObjectLayout reads the object layout as it is stored by the compiler. It
 // returns the size in the number of words and the bitmap.
+//
+// For details on this format, see src/runtime/gc_precise.go.
 func (r *runner) readObjectLayout(layoutValue value) (uint64, *big.Int) {
 	pointerSize := layoutValue.len(r)
 	if checks && uint64(pointerSize) != r.targetData.TypeAllocSize(r.i8ptrType) {
--- a/src/internal/task/gc_stack_chain.go
+++ b/src/internal/task/gc_stack_chain.go
@ -1,4 +1,4 @@
-//go:build gc.conservative && tinygo.wasm
+//go:build (gc.conservative || gc.precise) && tinygo.wasm

 package task

--- a/src/internal/task/gc_stack_noop.go
+++ b/src/internal/task/gc_stack_noop.go
@ -1,4 +1,4 @@
-//go:build !gc.conservative || !tinygo.wasm
+//go:build !(gc.conservative || gc.precise) || !tinygo.wasm

 package task

--- a/src/internal/task/task_stack.go
+++ b/src/internal/task/task_stack.go
@ -17,6 +17,9 @@ const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0)))
 type state struct {
 	// sp is the stack pointer of the saved state.
 	// When the task is inactive, the saved registers are stored at the top of the stack.
+	// Note: this should ideally be a unsafe.Pointer for the precise GC. The GC
+	// will find the stack through canaryPtr though so it's not currently a
+	// problem to store this value as uintptr.
 	sp uintptr

 	// canaryPtr points to the top word of the stack (the lowest address).
@ -63,20 +66,20 @@ func (t *Task) Resume() {
 // initialize the state and prepare to call the specified function with the specified argument bundle.
 func (s *state) initialize(fn uintptr, args unsafe.Pointer, stackSize uintptr) {
 	// Create a stack.
-	stack := make([]uintptr, stackSize/unsafe.Sizeof(uintptr(0)))
+	stack := runtime_alloc(stackSize, nil)

 	// Set up the stack canary, a random number that should be checked when
 	// switching from the task back to the scheduler. The stack canary pointer
 	// points to the first word of the stack. If it has changed between now and
 	// the next stack switch, there was a stack overflow.
-	s.canaryPtr = &stack[0]
+	s.canaryPtr = (*uintptr)(stack)
 	*s.canaryPtr = stackCanary

 	// Get a pointer to the top of the stack, where the initial register values
 	// are stored. They will be popped off the stack on the first stack switch
 	// to the goroutine, and will start running tinygo_startTask (this setup
 	// happens in archInit).
-	r := (*calleeSavedRegs)(unsafe.Pointer(&stack[uintptr(len(stack))-(unsafe.Sizeof(calleeSavedRegs{})/unsafe.Sizeof(uintptr(0)))]))
+	r := (*calleeSavedRegs)(unsafe.Add(stack, stackSize-unsafe.Sizeof(calleeSavedRegs{})))

 	// Invoke architecture-specific initialization.
 	s.archInit(r, fn, args)
@ -95,6 +98,9 @@ var startTask [0]uint8
 //go:linkname runqueuePushBack runtime.runqueuePushBack
 func runqueuePushBack(*Task)

+//go:linkname runtime_alloc runtime.alloc
+func runtime_alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer
+
 // start creates and starts a new goroutine with the given function and arguments.
 // The new goroutine is scheduled to run later.
 func start(fn uintptr, args unsafe.Pointer, stackSize uintptr) {
--- a/src/runtime/gc_blocks.go
+++ b/src/runtime/gc_blocks.go
@ -1,4 +1,4 @@
-//go:build gc.conservative
+//go:build gc.conservative || gc.precise

 package runtime

@ -187,6 +187,10 @@ func (b gcBlock) unmark() {
 	}
 }

+func isOnHeap(ptr uintptr) bool {
+	return ptr >= heapStart && ptr < uintptr(metadataStart)
+}
+
 // Initialize the memory allocator.
 // No memory may be allocated before this is called. That means the runtime and
 // any packages the runtime depends upon may not allocate memory during package
@ -269,6 +273,10 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {
 		return unsafe.Pointer(&zeroSizedAlloc)
 	}

+	if preciseHeap {
+		size += align(unsafe.Sizeof(layout))
+	}
+
 	gcTotalAlloc += uint64(size)
 	gcMallocs++

@ -352,6 +360,15 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {

 			// Return a pointer to this allocation.
 			pointer := thisAlloc.pointer()
+			if preciseHeap {
+				// Store the object layout at the start of the object.
+				// TODO: this wastes a little bit of space on systems with
+				// larger-than-pointer alignment requirements.
+				*(*unsafe.Pointer)(pointer) = layout
+				add := align(unsafe.Sizeof(layout))
+				pointer = unsafe.Add(pointer, add)
+				size -= add
+			}
 			memzero(pointer, size)
 			return pointer
 		}
@ -493,12 +510,23 @@ func startMark(root gcBlock) {
 		}

 		// Scan all pointers inside the block.
+		scanner := newGCObjectScanner(block)
+		if scanner.pointerFree() {
+			// This object doesn't contain any pointers.
+			// This is a fast path for objects like make([]int, 4096).
+			continue
+		}
 		start, end := block.address(), block.findNext().address()
+		if preciseHeap {
+			// The first word of the object is just the pointer layout value.
+			// Skip it.
+			start += align(unsafe.Sizeof(uintptr(0)))
+		}
 		for addr := start; addr != end; addr += unsafe.Alignof(addr) {
 			// Load the word.
 			word := *(*uintptr)(unsafe.Pointer(addr))

-			if !looksLikePointer(word) {
+			if !scanner.nextIsPointer(word, root.address(), addr) {
 				// Not a heap pointer.
 				continue
 			}
@ -565,7 +593,7 @@ func finishMark() {

 // mark a GC root at the address addr.
 func markRoot(addr, root uintptr) {
-	if looksLikePointer(root) {
+	if isOnHeap(root) {
 		block := blockFromAddr(root)
 		if block.state() == blockStateFree {
 			// The to-be-marked object doesn't actually exist.
--- a/src/runtime/gc_conservative.go
+++ b/src/runtime/gc_conservative.go
@ -1,10 +1,29 @@
 //go:build gc.conservative

+// This implements the block-based heap as a fully conservative GC. No tracking
+// of pointers is done, every word in an object is considered live if it looks
+// like a pointer.
+
 package runtime

-// looksLikePointer returns whether this could be a pointer. Currently, it
-// simply returns whether it lies anywhere in the heap. Go allows interior
-// pointers so we can't check alignment or anything like that.
-func looksLikePointer(ptr uintptr) bool {
-	return ptr >= heapStart && ptr < uintptr(metadataStart)
+const preciseHeap = false
+
+type gcObjectScanner struct {
+}
+
+func newGCObjectScanner(block gcBlock) gcObjectScanner {
+	return gcObjectScanner{}
+}
+
+func (scanner *gcObjectScanner) pointerFree() bool {
+	// We don't know whether this object contains pointers, so conservatively
+	// return false.
+	return false
+}
+
+// nextIsPointer returns whether this could be a pointer. Because the GC is
+// conservative, we can't do much more than check whether the object lies
+// somewhere in the heap.
+func (scanner gcObjectScanner) nextIsPointer(ptr, parent, addrOfWord uintptr) bool {
+	return isOnHeap(ptr)
 }
--- a/src/runtime/gc_globals.go
+++ b/src/runtime/gc_globals.go
@ -1,4 +1,4 @@
-//go:build gc.conservative && (baremetal || tinygo.wasm)
+//go:build (gc.conservative || gc.precise) && (baremetal || tinygo.wasm)

 package runtime

--- a/src/runtime/gc_precise.go
+++ b/src/runtime/gc_precise.go
@ -0,0 +1,147 @@
+//go:build gc.precise
+
+// This implements the block-based GC as a partially precise GC. This means that
+// for most heap allocations it is known which words contain a pointer and which
+// don't. This should in theory make the GC faster (because it can skip
+// non-pointer object) and have fewer false positives in a GC cycle. It does
+// however use a bit more RAM to store the layout of each object.
+//
+// The pointer/non-pointer information for objects is stored in the first word
+// of the object. It is described below but in essense it contains a bitstring
+// of a particular size. This size does not indicate the size of the object:
+// instead the allocated object is a multiple of the bitstring size. This is so
+// that arrays and slices can store the size of the object efficiently. The
+// bitstring indicates where the pointers are in the object (the bit is set when
+// the value may be a pointer, and cleared when it certainly isn't a pointer).
+// Some examples (assuming a 32-bit system for the moment):
+//
+// | object type | size | bitstring | note
+// |-------------|------|-----------|------
+// | int         | 1    | 0         | no pointers in this object
+// | string      | 2    | 10        | {pointer, len} pair so there is one pointer
+// | []int       | 3    | 100       | {pointer, len, cap}
+// | [4]*int     | 1    | 1         | even though it contains 4 pointers, an array repeats so it can be stored with size=1
+// | [30]byte    | 1    | 0         | there are no pointers so the layout is very simple
+//
+// The garbage collector scans objects by starting at the first word value in
+// the object. If the least significant bit of the bitstring is clear, it is
+// skipped (it's not a pointer). If the bit is set, it is treated as if it could
+// be a pointer. The garbage collector continues by scanning further words in
+// the object and checking them against the corresponding bit in the bitstring.
+// Once it reaches the end of the bitstring, it wraps around (for arrays,
+// slices, strings, etc).
+//
+// The layout as passed to the runtime.alloc function and stored in the object
+// is a pointer-sized value. If the least significant bit of the value is set,
+// the bitstring is contained directly inside the value, of the form
+// pppp_pppp_ppps_sss1.
+//   * The 'p' bits indicate which parts of the object are a pointer.
+//   * The 's' bits indicate the size of the object. In this case, there are 11
+//     pointer bits so four bits are enough for the size (0-15).
+//   * The lowest bit is always set to distinguish this value from a pointer.
+// This example is for a 16-bit architecture. For example, 32-bit architectures
+// use a layout format of pppppppp_pppppppp_pppppppp_ppsssss1 (26 bits for
+// pointer/non-pointer information, 5 size bits, and one bit that's always set).
+//
+// For larger objects that don't fit in an uintptr, the layout value is a
+// pointer to a global with a format as follows:
+//     struct {
+//         size uintptr
+//         bits [...]uint8
+//     }
+// The 'size' field is the number of bits in the bitstring. The 'bits' field is
+// a byte array that contains the bitstring itself, in little endian form. The
+// length of the bits array is ceil(size/8).
+
+package runtime
+
+import "unsafe"
+
+const preciseHeap = true
+
+type gcObjectScanner struct {
+	index      uintptr
+	size       uintptr
+	bitmap     uintptr
+	bitmapAddr unsafe.Pointer
+}
+
+func newGCObjectScanner(block gcBlock) gcObjectScanner {
+	if gcAsserts && block != block.findHead() {
+		runtimePanic("gc: object scanner must start at head")
+	}
+	scanner := gcObjectScanner{}
+	layout := *(*uintptr)(unsafe.Pointer(block.address()))
+	if layout == 0 {
+		// Unknown layout. Assume all words in the object could be pointers.
+		// This layout value below corresponds to a slice of pointers like:
+		//     make(*byte, N)
+		scanner.size = 1
+		scanner.bitmap = 1
+	} else if layout&1 != 0 {
+		// Layout is stored directly in the integer value.
+		// Determine format of bitfields in the integer.
+		const layoutBits = uint64(unsafe.Sizeof(layout) * 8)
+		var sizeFieldBits uint64
+		switch layoutBits { // note: this switch should be resolved at compile time
+		case 16:
+			sizeFieldBits = 4
+		case 32:
+			sizeFieldBits = 5
+		case 64:
+			sizeFieldBits = 6
+		default:
+			runtimePanic("unknown pointer size")
+		}
+
+		// Extract values from the bitfields.
+		// See comment at the top of this file for more information.
+		scanner.size = (layout >> 1) & (1<<sizeFieldBits - 1)
+		scanner.bitmap = layout >> (1 + sizeFieldBits)
+	} else {
+		// Layout is stored separately in a global object.
+		layoutAddr := unsafe.Pointer(layout)
+		scanner.size = *(*uintptr)(layoutAddr)
+		scanner.bitmapAddr = unsafe.Add(layoutAddr, unsafe.Sizeof(uintptr(0)))
+	}
+	return scanner
+}
+
+func (scanner *gcObjectScanner) pointerFree() bool {
+	if scanner.bitmapAddr != nil {
+		// While the format allows for large objects without pointers, this is
+		// optimized by the compiler so if bitmapAddr is set, we know that there
+		// are at least some pointers in the object.
+		return false
+	}
+	// If the bitmap is zero, there are definitely no pointers in the object.
+	return scanner.bitmap == 0
+}
+
+func (scanner *gcObjectScanner) nextIsPointer(word, parent, addrOfWord uintptr) bool {
+	index := scanner.index
+	scanner.index++
+	if scanner.index == scanner.size {
+		scanner.index = 0
+	}
+
+	if !isOnHeap(word) {
+		// Definitely isn't a pointer.
+		return false
+	}
+
+	// Might be a pointer. Now look at the object layout to know for sure.
+	if scanner.bitmapAddr != nil {
+		if (*(*uint8)(unsafe.Add(scanner.bitmapAddr, index/8))>>(index%8))&1 == 0 {
+			return false
+		}
+		return true
+	}
+	if (scanner.bitmap>>index)&1 == 0 {
+		// not a pointer!
+		return false
+	}
+
+	// Probably a pointer.
+	return true
+}
--- a/src/runtime/gc_stack_portable.go
+++ b/src/runtime/gc_stack_portable.go
@ -1,4 +1,4 @@
-//go:build gc.conservative && tinygo.wasm
+//go:build (gc.conservative || gc.precise) && tinygo.wasm

 package runtime

--- a/src/runtime/gc_stack_raw.go
+++ b/src/runtime/gc_stack_raw.go
@ -1,4 +1,4 @@
-//go:build gc.conservative && !tinygo.wasm
+//go:build (gc.conservative || gc.precise) && !tinygo.wasm

 package runtime