gc: use raw stack access whenever possible
The only architecture that actually needs special support for scanning the stack is WebAssembly. All others allow raw access to the stack with a small bit of assembly. Therefore, don't keep track of all these objects on the stack manually and instead just use conservative stack scanning. This results in a massive code size decrease in the affected targets (only tested linux/amd64 for code size) - sometimes around 33%. It also allows for future improvements such as using proper stackful goroutines.
Этот коммит содержится в:
		
							родитель
							
								
									bfa29f17da
								
							
						
					
					
						коммит
						67de8b490d
					
				
					 13 изменённых файлов: 125 добавлений и 11 удалений
				
			
		|  | @ -118,12 +118,12 @@ func (c *Config) NeedsStackObjects() bool { | ||||||
| 	switch c.GC() { | 	switch c.GC() { | ||||||
| 	case "conservative", "extalloc": | 	case "conservative", "extalloc": | ||||||
| 		for _, tag := range c.BuildTags() { | 		for _, tag := range c.BuildTags() { | ||||||
| 			if tag == "baremetal" { | 			if tag == "wasm" { | ||||||
| 				return false | 				return true | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		return true | 		return false | ||||||
| 	default: | 	default: | ||||||
| 		return false | 		return false | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -246,6 +246,9 @@ func defaultTarget(goos, goarch, triple string) (*TargetSpec, error) { | ||||||
| 	} else { | 	} else { | ||||||
| 		spec.LDFlags = append(spec.LDFlags, "-no-pie", "-Wl,--gc-sections") // WARNING: clang < 5.0 requires -nopie | 		spec.LDFlags = append(spec.LDFlags, "-no-pie", "-Wl,--gc-sections") // WARNING: clang < 5.0 requires -nopie | ||||||
| 	} | 	} | ||||||
|  | 	if goarch != "wasm" { | ||||||
|  | 		spec.ExtraFiles = append(spec.ExtraFiles, "src/runtime/gc_"+goarch+".S") | ||||||
|  | 	} | ||||||
| 	if goarch != runtime.GOARCH { | 	if goarch != runtime.GOARCH { | ||||||
| 		// Some educated guesses as to how to invoke helper programs. | 		// Some educated guesses as to how to invoke helper programs. | ||||||
| 		spec.GDB = "gdb-multiarch" | 		spec.GDB = "gdb-multiarch" | ||||||
|  |  | ||||||
|  | @ -1,5 +1,7 @@ | ||||||
| package runtime | package runtime | ||||||
| 
 | 
 | ||||||
|  | import "device" | ||||||
|  | 
 | ||||||
| const GOARCH = "386" | const GOARCH = "386" | ||||||
| 
 | 
 | ||||||
| // The bitness of the CPU (e.g. 8, 32, 64). | // The bitness of the CPU (e.g. 8, 32, 64). | ||||||
|  | @ -10,4 +12,6 @@ func align(ptr uintptr) uintptr { | ||||||
| 	return (ptr + 3) &^ 3 | 	return (ptr + 3) &^ 3 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func getCurrentStackPointer() uintptr | func getCurrentStackPointer() uintptr { | ||||||
|  | 	return device.AsmFull("movl %esp, {}", nil) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -1,5 +1,7 @@ | ||||||
| package runtime | package runtime | ||||||
| 
 | 
 | ||||||
|  | import "device" | ||||||
|  | 
 | ||||||
| const GOARCH = "amd64" | const GOARCH = "amd64" | ||||||
| 
 | 
 | ||||||
| // The bitness of the CPU (e.g. 8, 32, 64). | // The bitness of the CPU (e.g. 8, 32, 64). | ||||||
|  | @ -12,4 +14,6 @@ func align(ptr uintptr) uintptr { | ||||||
| 	return (ptr + 15) &^ 15 | 	return (ptr + 15) &^ 15 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func getCurrentStackPointer() uintptr | func getCurrentStackPointer() uintptr { | ||||||
|  | 	return device.AsmFull("movq %rsp, {}", nil) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -1,5 +1,7 @@ | ||||||
| package runtime | package runtime | ||||||
| 
 | 
 | ||||||
|  | import "device/arm" | ||||||
|  | 
 | ||||||
| const GOARCH = "arm64" | const GOARCH = "arm64" | ||||||
| 
 | 
 | ||||||
| // The bitness of the CPU (e.g. 8, 32, 64). | // The bitness of the CPU (e.g. 8, 32, 64). | ||||||
|  | @ -9,5 +11,6 @@ const TargetBits = 64 | ||||||
| func align(ptr uintptr) uintptr { | func align(ptr uintptr) uintptr { | ||||||
| 	return (ptr + 7) &^ 7 | 	return (ptr + 7) &^ 7 | ||||||
| } | } | ||||||
| 
 | func getCurrentStackPointer() uintptr { | ||||||
| func getCurrentStackPointer() uintptr | 	return arm.AsmFull("mov {}, sp", nil) | ||||||
|  | } | ||||||
|  |  | ||||||
							
								
								
									
										22
									
								
								src/runtime/gc_386.S
									
										
									
									
									
										Обычный файл
									
								
							
							
						
						
									
										22
									
								
								src/runtime/gc_386.S
									
										
									
									
									
										Обычный файл
									
								
							|  | @ -0,0 +1,22 @@ | ||||||
|  | .section .text.tinygo_scanCurrentStack | ||||||
|  | .global tinygo_scanCurrentStack
 | ||||||
|  | .type tinygo_scanCurrentStack, %function | ||||||
|  | tinygo_scanCurrentStack: | ||||||
|  |     // Sources: | ||||||
|  |     //   * https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call | ||||||
|  |     //   * https://godbolt.org/z/q7e8dn | ||||||
|  | 
 | ||||||
|  |     // Save callee-saved registers. | ||||||
|  |     pushl %ebx | ||||||
|  |     pushl %esi | ||||||
|  |     pushl %edi | ||||||
|  |     pushl %ebp | ||||||
|  | 
 | ||||||
|  |     // Scan the stack. | ||||||
|  |     pushl %esp | ||||||
|  |     calll tinygo_scanstack | ||||||
|  | 
 | ||||||
|  |     // Restore the stack pointer. Registers do not need to be restored as they | ||||||
|  |     // were only pushed to be discoverable by the GC. | ||||||
|  |     addl $20, %esp | ||||||
|  |     retl | ||||||
							
								
								
									
										29
									
								
								src/runtime/gc_amd64.S
									
										
									
									
									
										Обычный файл
									
								
							
							
						
						
									
										29
									
								
								src/runtime/gc_amd64.S
									
										
									
									
									
										Обычный файл
									
								
							|  | @ -0,0 +1,29 @@ | ||||||
|  | #ifdef __ELF__ | ||||||
|  | .section .text.tinygo_scanCurrentStack | ||||||
|  | .global tinygo_scanCurrentStack
 | ||||||
|  | tinygo_scanCurrentStack: | ||||||
|  | #else // Darwin | ||||||
|  | .global _tinygo_scanCurrentStack
 | ||||||
|  | _tinygo_scanCurrentStack: | ||||||
|  | #endif | ||||||
|  |     // Save callee-saved registers. | ||||||
|  |     pushq %rbx | ||||||
|  |     pushq %rbp | ||||||
|  |     pushq %r12 | ||||||
|  |     pushq %r13 | ||||||
|  |     pushq %r14 | ||||||
|  |     pushq %r15 | ||||||
|  | 
 | ||||||
|  |     // Scan the stack. | ||||||
|  |     subq $8, %rsp // adjust the stack before the call to maintain 16-byte alignment | ||||||
|  |     movq %rsp, %rdi | ||||||
|  | #ifdef __ELF__ | ||||||
|  |     callq tinygo_scanstack | ||||||
|  | #else | ||||||
|  |     callq _tinygo_scanstack // Darwin | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  |     // Restore the stack pointer. Registers do not need to be restored as they | ||||||
|  |     // were only pushed to be discoverable by the GC. | ||||||
|  |     addq $56, %rsp | ||||||
|  |     retq | ||||||
							
								
								
									
										23
									
								
								src/runtime/gc_arm64.S
									
										
									
									
									
										Обычный файл
									
								
							
							
						
						
									
										23
									
								
								src/runtime/gc_arm64.S
									
										
									
									
									
										Обычный файл
									
								
							|  | @ -0,0 +1,23 @@ | ||||||
|  | .section .text.tinygo_scanCurrentStack | ||||||
|  | .global tinygo_scanCurrentStack
 | ||||||
|  | .type tinygo_scanCurrentStack, %function | ||||||
|  | tinygo_scanCurrentStack: | ||||||
|  |     // Sources: | ||||||
|  |     //   * https://developer.arm.com/architectures/learn-the-architecture/armv8-a-instruction-set-architecture/procedure-call-standard | ||||||
|  |     //   * https://godbolt.org/z/qrvrEh | ||||||
|  | 
 | ||||||
|  |     // Save callee-saved registers. | ||||||
|  |     stp     x29, x30, [sp, #-96]! | ||||||
|  |     stp     x28, x27, [sp, #16] | ||||||
|  |     stp     x26, x25, [sp, #32] | ||||||
|  |     stp     x24, x23, [sp, #48] | ||||||
|  |     stp     x22, x21, [sp, #64] | ||||||
|  |     stp     x20, x19, [sp, #80] | ||||||
|  | 
 | ||||||
|  |     // Scan the stack. | ||||||
|  |     mov     x0, sp | ||||||
|  |     bl      tinygo_scanstack | ||||||
|  | 
 | ||||||
|  |     // Restore stack state and return. | ||||||
|  |     ldp     x29, x30, [sp], #96 | ||||||
|  |     ret | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| // +build gc.conservative gc.extalloc | // +build gc.conservative gc.extalloc | ||||||
| // +build !baremetal | // +build wasm | ||||||
| 
 | 
 | ||||||
| package runtime | package runtime | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| // +build gc.conservative gc.extalloc | // +build gc.conservative gc.extalloc | ||||||
| // +build baremetal | // +build !wasm | ||||||
| 
 | 
 | ||||||
| package runtime | package runtime | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -8,19 +8,32 @@ type timeUnit int64 | ||||||
| 
 | 
 | ||||||
| const asyncScheduler = false | const asyncScheduler = false | ||||||
| 
 | 
 | ||||||
|  | var stackTop uintptr | ||||||
|  | 
 | ||||||
| func postinit() {} | func postinit() {} | ||||||
| 
 | 
 | ||||||
| // Entry point for Go. Initialize all packages and call main.main(). | // Entry point for Go. Initialize all packages and call main.main(). | ||||||
| //export main | //export main | ||||||
| func main() int { | func main() int { | ||||||
| 	preinit() | 	preinit() | ||||||
| 	run() | 
 | ||||||
|  | 	// Obtain the initial stack pointer right before calling the run() function. | ||||||
|  | 	// The run function has been moved to a separate (non-inlined) function so | ||||||
|  | 	// that the correct stack pointer is read. | ||||||
|  | 	stackTop = getCurrentStackPointer() | ||||||
|  | 	runMain() | ||||||
| 
 | 
 | ||||||
| 	// Call exit to correctly finish the program | 	// Call exit to correctly finish the program | ||||||
| 	// Without this, the application crashes at start, not sure why | 	// Without this, the application crashes at start, not sure why | ||||||
| 	return exit(0) | 	return exit(0) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // Must be a separate function to get the correct stack pointer. | ||||||
|  | //go:noinline | ||||||
|  | func runMain() { | ||||||
|  | 	run() | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // sleepTicks sleeps for the specified system ticks | // sleepTicks sleeps for the specified system ticks | ||||||
| func sleepTicks(d timeUnit) { | func sleepTicks(d timeUnit) { | ||||||
| 	sleepThread(uint64(ticksToNanoseconds(d))) | 	sleepThread(uint64(ticksToNanoseconds(d))) | ||||||
|  |  | ||||||
|  | @ -37,6 +37,8 @@ type timespec struct { | ||||||
| 
 | 
 | ||||||
| const CLOCK_MONOTONIC_RAW = 4 | const CLOCK_MONOTONIC_RAW = 4 | ||||||
| 
 | 
 | ||||||
|  | var stackTop uintptr | ||||||
|  | 
 | ||||||
| func postinit() {} | func postinit() {} | ||||||
| 
 | 
 | ||||||
| // Entry point for Go. Initialize all packages and call main.main(). | // Entry point for Go. Initialize all packages and call main.main(). | ||||||
|  | @ -44,12 +46,22 @@ func postinit() {} | ||||||
| func main() int { | func main() int { | ||||||
| 	preinit() | 	preinit() | ||||||
| 
 | 
 | ||||||
| 	run() | 	// Obtain the initial stack pointer right before calling the run() function. | ||||||
|  | 	// The run function has been moved to a separate (non-inlined) function so | ||||||
|  | 	// that the correct stack pointer is read. | ||||||
|  | 	stackTop = getCurrentStackPointer() | ||||||
|  | 	runMain() | ||||||
| 
 | 
 | ||||||
| 	// For libc compatibility. | 	// For libc compatibility. | ||||||
| 	return 0 | 	return 0 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // Must be a separate function to get the correct stack pointer. | ||||||
|  | //go:noinline | ||||||
|  | func runMain() { | ||||||
|  | 	run() | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func putchar(c byte) { | func putchar(c byte) { | ||||||
| 	_putchar(int(c)) | 	_putchar(int(c)) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -27,6 +27,7 @@ | ||||||
|   "linkerscript": "targets/nintendoswitch.ld", |   "linkerscript": "targets/nintendoswitch.ld", | ||||||
|   "extra-files": [ |   "extra-files": [ | ||||||
|     "targets/nintendoswitch.s", |     "targets/nintendoswitch.s", | ||||||
|  |     "src/runtime/gc_arm64.S", | ||||||
|     "src/runtime/runtime_nintendoswitch.s" |     "src/runtime/runtime_nintendoswitch.s" | ||||||
|   ] |   ] | ||||||
| } | } | ||||||
|  |  | ||||||
		Загрузка…
	
	Создание таблицы
		
		Сослаться в новой задаче
	
	 Ayke van Laethem
						Ayke van Laethem