ThinLTO optimizes across LLVM modules at link time. This means that
optimizations (such as inlining and const-propagation) are possible
between C and Go. That makes this change especially useful for CGo, although
the benefit is not limited to CGo. By doing some optimizations at link time, the linker
can discard some unused functions and this leads to a size reduction on
average. It does increase code size in some cases, but that's true for
most optimizations.

I've excluded a number of targets for now (wasm, avr, xtensa, windows,
macos). They can probably be supported with some more work, but that
should be done in separate PRs.

Overall, this change results in an average 3.24% size reduction over all
the tinygo.org/x/drivers smoke tests.

TODO: this commit runs part of the pass pipeline twice. We should set
the PrepareForThinLTO flag in the PassManagerBuilder to reduce code size
even further (0.7%) and to improve compilation speed.
Этот коммит содержится в:
Ayke van Laethem 2022-02-07 15:19:25 +01:00 коммит произвёл Ron Evans
родитель d4b1467e4c
коммит 603fff78d4
8 изменённых файлов: 121 добавлений и 25 удалений

Просмотреть файл

@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
} }
// Look up the build cache directory, which is used to speed up incremental
// builds.
cacheDir := goenv.Get("GOCACHE")
if cacheDir == "off" {
// Use temporary build directory instead, effectively disabling the
// build cache.
cacheDir = dir
}
// Check for a libc dependency. // Check for a libc dependency.
// As a side effect, this also creates the headers for the given libc, if // As a side effect, this also creates the headers for the given libc, if
// the libc needs them. // the libc needs them.
@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
// Determine the path of the bitcode file (which is a serialized version // Determine the path of the bitcode file (which is a serialized version
// of a LLVM module). // of a LLVM module).
cacheDir := goenv.Get("GOCACHE")
if cacheDir == "off" {
// Use temporary build directory instead, effectively disabling the
// build cache.
cacheDir = dir
}
bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc") bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc")
packageBitcodePaths[pkg.ImportPath] = bitcodePath packageBitcodePaths[pkg.ImportPath] = bitcodePath
@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
// Load and link all the bitcode files. This does not yet optimize // Load and link all the bitcode files. This does not yet optimize
// anything, it only links the bitcode files together. // anything, it only links the bitcode files together.
ctx := llvm.NewContext() ctx := llvm.NewContext()
mod = ctx.NewModule("") mod = ctx.NewModule("main")
for _, pkg := range lprogram.Sorted() { for _, pkg := range lprogram.Sorted() {
pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath]) pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath])
if err != nil { if err != nil {
@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
} }
return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666) return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666)
case ".bc": case ".bc":
data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes() var buf llvm.MemoryBuffer
return ioutil.WriteFile(outpath, data, 0666) if config.UseThinLTO() {
buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
} else {
buf = llvm.WriteBitcodeToMemoryBuffer(mod)
}
defer buf.Dispose()
return ioutil.WriteFile(outpath, buf.Bytes(), 0666)
case ".ll": case ".ll":
data := []byte(mod.String()) data := []byte(mod.String())
return ioutil.WriteFile(outpath, data, 0666) return ioutil.WriteFile(outpath, data, 0666)
@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
dependencies: []*compileJob{programJob}, dependencies: []*compileJob{programJob},
result: objfile, result: objfile,
run: func(*compileJob) error { run: func(*compileJob) error {
llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile) var llvmBuf llvm.MemoryBuffer
if err != nil { if config.UseThinLTO() {
return err llvmBuf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
} else {
var err error
llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
if err != nil {
return err
}
} }
defer llvmBuf.Dispose()
return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666) return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666)
}, },
} }
@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
job := &compileJob{ job := &compileJob{
description: "compile extra file " + path, description: "compile extra file " + path,
run: func(job *compileJob) error { run: func(job *compileJob) error {
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands) result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands)
job.result = result job.result = result
return err return err
}, },
@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
job := &compileJob{ job := &compileJob{
description: "compile CGo file " + abspath, description: "compile CGo file " + abspath,
run: func(job *compileJob) error { run: func(job *compileJob) error {
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands) result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands)
job.result = result job.result = result
return err return err
}, },
@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
if config.Options.PrintCommands != nil { if config.Options.PrintCommands != nil {
config.Options.PrintCommands(config.Target.Linker, ldflags...) config.Options.PrintCommands(config.Target.Linker, ldflags...)
} }
if config.UseThinLTO() {
ldflags = append(ldflags,
"--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"),
"-plugin-opt=mcpu="+config.CPU(),
"-plugin-opt=O"+strconv.Itoa(optLevel),
"-plugin-opt=thinlto")
if config.CodeModel() != "default" {
ldflags = append(ldflags,
"-mllvm", "-code-model="+config.CodeModel())
}
if sizeLevel >= 2 {
// Workaround with roughly the same effect as
// https://reviews.llvm.org/D119342.
// Can hopefully be removed in LLVM 15.
ldflags = append(ldflags,
"-mllvm", "--rotation-max-header-size=0")
}
}
err = link(config.Target.Linker, ldflags...) err = link(config.Target.Linker, ldflags...)
if err != nil { if err != nil {
return &commandError{"failed to link", executable, err} return &commandError{"failed to link", executable, err}
@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error {
} }
} }
if config.GOOS() != "darwin" { if config.GOOS() != "darwin" && !config.UseThinLTO() {
transform.ApplyFunctionSections(mod) // -ffunction-sections transform.ApplyFunctionSections(mod) // -ffunction-sections
} }

Просмотреть файл

@ -56,7 +56,7 @@ import (
// depfile but without invalidating its name. For this reason, the depfile is // depfile but without invalidating its name. For this reason, the depfile is
// written on each new compilation (even when it seems unnecessary). However, it // written on each new compilation (even when it seems unnecessary). However, it
// could in rare cases lead to a stale file fetched from the cache. // could in rare cases lead to a stale file fetched from the cache.
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) { func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) {
// Hash input file. // Hash input file.
fileHash, err := hashFile(abspath) fileHash, err := hashFile(abspath)
if err != nil { if err != nil {
@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock")) unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock"))
defer unlock() defer unlock()
ext := ".o"
if thinlto {
ext = ".bc"
}
// Create cache key for the dependencies file. // Create cache key for the dependencies file.
buf, err := json.Marshal(struct { buf, err := json.Marshal(struct {
Path string Path string
@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
} }
// Obtain hashes of all the files listed as a dependency. // Obtain hashes of all the files listed as a dependency.
outpath, err := makeCFileCachePath(dependencies, depfileNameHash) outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext)
if err == nil { if err == nil {
if _, err := os.Stat(outpath); err == nil { if _, err := os.Stat(outpath); err == nil {
return outpath, nil return outpath, nil
@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
return "", err return "", err
} }
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o") objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
depTmpFile.Close() depTmpFile.Close()
flags := append([]string{}, cflags...) // copy cflags flags := append([]string{}, cflags...) // copy cflags
flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies
if thinlto {
flags = append(flags, "-flto=thin")
}
flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath) flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath)
if strings.ToLower(filepath.Ext(abspath)) == ".s" { if strings.ToLower(filepath.Ext(abspath)) == ".s" {
// If this is an assembly file (.s or .S, lowercase or uppercase), then // If this is an assembly file (.s or .S, lowercase or uppercase), then
@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
} }
// Move temporary object file to final location. // Move temporary object file to final location.
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash) outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
// Create a cache path (a path in GOCACHE) to store the output of a compiler // Create a cache path (a path in GOCACHE) to store the output of a compiler
// job. This path is based on the dep file name (which is a hash of metadata // job. This path is based on the dep file name (which is a hash of metadata
// including compiler flags) and the hash of all input files in the paths slice. // including compiler flags) and the hash of all input files in the paths slice.
func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) { func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) {
// Hash all input files. // Hash all input files.
fileHashes := make(map[string]string, len(paths)) fileHashes := make(map[string]string, len(paths))
for _, path := range paths { for _, path := range paths {
@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error)
outFileNameBuf := sha512.Sum512_224(buf) outFileNameBuf := sha512.Sum512_224(buf)
cacheKey := hex.EncodeToString(outFileNameBuf[:]) cacheKey := hex.EncodeToString(outFileNameBuf[:])
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o") outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext)
return outpath, nil return outpath, nil
} }

Просмотреть файл

@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool {
return false return false
} }
// UseThinLTO returns whether ThinLTO should be used for the given target. Some
// targets (such as wasm) are not yet supported.
// We should try and remove as many exceptions as possible in the future, so
// that this optimization can be applied in more places.
//
// The decision is made from the dash-separated components of c.Triple(): the
// first component is compared against the excluded architectures and the
// third component (when the triple has one) against the excluded operating
// systems. Triples with fewer than three components are only subject to the
// architecture checks.
func (c *Config) UseThinLTO() bool {
	parts := strings.Split(c.Triple(), "-")
	switch parts[0] {
	case "wasm32":
		// wasm-ld doesn't seem to support ThinLTO yet.
		return false
	case "avr", "xtensa":
		// These use external (GNU) linkers which might perhaps support ThinLTO
		// through a plugin, but it's too much hassle to set up.
		return false
	}
	// parts[2] only exists when the triple has at least three components, so
	// guard the index to avoid an out-of-range panic on shorter triples.
	if len(parts) >= 3 {
		if strings.HasPrefix(parts[2], "macos") {
			// We use an external linker here at the moment.
			return false
		}
		if parts[2] == "windows" {
			// Linker error (undefined runtime.trackedGlobalsBitmap) when linking
			// for Windows. Disable it for now until that's figured out and fixed.
			return false
		}
	}
	// Other architectures support ThinLTO.
	return true
}
// RP2040BootPatch returns whether the RP2040 boot patch should be applied that // RP2040BootPatch returns whether the RP2040 boot patch should be applied that
// calculates and patches in the checksum for the 2nd stage bootloader. // calculates and patches in the checksum for the 2nd stage bootloader.
func (c *Config) RP2040BootPatch() bool { func (c *Config) RP2040BootPatch() bool {

2
go.mod
Просмотреть файл

@ -15,5 +15,5 @@ require (
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9 golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9
gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v2 v2.4.0
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1
) )

4
go.sum
Просмотреть файл

@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448= tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98=
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=

Просмотреть файл

@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i
funcPasses.FinalizeFunc() funcPasses.FinalizeFunc()
// Run module passes. // Run module passes.
// TODO: somehow set the PrepareForThinLTO flag in the pass manager builder.
modPasses := llvm.NewPassManager() modPasses := llvm.NewPassManager()
defer modPasses.Dispose() defer modPasses.Dispose()
builder.Populate(modPasses) builder.Populate(modPasses)

Просмотреть файл

@ -11,8 +11,9 @@ import (
// modified after linking. // modified after linking.
func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string { func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string {
functionMap := map[llvm.Value][]llvm.Value{} functionMap := map[llvm.Value][]llvm.Value{}
var functions []llvm.Value var functions []llvm.Value // ptrtoint values of functions
var functionNames []string var functionNames []string
var functionValues []llvm.Value // direct references to functions
for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) { for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) {
if use.FirstUse().IsNil() { if use.FirstUse().IsNil() {
// Apparently this stack size isn't used. // Apparently this stack size isn't used.
@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
if _, ok := functionMap[ptrtoint]; !ok { if _, ok := functionMap[ptrtoint]; !ok {
functions = append(functions, ptrtoint) functions = append(functions, ptrtoint)
functionNames = append(functionNames, ptrtoint.Operand(0).Name()) functionNames = append(functionNames, ptrtoint.Operand(0).Name())
functionValues = append(functionValues, ptrtoint.Operand(0))
} }
functionMap[ptrtoint] = append(functionMap[ptrtoint], use) functionMap[ptrtoint] = append(functionMap[ptrtoint], use)
} }
@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
} }
stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes)) stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes))
// Add all relevant values to llvm.used (for LTO).
appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...)
// Replace the calls with loads from the new global with stack sizes. // Replace the calls with loads from the new global with stack sizes.
irbuilder := mod.Context().NewBuilder() irbuilder := mod.Context().NewBuilder()
defer irbuilder.Dispose() defer irbuilder.Dispose()
@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
return functionNames return functionNames
} }
// appendToUsedGlobals creates the llvm.used global in mod and fills it with the
// given values, each constant-bitcast to i8*. The new global gets appending
// linkage.
//
// It panics when mod already contains a global named llvm.used, because
// extending an existing array is not implemented yet.
func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) {
	if existing := mod.NamedGlobal("llvm.used"); !existing.IsNil() {
		// Sanity check. TODO: we don't emit such a global at the moment, but
		// when we do we should append to it instead.
		panic("todo: append to existing llvm.used")
	}

	// Every entry of llvm.used is stored as an i8* in address space 0.
	bytePtrType := llvm.PointerType(mod.Context().Int8Type(), 0)
	entries := make([]llvm.Value, 0, len(values))
	for i := range values {
		entries = append(entries, llvm.ConstBitCast(values[i], bytePtrType))
	}

	initializer := llvm.ConstArray(bytePtrType, entries)
	global := llvm.AddGlobal(mod, initializer.Type(), "llvm.used")
	global.SetInitializer(initializer)
	global.SetLinkage(llvm.AppendingLinkage)
}

1
transform/testdata/stacksize.out.ll предоставленный
Просмотреть файл

@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv7m-none-eabi" target triple = "armv7m-none-eabi"
@"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes" @"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes"
@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)]
declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*) declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)