ThinLTO optimizes across LLVM modules at link time. This means that
optimizations (such as inlining and const-propagation) are possible
between C and Go. This makes this change especially useful for CGo, but
not just for CGo. By doing some optimizations at link time, the linker
can discard some unused functions and this leads to a size reduction on
average. It does increase code size in some cases, but that's true for
most optimizations.

I've excluded a number of targets for now (wasm, avr, xtensa, windows,
macos). They can probably be supported with some more work, but that
should be done in separate PRs.

Overall, this change results in an average 3.24% size reduction over all
the tinygo.org/x/drivers smoke tests.

TODO: this commit runs part of the pass pipeline twice. We should set
the PrepareForThinLTO flag in the PassManagerBuilder for even further
reduced code size (0.7%) and improved compilation speed.
Этот коммит содержится в:
Ayke van Laethem 2022-02-07 15:19:25 +01:00 коммит произвёл Ron Evans
родитель d4b1467e4c
коммит 603fff78d4
8 изменённых файлов: 121 добавлений и 25 удалений

Просмотреть файл

@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
defer os.RemoveAll(dir)
}
// Look up the build cache directory, which is used to speed up incremental
// builds.
cacheDir := goenv.Get("GOCACHE")
if cacheDir == "off" {
// Use temporary build directory instead, effectively disabling the
// build cache.
cacheDir = dir
}
// Check for a libc dependency.
// As a side effect, this also creates the headers for the given libc, if
// the libc needs them.
@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
// Determine the path of the bitcode file (which is a serialized version
// of a LLVM module).
cacheDir := goenv.Get("GOCACHE")
if cacheDir == "off" {
// Use temporary build directory instead, effectively disabling the
// build cache.
cacheDir = dir
}
bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc")
packageBitcodePaths[pkg.ImportPath] = bitcodePath
@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
// Load and link all the bitcode files. This does not yet optimize
// anything, it only links the bitcode files together.
ctx := llvm.NewContext()
mod = ctx.NewModule("")
mod = ctx.NewModule("main")
for _, pkg := range lprogram.Sorted() {
pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath])
if err != nil {
@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
}
return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666)
case ".bc":
data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes()
return ioutil.WriteFile(outpath, data, 0666)
var buf llvm.MemoryBuffer
if config.UseThinLTO() {
buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
} else {
buf = llvm.WriteBitcodeToMemoryBuffer(mod)
}
defer buf.Dispose()
return ioutil.WriteFile(outpath, buf.Bytes(), 0666)
case ".ll":
data := []byte(mod.String())
return ioutil.WriteFile(outpath, data, 0666)
@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
dependencies: []*compileJob{programJob},
result: objfile,
run: func(*compileJob) error {
llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
if err != nil {
return err
var llvmBuf llvm.MemoryBuffer
if config.UseThinLTO() {
llvmBuf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
} else {
var err error
llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
if err != nil {
return err
}
}
defer llvmBuf.Dispose()
return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666)
},
}
@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
job := &compileJob{
description: "compile extra file " + path,
run: func(job *compileJob) error {
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands)
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands)
job.result = result
return err
},
@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
job := &compileJob{
description: "compile CGo file " + abspath,
run: func(job *compileJob) error {
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands)
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands)
job.result = result
return err
},
@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
if config.Options.PrintCommands != nil {
config.Options.PrintCommands(config.Target.Linker, ldflags...)
}
if config.UseThinLTO() {
ldflags = append(ldflags,
"--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"),
"-plugin-opt=mcpu="+config.CPU(),
"-plugin-opt=O"+strconv.Itoa(optLevel),
"-plugin-opt=thinlto")
if config.CodeModel() != "default" {
ldflags = append(ldflags,
"-mllvm", "-code-model="+config.CodeModel())
}
if sizeLevel >= 2 {
// Workaround with roughly the same effect as
// https://reviews.llvm.org/D119342.
// Can hopefully be removed in LLVM 15.
ldflags = append(ldflags,
"-mllvm", "--rotation-max-header-size=0")
}
}
err = link(config.Target.Linker, ldflags...)
if err != nil {
return &commandError{"failed to link", executable, err}
@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error {
}
}
if config.GOOS() != "darwin" {
if config.GOOS() != "darwin" && !config.UseThinLTO() {
transform.ApplyFunctionSections(mod) // -ffunction-sections
}

Просмотреть файл

@ -56,7 +56,7 @@ import (
// depfile but without invalidating its name. For this reason, the depfile is
// written on each new compilation (even when it seems unnecessary). However, it
// could in rare cases lead to a stale file fetched from the cache.
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) {
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) {
// Hash input file.
fileHash, err := hashFile(abspath)
if err != nil {
@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock"))
defer unlock()
ext := ".o"
if thinlto {
ext = ".bc"
}
// Create cache key for the dependencies file.
buf, err := json.Marshal(struct {
Path string
@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
}
// Obtain hashes of all the files listed as a dependency.
outpath, err := makeCFileCachePath(dependencies, depfileNameHash)
outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext)
if err == nil {
if _, err := os.Stat(outpath); err == nil {
return outpath, nil
@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
return "", err
}
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o")
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext)
if err != nil {
return "", err
}
@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
depTmpFile.Close()
flags := append([]string{}, cflags...) // copy cflags
flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies
if thinlto {
flags = append(flags, "-flto=thin")
}
flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath)
if strings.ToLower(filepath.Ext(abspath)) == ".s" {
// If this is an assembly file (.s or .S, lowercase or uppercase), then
@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
}
// Move temporary object file to final location.
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash)
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext)
if err != nil {
return "", err
}
@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
// Create a cache path (a path in GOCACHE) to store the output of a compiler
// job. This path is based on the dep file name (which is a hash of metadata
// including compiler flags) and the hash of all input files in the paths slice.
func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) {
func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) {
// Hash all input files.
fileHashes := make(map[string]string, len(paths))
for _, path := range paths {
@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error)
outFileNameBuf := sha512.Sum512_224(buf)
cacheKey := hex.EncodeToString(outFileNameBuf[:])
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o")
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext)
return outpath, nil
}

Просмотреть файл

@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool {
return false
}
// UseThinLTO returns whether ThinLTO should be used for the given target. Some
// targets (such as wasm) are not yet supported.
// We should try and remove as many exceptions as possible in the future, so
// that this optimization can be applied in more places.
func (c *Config) UseThinLTO() bool {
parts := strings.Split(c.Triple(), "-")
if parts[0] == "wasm32" {
// wasm-ld doesn't seem to support ThinLTO yet.
return false
}
if parts[0] == "avr" || parts[0] == "xtensa" {
// These use external (GNU) linkers which might perhaps support ThinLTO
// through a plugin, but it's too much hassle to set up.
return false
}
if len(parts) >= 2 && strings.HasPrefix(parts[2], "macos") {
// We use an external linker here at the moment.
return false
}
if len(parts) >= 2 && parts[2] == "windows" {
// Linker error (undefined runtime.trackedGlobalsBitmap) when linking
// for Windows. Disable it for now until that's figured out and fixed.
return false
}
// Other architectures support ThinLTO.
return true
}
// RP2040BootPatch returns whether the RP2040 boot patch should be applied that
// calculates and patches in the checksum for the 2nd stage bootloader.
func (c *Config) RP2040BootPatch() bool {

2
go.mod
Просмотреть файл

@ -15,5 +15,5 @@ require (
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9
gopkg.in/yaml.v2 v2.4.0
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1
)

4
go.sum
Просмотреть файл

@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448=
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98=
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=

Просмотреть файл

@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i
funcPasses.FinalizeFunc()
// Run module passes.
// TODO: somehow set the PrepareForThinLTO flag in the pass manager builder.
modPasses := llvm.NewPassManager()
defer modPasses.Dispose()
builder.Populate(modPasses)

Просмотреть файл

@ -11,8 +11,9 @@ import (
// modified after linking.
func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string {
functionMap := map[llvm.Value][]llvm.Value{}
var functions []llvm.Value
var functions []llvm.Value // ptrtoint values of functions
var functionNames []string
var functionValues []llvm.Value // direct references to functions
for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) {
if use.FirstUse().IsNil() {
// Apparently this stack size isn't used.
@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
if _, ok := functionMap[ptrtoint]; !ok {
functions = append(functions, ptrtoint)
functionNames = append(functionNames, ptrtoint.Operand(0).Name())
functionValues = append(functionValues, ptrtoint.Operand(0))
}
functionMap[ptrtoint] = append(functionMap[ptrtoint], use)
}
@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
}
stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes))
// Add all relevant values to llvm.used (for LTO).
appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...)
// Replace the calls with loads from the new global with stack sizes.
irbuilder := mod.Context().NewBuilder()
defer irbuilder.Dispose()
@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
return functionNames
}
// Append the given values to the llvm.used array. The values can be any pointer
// type, they will be bitcast to i8*.
func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) {
if !mod.NamedGlobal("llvm.used").IsNil() {
// Sanity check. TODO: we don't emit such a global at the moment, but
// when we do we should append to it instead.
panic("todo: append to existing llvm.used")
}
i8ptrType := llvm.PointerType(mod.Context().Int8Type(), 0)
var castValues []llvm.Value
for _, value := range values {
castValues = append(castValues, llvm.ConstBitCast(value, i8ptrType))
}
usedInitializer := llvm.ConstArray(i8ptrType, castValues)
used := llvm.AddGlobal(mod, usedInitializer.Type(), "llvm.used")
used.SetInitializer(usedInitializer)
used.SetLinkage(llvm.AppendingLinkage)
}

1
transform/testdata/stacksize.out.ll предоставленный
Просмотреть файл

@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv7m-none-eabi"
@"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes"
@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)]
declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)