all: add support for ThinLTO
ThinLTO optimizes across LLVM modules at link time. This means that optimizations (such as inlining and const-propagation) are possible between C and Go. This makes this change especially useful for CGo, but not just for CGo. By doing some optimizations at link time, the linker can discard some unused functions and this leads to a size reduction on average. It does increase code size in some cases, but that's true for most optimizations. I've excluded a number of targets for now (wasm, avr, xtensa, windows, macos). They can probably be supported with some more work, but that should be done in separate PRs. Overall, this change results in an average 3.24% size reduction over all the tinygo.org/x/drivers smoke tests. TODO: this commit runs part of the pass pipeline twice. We should set the PrepareForThinLTO flag in the PassManagerBuilder for even further reduced code size (0.7%) and improved compilation speed.
Этот коммит содержится в:
родитель
d4b1467e4c
коммит
603fff78d4
8 изменённых файлов: 121 добавлений и 25 удалений
|
@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
defer os.RemoveAll(dir)
|
||||
}
|
||||
|
||||
// Look up the build cache directory, which is used to speed up incremental
|
||||
// builds.
|
||||
cacheDir := goenv.Get("GOCACHE")
|
||||
if cacheDir == "off" {
|
||||
// Use temporary build directory instead, effectively disabling the
|
||||
// build cache.
|
||||
cacheDir = dir
|
||||
}
|
||||
|
||||
// Check for a libc dependency.
|
||||
// As a side effect, this also creates the headers for the given libc, if
|
||||
// the libc needs them.
|
||||
|
@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
|
||||
// Determine the path of the bitcode file (which is a serialized version
|
||||
// of a LLVM module).
|
||||
cacheDir := goenv.Get("GOCACHE")
|
||||
if cacheDir == "off" {
|
||||
// Use temporary build directory instead, effectively disabling the
|
||||
// build cache.
|
||||
cacheDir = dir
|
||||
}
|
||||
bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc")
|
||||
packageBitcodePaths[pkg.ImportPath] = bitcodePath
|
||||
|
||||
|
@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
// Load and link all the bitcode files. This does not yet optimize
|
||||
// anything, it only links the bitcode files together.
|
||||
ctx := llvm.NewContext()
|
||||
mod = ctx.NewModule("")
|
||||
mod = ctx.NewModule("main")
|
||||
for _, pkg := range lprogram.Sorted() {
|
||||
pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath])
|
||||
if err != nil {
|
||||
|
@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
}
|
||||
return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666)
|
||||
case ".bc":
|
||||
data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes()
|
||||
return ioutil.WriteFile(outpath, data, 0666)
|
||||
var buf llvm.MemoryBuffer
|
||||
if config.UseThinLTO() {
|
||||
buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
|
||||
} else {
|
||||
buf = llvm.WriteBitcodeToMemoryBuffer(mod)
|
||||
}
|
||||
defer buf.Dispose()
|
||||
return ioutil.WriteFile(outpath, buf.Bytes(), 0666)
|
||||
case ".ll":
|
||||
data := []byte(mod.String())
|
||||
return ioutil.WriteFile(outpath, data, 0666)
|
||||
|
@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
dependencies: []*compileJob{programJob},
|
||||
result: objfile,
|
||||
run: func(*compileJob) error {
|
||||
llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
|
||||
if err != nil {
|
||||
return err
|
||||
var llvmBuf llvm.MemoryBuffer
|
||||
if config.UseThinLTO() {
|
||||
llvmBuf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod)
|
||||
} else {
|
||||
var err error
|
||||
llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
defer llvmBuf.Dispose()
|
||||
return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666)
|
||||
},
|
||||
}
|
||||
|
@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
job := &compileJob{
|
||||
description: "compile extra file " + path,
|
||||
run: func(job *compileJob) error {
|
||||
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands)
|
||||
result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands)
|
||||
job.result = result
|
||||
return err
|
||||
},
|
||||
|
@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
job := &compileJob{
|
||||
description: "compile CGo file " + abspath,
|
||||
run: func(job *compileJob) error {
|
||||
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands)
|
||||
result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands)
|
||||
job.result = result
|
||||
return err
|
||||
},
|
||||
|
@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
|
|||
if config.Options.PrintCommands != nil {
|
||||
config.Options.PrintCommands(config.Target.Linker, ldflags...)
|
||||
}
|
||||
if config.UseThinLTO() {
|
||||
ldflags = append(ldflags,
|
||||
"--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"),
|
||||
"-plugin-opt=mcpu="+config.CPU(),
|
||||
"-plugin-opt=O"+strconv.Itoa(optLevel),
|
||||
"-plugin-opt=thinlto")
|
||||
if config.CodeModel() != "default" {
|
||||
ldflags = append(ldflags,
|
||||
"-mllvm", "-code-model="+config.CodeModel())
|
||||
}
|
||||
if sizeLevel >= 2 {
|
||||
// Workaround with roughly the same effect as
|
||||
// https://reviews.llvm.org/D119342.
|
||||
// Can hopefully be removed in LLVM 15.
|
||||
ldflags = append(ldflags,
|
||||
"-mllvm", "--rotation-max-header-size=0")
|
||||
}
|
||||
}
|
||||
err = link(config.Target.Linker, ldflags...)
|
||||
if err != nil {
|
||||
return &commandError{"failed to link", executable, err}
|
||||
|
@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error {
|
|||
}
|
||||
}
|
||||
|
||||
if config.GOOS() != "darwin" {
|
||||
if config.GOOS() != "darwin" && !config.UseThinLTO() {
|
||||
transform.ApplyFunctionSections(mod) // -ffunction-sections
|
||||
}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ import (
|
|||
// depfile but without invalidating its name. For this reason, the depfile is
|
||||
// written on each new compilation (even when it seems unnecessary). However, it
|
||||
// could in rare cases lead to a stale file fetched from the cache.
|
||||
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) {
|
||||
func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) {
|
||||
// Hash input file.
|
||||
fileHash, err := hashFile(abspath)
|
||||
if err != nil {
|
||||
|
@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock"))
|
||||
defer unlock()
|
||||
|
||||
ext := ".o"
|
||||
if thinlto {
|
||||
ext = ".bc"
|
||||
}
|
||||
|
||||
// Create cache key for the dependencies file.
|
||||
buf, err := json.Marshal(struct {
|
||||
Path string
|
||||
|
@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
}
|
||||
|
||||
// Obtain hashes of all the files listed as a dependency.
|
||||
outpath, err := makeCFileCachePath(dependencies, depfileNameHash)
|
||||
outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext)
|
||||
if err == nil {
|
||||
if _, err := os.Stat(outpath); err == nil {
|
||||
return outpath, nil
|
||||
|
@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
return "", err
|
||||
}
|
||||
|
||||
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o")
|
||||
objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
depTmpFile.Close()
|
||||
flags := append([]string{}, cflags...) // copy cflags
|
||||
flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies
|
||||
if thinlto {
|
||||
flags = append(flags, "-flto=thin")
|
||||
}
|
||||
flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath)
|
||||
if strings.ToLower(filepath.Ext(abspath)) == ".s" {
|
||||
// If this is an assembly file (.s or .S, lowercase or uppercase), then
|
||||
|
@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
}
|
||||
|
||||
// Move temporary object file to final location.
|
||||
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash)
|
||||
outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands
|
|||
// Create a cache path (a path in GOCACHE) to store the output of a compiler
|
||||
// job. This path is based on the dep file name (which is a hash of metadata
|
||||
// including compiler flags) and the hash of all input files in the paths slice.
|
||||
func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) {
|
||||
func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) {
|
||||
// Hash all input files.
|
||||
fileHashes := make(map[string]string, len(paths))
|
||||
for _, path := range paths {
|
||||
|
@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error)
|
|||
outFileNameBuf := sha512.Sum512_224(buf)
|
||||
cacheKey := hex.EncodeToString(outFileNameBuf[:])
|
||||
|
||||
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o")
|
||||
outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext)
|
||||
return outpath, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// UseThinLTO returns whether ThinLTO should be used for the given target. Some
|
||||
// targets (such as wasm) are not yet supported.
|
||||
// We should try and remove as many exceptions as possible in the future, so
|
||||
// that this optimization can be applied in more places.
|
||||
func (c *Config) UseThinLTO() bool {
|
||||
parts := strings.Split(c.Triple(), "-")
|
||||
if parts[0] == "wasm32" {
|
||||
// wasm-ld doesn't seem to support ThinLTO yet.
|
||||
return false
|
||||
}
|
||||
if parts[0] == "avr" || parts[0] == "xtensa" {
|
||||
// These use external (GNU) linkers which might perhaps support ThinLTO
|
||||
// through a plugin, but it's too much hassle to set up.
|
||||
return false
|
||||
}
|
||||
if len(parts) >= 2 && strings.HasPrefix(parts[2], "macos") {
|
||||
// We use an external linker here at the moment.
|
||||
return false
|
||||
}
|
||||
if len(parts) >= 2 && parts[2] == "windows" {
|
||||
// Linker error (undefined runtime.trackedGlobalsBitmap) when linking
|
||||
// for Windows. Disable it for now until that's figured out and fixed.
|
||||
return false
|
||||
}
|
||||
// Other architectures support ThinLTO.
|
||||
return true
|
||||
}
|
||||
|
||||
// RP2040BootPatch returns whether the RP2040 boot patch should be applied that
|
||||
// calculates and patches in the checksum for the 2nd stage bootloader.
|
||||
func (c *Config) RP2040BootPatch() bool {
|
||||
|
|
2
go.mod
2
go.mod
|
@ -15,5 +15,5 @@ require (
|
|||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
|
||||
golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3
|
||||
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1
|
||||
)
|
||||
|
|
4
go.sum
4
go.sum
|
@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
|
|||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448=
|
||||
tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=
|
||||
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98=
|
||||
tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0=
|
||||
|
|
|
@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i
|
|||
funcPasses.FinalizeFunc()
|
||||
|
||||
// Run module passes.
|
||||
// TODO: somehow set the PrepareForThinLTO flag in the pass manager builder.
|
||||
modPasses := llvm.NewPassManager()
|
||||
defer modPasses.Dispose()
|
||||
builder.Populate(modPasses)
|
||||
|
|
|
@ -11,8 +11,9 @@ import (
|
|||
// modified after linking.
|
||||
func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string {
|
||||
functionMap := map[llvm.Value][]llvm.Value{}
|
||||
var functions []llvm.Value
|
||||
var functions []llvm.Value // ptrtoint values of functions
|
||||
var functionNames []string
|
||||
var functionValues []llvm.Value // direct references to functions
|
||||
for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) {
|
||||
if use.FirstUse().IsNil() {
|
||||
// Apparently this stack size isn't used.
|
||||
|
@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
|
|||
if _, ok := functionMap[ptrtoint]; !ok {
|
||||
functions = append(functions, ptrtoint)
|
||||
functionNames = append(functionNames, ptrtoint.Operand(0).Name())
|
||||
functionValues = append(functionValues, ptrtoint.Operand(0))
|
||||
}
|
||||
functionMap[ptrtoint] = append(functionMap[ptrtoint], use)
|
||||
}
|
||||
|
@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
|
|||
}
|
||||
stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes))
|
||||
|
||||
// Add all relevant values to llvm.used (for LTO).
|
||||
appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...)
|
||||
|
||||
// Replace the calls with loads from the new global with stack sizes.
|
||||
irbuilder := mod.Context().NewBuilder()
|
||||
defer irbuilder.Dispose()
|
||||
|
@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string
|
|||
|
||||
return functionNames
|
||||
}
|
||||
|
||||
// Append the given values to the llvm.used array. The values can be any pointer
|
||||
// type, they will be bitcast to i8*.
|
||||
func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) {
|
||||
if !mod.NamedGlobal("llvm.used").IsNil() {
|
||||
// Sanity check. TODO: we don't emit such a global at the moment, but
|
||||
// when we do we should append to it instead.
|
||||
panic("todo: append to existing llvm.used")
|
||||
}
|
||||
i8ptrType := llvm.PointerType(mod.Context().Int8Type(), 0)
|
||||
var castValues []llvm.Value
|
||||
for _, value := range values {
|
||||
castValues = append(castValues, llvm.ConstBitCast(value, i8ptrType))
|
||||
}
|
||||
usedInitializer := llvm.ConstArray(i8ptrType, castValues)
|
||||
used := llvm.AddGlobal(mod, usedInitializer.Type(), "llvm.used")
|
||||
used.SetInitializer(usedInitializer)
|
||||
used.SetLinkage(llvm.AppendingLinkage)
|
||||
}
|
||||
|
|
1
transform/testdata/stacksize.out.ll
предоставленный
1
transform/testdata/stacksize.out.ll
предоставленный
|
@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
|||
target triple = "armv7m-none-eabi"
|
||||
|
||||
@"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes"
|
||||
@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)]
|
||||
|
||||
declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)
|
||||
|
||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче