replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated, which in turn depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

The binary size metric appears to be slightly unstable, and it's unclear why.
When reverting the Deps to Imports change, it settles at 5.386Mi,
which is almost exactly in between the two measurements below.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)
pull/751/head
Daniel Martí 2 years ago
parent cee53a7868
commit 7d1bd13778

@ -57,8 +57,7 @@ func BenchmarkBuild(b *testing.B) {
outputBin := filepath.Join(tdir, "output")
sourceDir := filepath.Join(tdir, "src")
err := os.Mkdir(sourceDir, 0o777)
qt.Assert(b, err, qt.IsNil)
qt.Assert(b, os.Mkdir(sourceDir, 0o777), qt.IsNil)
writeSourceFile := func(name string, content []byte) {
err := os.WriteFile(filepath.Join(sourceDir, name), content, 0o666)
@ -72,13 +71,18 @@ func BenchmarkBuild(b *testing.B) {
b.ResetTimer()
b.StopTimer()
for i := 0; i < b.N; i++ {
// First we do a fresh build, using a new GOCACHE.
// and the second does an incremental rebuild reusing the cache.
gocache, err := os.MkdirTemp(tdir, "gocache-*")
qt.Assert(b, err, qt.IsNil)
// First we do a fresh build, using empty cache directories,
// and the second does an incremental rebuild reusing the same cache directories.
goCache := filepath.Join(tdir, "go-cache")
qt.Assert(b, os.RemoveAll(goCache), qt.IsNil)
qt.Assert(b, os.Mkdir(goCache, 0o777), qt.IsNil)
garbleCache := filepath.Join(tdir, "garble-cache")
qt.Assert(b, os.RemoveAll(garbleCache), qt.IsNil)
qt.Assert(b, os.Mkdir(garbleCache, 0o777), qt.IsNil)
env := append(os.Environ(),
"RUN_GARBLE_MAIN=true",
"GOCACHE="+gocache,
"GOCACHE="+goCache,
"GARBLE_CACHE="+garbleCache,
"GARBLE_WRITE_ALLOCS=true",
)
args := []string{"build", "-v", "-o=" + outputBin, sourceDir}

@ -137,6 +137,8 @@ func applyPatches(srcDir, workingDir string, modFiles map[string]bool, patches [
return mod, nil
}
// TODO: put linker binaries into fsCache in the main package
func cachePath() (string, error) {
var cacheDir string
if val, ok := os.LookupEnv(garbleCacheDir); ok {

@ -35,6 +35,7 @@ import (
"unicode"
"unicode/utf8"
"github.com/rogpeppe/go-internal/cache"
"golang.org/x/exp/slices"
"golang.org/x/mod/module"
"golang.org/x/mod/semver"
@ -397,7 +398,18 @@ func mainErr(args []string) error {
return commandReverse(args)
case "build", "test", "run":
cmd, err := toolexecCmd(command, args)
defer os.RemoveAll(os.Getenv("GARBLE_SHARED"))
defer func() {
if err := os.RemoveAll(os.Getenv("GARBLE_SHARED")); err != nil {
fmt.Fprintf(os.Stderr, "could not clean up GARBLE_SHARED: %v\n", err)
}
fsCache, err := openCache()
if err == nil {
err = fsCache.Trim()
}
if err != nil {
fmt.Fprintf(os.Stderr, "could not trim GARBLE_CACHE: %v\n", err)
}
}()
if err != nil {
return err
}
@ -893,56 +905,27 @@ func transformCompile(args []string) ([]string, error) {
}
files = append(files, file)
}
tf := newTransformer()
// cachedOutput is modified starting at this point, with the typecheck call.
// We use an extra syntax block to clarify what bits of code set up the caching.
// Note that if the file already exists in the cache from another build,
// we don't need to write to it again thanks to the hash.
// TODO: as an optimization, just load that one gob file.
{
if err := loadCachedOutputs(); err != nil {
return nil, err
}
if err := tf.typecheck(files); err != nil {
return nil, err
}
ssaProg := ssa.NewProgram(fset, 0)
tf := &transformer{}
// Create SSA packages for all imports.
// Order is not significant.
created := make(map[*types.Package]bool)
var createAll func(pkgs []*types.Package)
createAll = func(pkgs []*types.Package) {
for _, p := range pkgs {
if !created[p] {
created[p] = true
ssaProg.CreatePackage(p, nil, nil, true)
createAll(p.Imports())
}
}
}
createAll(tf.pkg.Imports())
ssaPkg := ssaProg.CreatePackage(tf.pkg, files, tf.info, false)
ssaPkg.Build()
tf.recordReflection(ssaPkg)
// Even if loadGarbleCache below finds a direct cache hit,
// other parts of garble still need type information to obfuscate.
// We could potentially avoid this by saving the type info we need in the cache,
// although in general that wouldn't help much, since it's rare for Go's cache
// to miss on a package and for our cache to hit.
if err := tf.typecheck(files); err != nil {
return nil, err
}
if err := tf.prefillObjectMaps(files); err != nil {
return nil, err
}
// NOTE: cachedOutput.KnownEmbeddedAliasFields is already filled by typecheck above.
// That's needed if loadCachedOutput is a miss, as we need to save the map.
// If loadCachedOutput is a hit, then it's still fine, as the map entries are the same.
if err := tf.loadCachedOutput(files); err != nil {
return nil, err
}
if err := writeGobExclusive(
garbleExportFile(curPkg),
cachedOutput,
); err != nil && !errors.Is(err, fs.ErrExist) {
return nil, err
}
if err := tf.prefillObjectMaps(files); err != nil {
return nil, err
}
// cachedOutput isn't modified after this point.
flags = alterTrimpath(flags)
newImportCfg, err := processImportCfg(flags)
@ -1013,6 +996,7 @@ func transformCompile(args []string) ([]string, error) {
// TODO(mvdan): replace this workaround with an actual fix if we can.
// This workaround is presumably worse on the build cache,
// as we end up with extra near-duplicate cached artifacts.
// TODO: can we remove this now with the better caching?
if i == 0 {
src = append(src, fmt.Sprintf(
"\nvar garbleActionID = %q\n", encodeBuildIDHash(curPkg.GarbleActionID),
@ -1279,9 +1263,13 @@ type (
// knownCannotObfuscateUnexported is like KnownCannotObfuscate but for
// unexported names. We don't need to store this in the build cache,
// because these names cannot be referenced by downstream packages.
//
// TODO: move inside cachedOutput once we allow loadCachedOutput to load
// the cache entry for curPkg.
// Otherwise, we only fill this when loadCachedOutput hits a cache miss for curPkg.
var knownCannotObfuscateUnexported = map[types.Object]bool{}
// cachedOutput contains information that will be stored as per garbleExportFile.
// cachedOutput contains information that will be stored in fsCache.
// Note that cachedOutput gets loaded from all direct package dependencies,
// and gets filled while obfuscating the current package, so it ends up
// containing entries for the current package and its transitive dependencies.
@ -1315,56 +1303,122 @@ var cachedOutput = struct {
KnownEmbeddedAliasFields: map[objectString]typeName{},
}
// garbleExportFile returns an absolute path to a build cache entry
// which belongs to garble and corresponds to the given Go package.
//
// Unlike pkg.Export, it is only read and written by garble itself.
// Also unlike pkg.Export, it includes GarbleActionID,
// so its path will change if the obfuscated build changes.
//
// The purpose of such a file is to store garble-specific information
// in the build cache, to be reused at a later time.
// The file should have the same lifetime as pkg.Export,
// as it lives under the same cache directory that gets trimmed automatically.
func garbleExportFile(pkg *listedPackage) string {
trimmed := strings.TrimSuffix(pkg.Export, "-d")
if trimmed == pkg.Export {
panic(fmt.Sprintf("unexpected export path of %s: %q", pkg.ImportPath, pkg.Export))
}
return trimmed + "-garble-" + encodeBuildIDHash(pkg.GarbleActionID) + "-d"
func openCache() (*cache.Cache, error) {
dir := os.Getenv("GARBLE_CACHE") // e.g. "~/.cache/garble"
if dir == "" {
parentDir, err := os.UserCacheDir()
if err != nil {
return nil, err
}
dir = filepath.Join(parentDir, "garble")
}
// Use a subdirectory for the hashed build cache, to clarify what it is,
// and to allow us to have other directories or files later on without mixing.
dir = filepath.Join(dir, "build")
if err := os.MkdirAll(dir, 0o777); err != nil {
return nil, err
}
return cache.Open(dir)
}
func loadCachedOutputs() error {
func (tf *transformer) loadCachedOutput(files []*ast.File) error {
fsCache, err := openCache()
if err != nil {
return err
}
if false { // TODO: re-enable once the problem described in knownCannotObfuscateUnexported is solved
filename, _, err := fsCache.GetFile(curPkg.GarbleActionID)
// Already in the cache; load it directly.
if err == nil {
f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()
if err := gob.NewDecoder(f).Decode(&cachedOutput); err != nil {
return fmt.Errorf("gob decode: %w", err)
}
return nil
}
}
// Not yet in the cache. Load the cache entries for all direct dependencies,
// build our cache entry, and write it to disk.
// Note that practically all errors from Cache.GetFile are a cache miss;
// for example, a file might exist but be empty if another process
// is filling the same cache entry concurrently.
//
// TODO: if A (curPkg) imports B and C, and B also imports C,
// then loading the gob files from both B and C is unnecessary;
// loading B's gob file would be enough. Is there an easy way to do that?
startTime := time.Now()
loaded := 0
for _, path := range curPkg.Deps {
for _, path := range curPkg.Imports {
if path == "C" {
// `go list -json` shows "C" in Imports but not Deps. A bug?
continue
}
pkg, err := listPackage(path)
if err != nil {
panic(err) // shouldn't happen
}
if pkg.Export == "" {
if pkg.BuildID == "" {
continue // nothing to load
}
// this function literal is used for the deferred close
if err := func() error {
filename := garbleExportFile(pkg)
filename, _, err := fsCache.GetFile(pkg.GarbleActionID)
if err != nil {
return err
}
f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()
// Decode appends new entries to the existing maps
if err := gob.NewDecoder(f).Decode(&cachedOutput); err != nil {
return fmt.Errorf("gob decode: %w", err)
}
return nil
}(); err != nil {
return fmt.Errorf("cannot load garble export file for %s: %w", path, err)
return fmt.Errorf("cannot load cache entry for %s: %w", path, err)
}
loaded++
}
log.Printf("%d cached output files loaded in %s", loaded, debugSince(startTime))
ssaProg := ssa.NewProgram(fset, 0)
// Create SSA packages for all imports.
// Order is not significant.
created := make(map[*types.Package]bool)
var createAll func(pkgs []*types.Package)
createAll = func(pkgs []*types.Package) {
for _, p := range pkgs {
if !created[p] {
created[p] = true
ssaProg.CreatePackage(p, nil, nil, true)
createAll(p.Imports())
}
}
}
createAll(tf.pkg.Imports())
ssaPkg := ssaProg.CreatePackage(tf.pkg, files, tf.info, false)
ssaPkg.Build()
tf.reflectCheckedAPIs = make(map[string]bool)
tf.recordReflection(ssaPkg)
// Unlikely that we could stream the gob encode, as cache.Put wants an io.ReadSeeker.
var buf bytes.Buffer
if err := gob.NewEncoder(&buf).Encode(cachedOutput); err != nil {
return err
}
if err := fsCache.PutBytes(curPkg.GarbleActionID, buf.Bytes()); err != nil {
return err
}
return nil
}
@ -1446,25 +1500,18 @@ type transformer struct {
reflectCheckedAPIs map[string]bool
}
// newTransformer helps initialize some maps.
func newTransformer() *transformer {
return &transformer{
info: &types.Info{
Types: make(map[ast.Expr]types.TypeAndValue),
Defs: make(map[*ast.Ident]types.Object),
Uses: make(map[*ast.Ident]types.Object),
Implicits: make(map[ast.Node]types.Object),
Scopes: make(map[ast.Node]*types.Scope),
Selections: make(map[*ast.SelectorExpr]*types.Selection),
Instances: make(map[*ast.Ident]types.Instance),
},
recordTypeDone: make(map[*types.Named]bool),
fieldToStruct: make(map[*types.Var]*types.Struct),
reflectCheckedAPIs: make(map[string]bool),
}
}
func (tf *transformer) typecheck(files []*ast.File) error {
tf.info = &types.Info{
Types: make(map[ast.Expr]types.TypeAndValue),
Defs: make(map[*ast.Ident]types.Object),
Uses: make(map[*ast.Ident]types.Object),
Implicits: make(map[ast.Node]types.Object),
Scopes: make(map[ast.Node]*types.Scope),
Selections: make(map[*ast.SelectorExpr]*types.Selection),
Instances: make(map[*ast.Ident]types.Instance),
}
tf.recordTypeDone = make(map[*types.Named]bool)
tf.fieldToStruct = make(map[*types.Var]*types.Struct)
origTypesConfig := types.Config{Importer: origImporter}
pkg, err := origTypesConfig.Check(curPkg.ImportPath, fset, files, tf.info)
if err != nil {

@ -100,9 +100,11 @@ func TestScript(t *testing.T) {
// Don't share cache dirs with the host if we want to collect code
// coverage. Otherwise, the coverage info might be incomplete.
env.Setenv("GOCACHE", filepath.Join(tempCacheDir, "go-cache"))
env.Setenv("GARBLE_CACHE_DIR", filepath.Join(tempCacheDir, "garble-cache"))
env.Setenv("GARBLE_CACHE", filepath.Join(tempCacheDir, "garble-cache"))
env.Setenv("GARBLE_CACHE_DIR", filepath.Join(tempCacheDir, "garble-cache-2"))
} else {
// GOCACHE is initialized by gotooltest to use the host's cache.
env.Setenv("GARBLE_CACHE", filepath.Join(hostCacheDir, "garble"))
env.Setenv("GARBLE_CACHE_DIR", hostCacheDir)
}
return nil
@ -132,7 +134,6 @@ func TestScript(t *testing.T) {
"generate-literals": generateLiterals,
"setenvfile": setenvfile,
"grepfiles": grepfiles,
"find-remove": findRemove,
},
UpdateScripts: *update,
RequireExplicitExec: true,
@ -378,36 +379,6 @@ func grepfiles(ts *testscript.TestScript, neg bool, args []string) {
}
}
func findRemove(ts *testscript.TestScript, neg bool, args []string) {
if neg {
ts.Fatalf("unsupported: ! find-remove")
}
if len(args) != 2 {
ts.Fatalf("usage: find-remove path pattern")
}
removed := 0
path, pattern := ts.MkAbs(args[0]), args[1]
rx := regexp.MustCompile(pattern)
if err := filepath.WalkDir(path, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if rx.MatchString(path) {
if err := os.Remove(path); err != nil {
return err
}
removed++
}
return nil
}); err != nil {
ts.Fatalf("%s", err)
}
if removed == 0 {
ts.Fatalf("no matching files to remove")
}
ts.Logf("removed %d matching files", removed)
}
func TestSplitFlagsFromArgs(t *testing.T) {
t.Parallel()
tests := []struct {

@ -93,7 +93,7 @@ One can reverse a captured panic stack trace as follows:
}
files = append(files, file)
}
tf := newTransformer()
tf := &transformer{}
if err := tf.typecheck(files); err != nil {
return err
}

@ -4,21 +4,22 @@
[short] stop # This step is slow by design, since it starts with an empty cache.
env GOCACHE=${WORK}/gocache
env GOCACHE=${WORK}/go-cache
env GARBLE_CACHE=${WORK}/garble-cache
# level1a has the regular Go build cached.
exec go build ./level1a
# level1b has the garble build cached, but our own cache files are gone.
exec garble build ./level1b
find-remove gocache '-garble-'
rm garble-cache
# level1c has the garble build cached with all files available.
exec garble build ./level1c
# TODO: this test now fails due to our fragile caching.
! exec garble build
stderr 'cannot load garble export file'
stderr 'cannot load cache entry for test/main/level1b'
# exec garble build
# exec ./main
# cmp stderr main.stderr

Loading…
Cancel
Save