From b234cef3354e9a3cea9350dc1f486694a19b3f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Wed, 7 Oct 2020 23:17:07 +0100 Subject: [PATCH] initial support for build caching As per the discussion in https://github.com/golang/go/issues/41145, it turns out that we don't need special support for build caching in -toolexec. We can simply modify the behavior of "[...]/compile -V=full" and "[...]/link -V=full" so that they include garble's own version and options in the printed build ID. The part of the build ID that matters is the last, since it's the "content ID" which is used to work out whether there is a need to redo the action (build) or not. Since cmd/go parses the last word in the output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}". The slashes let us imitate a full binary build ID, but we assume that the other components such as the action ID are not necessary, since the only reader here is cmd/go and it only consumes the content ID. The reported content ID includes the tool's original content ID, garble's own content ID from the built binary, and the garble options which modify how we obfuscate code. If any of the three changes, we should use a different build cache key. GOPRIVATE also affects caching, since a different GOPRIVATE value means that we might have to garble a different set of packages. Include tests, which mainly check that 'garble build -v' prints package lines when we expect to always need to rebuild packages, and that it prints nothing when we should be reusing the build cache even when the built binary is missing. After this change, 'go test' on Go 1.15.2 stabilizes at about 8s on my machine, whereas it used to be at around 25s before. --- README.md | 5 +- import_obfuscation.go | 2 +- main.go | 181 +++++++++++++++++++++++++++-------- testdata/scripts/basic.txt | 6 +- testdata/scripts/imports.txt | 4 +- testdata/scripts/seed.txt | 15 ++- 6 files changed, 162 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 2cd5837..b9d6d2a 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ information about the original source code as possible. The tool is designed to be: -* Coupled with `cmd/go`, to support both `GOPATH` and modules with ease +* Coupled with `cmd/go`, to support modules and build caching * Deterministic and reproducible, given the same initial source code * Reversible given the original source, to un-garble panic stack traces @@ -44,9 +44,6 @@ packages to garble, set `GOPRIVATE`, documented at `go help module-private`. Most of these can improve with time and effort. The purpose of this section is to document the current shortcomings of this tool. -* Build caching is not supported, so large projects will likely be slow to - build. See [golang/go#41145](https://github.com/golang/go/issues/41145). - * Exported methods and fields are never garbled at the moment, since they could be required by interfaces and reflection. This area is a work in progress. diff --git a/import_obfuscation.go b/import_obfuscation.go index 9337085..f95c9ed 100644 --- a/import_obfuscation.go +++ b/import_obfuscation.go @@ -324,7 +324,7 @@ func hashImport(pkg string, garbledImports map[string]string) string { return garbledPkg } - garbledPkg := hashWith(buildInfo.imports[pkg].buildID, pkg) + garbledPkg := hashWith(buildInfo.imports[pkg].actionID, pkg) garbledImports[pkg] = garbledPkg return garbledPkg diff --git a/main.go b/main.go index 3682470..f7f9207 100644 --- a/main.go +++ b/main.go @@ -99,7 +99,7 @@ var ( })} buildInfo = struct { - buildID string // from -buildid + actionID []byte // from -buildid importCfg string // from -importcfg // TODO: replace part of this with goobj.ParseImportCfg, so that @@ -218,7 +218,7 @@ func garbledImport(path string) (*types.Package, error) { type importedPkg struct { packagefile string - buildID string + actionID []byte pkg *types.Package } @@ -260,7 +260,7 @@ How to install Go: https://golang.org/doc/install return false } - rawVersion := string(bytes.TrimPrefix(bytes.TrimSpace(out), []byte("go version "))) + rawVersion := strings.TrimPrefix(strings.TrimSpace(string(out)), "go version ") tagIdx := strings.IndexByte(rawVersion, ' ') tag := rawVersion[:tagIdx] @@ -424,10 +424,13 @@ func mainErr(args []string) error { } goArgs := []string{ cmd, - "-a", "-trimpath", "-toolexec=" + execPath, } + if flagDebugDir != "" { + // TODO: don't make -debugdir force rebuilding all packages + goArgs = append(goArgs, "-a") + } if cmd == "test" { // vet is generally not useful on garbled code; keep it // disabled by default. @@ -455,6 +458,45 @@ func mainErr(args []string) error { transformed := args[1:] // log.Println(tool, transformed) if transform != nil { + if len(args) == 2 && args[1] == "-V=full" { + cmd := exec.Command(args[0], args[1:]...) + out, err := cmd.Output() + if err != nil { + if err, _ := err.(*exec.ExitError); err != nil { + return fmt.Errorf("%v: %s", err, err.Stderr) + } + return err + } + line := string(bytes.TrimSpace(out)) + f := strings.Fields(line) + if len(f) < 3 || f[0] != tool || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") { + return fmt.Errorf("%s -V=full: unexpected output:\n\t%s", args[0], line) + } + var toolID []byte + if f[2] == "devel" { + // On the development branch, use the content ID part of the build ID. + toolID = decodeHash(contentID(f[len(f)-1])) + } else { + // For a release, the output is like: "compile version go1.9.1 X:framepointer". + // Use the whole line. + toolID = []byte(line) + } + + out = bytes.TrimSpace(out) // no trailing newline + contentID, err := ownContentID(toolID) + if err != nil { + return fmt.Errorf("cannot obtain garble's own version: %v", err) + } + // The part of the build ID that matters is the last, since it's the + // "content ID" which is used to work out whether there is a need to redo + // the action (build) or not. Since cmd/go parses the last word in the + // output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}". + // The slashes let us imitate a full binary build ID, but we assume that + // the other components such as the action ID are not necessary, since the + // only reader here is cmd/go and it only consumes the content ID. + fmt.Printf("%s +garble buildID=_/_/_/%s\n", line, contentID) + return nil + } var err error if transformed, err = transform(transformed); err != nil { return err @@ -476,6 +518,38 @@ func mainErr(args []string) error { return nil } +const buildIDSeparator = "/" + +// actionID returns the action ID half of a build ID, the first element. +func actionID(buildID string) string { + i := strings.Index(buildID, buildIDSeparator) + if i < 0 { + return buildID + } + return buildID[:i] +} + +// contentID returns the content ID half of a build ID, the last element. +func contentID(buildID string) string { + return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:] +} + +// decodeHash isthe opposite of hashToString, but with a panic for error +// handling since it should never happen. +func decodeHash(str string) []byte { + h, err := base64.RawURLEncoding.DecodeString(str) + if err != nil { + panic(fmt.Sprintf("invalid hash %q: %v", str, err)) + } + return h +} + +// hashToString encodes the first 120 bits of a sha256 sum in base64, the same +// format used for elements in a build ID. +func hashToString(h []byte) string { + return base64.RawURLEncoding.EncodeToString(h[:15]) +} + var transformFuncs = map[string]func([]string) ([]string, error){ "compile": transformCompile, "link": transformLink, @@ -484,10 +558,6 @@ var transformFuncs = map[string]func([]string) ([]string, error){ func transformCompile(args []string) ([]string, error) { var err error flags, paths := splitFlagsFromFiles(args, ".go") - if len(paths) == 0 { - // Nothing to transform; probably just ["-V=full"]. - return args, nil - } // We will force the linker to drop DWARF via -w, so don't spend time // generating it. @@ -542,7 +612,7 @@ func transformCompile(args []string) ([]string, error) { mathrand.Seed(int64(binary.BigEndian.Uint64(seed))) } else { - mathrand.Seed(int64(binary.BigEndian.Uint64([]byte(buildInfo.buildID)))) + mathrand.Seed(int64(binary.BigEndian.Uint64([]byte(buildInfo.actionID)))) } info := &types.Info{ @@ -737,12 +807,12 @@ func isPrivate(path string) bool { // fillBuildInfo initializes the global buildInfo struct via the supplied flags. func fillBuildInfo(flags []string) error { - buildInfo.buildID = flagValue(flags, "-buildid") - switch buildInfo.buildID { + buildID := flagValue(flags, "-buildid") + switch buildID { case "", "true": return fmt.Errorf("could not find -buildid argument") } - buildInfo.buildID = trimBuildID(buildInfo.buildID) + buildInfo.actionID = decodeHash(actionID(buildID)) buildInfo.importCfg = flagValue(flags, "-importcfg") if buildInfo.importCfg == "" { return fmt.Errorf("could not find -importcfg argument") @@ -769,32 +839,24 @@ func fillBuildInfo(flags []string) error { continue } importPath, objectPath := args[:j], args[j+1:] - fileID, err := buildidOf(objectPath) + buildID, err := buildidOf(objectPath) if err != nil { return err } - // log.Println("buildid:", fileID) + // log.Println("buildid:", buildID) if len(buildInfo.imports) == 0 { buildInfo.firstImport = importPath } buildInfo.imports[importPath] = importedPkg{ packagefile: objectPath, - buildID: fileID, + actionID: decodeHash(actionID(buildID)), } } // log.Printf("%#v", buildInfo) return nil } -func trimBuildID(id string) string { - id = strings.TrimSpace(id) - if i := strings.IndexByte(id, '/'); i > 0 { - id = id[:i] - } - return id -} - func buildidOf(path string) (string, error) { cmd := exec.Command("go", "tool", "buildid", path) out, err := cmd.Output() @@ -804,19 +866,19 @@ func buildidOf(path string) (string, error) { } return "", err } - return trimBuildID(string(bytes.TrimSpace(out))), nil + return string(out), nil } -func hashWith(salt, value string) string { +func hashWith(salt []byte, name string) string { const length = 4 d := sha256.New() - io.WriteString(d, salt) + d.Write(salt) d.Write(seed) - io.WriteString(d, value) + io.WriteString(d, name) sum := b64.EncodeToString(d.Sum(nil)) - if token.IsExported(value) { + if token.IsExported(name) { return "Z" + sum[:length] } return "z" + sum[:length] @@ -1030,12 +1092,12 @@ func transformGo(file *ast.File, info *types.Info, blacklist map[types.Object]st default: return true // we only want to rename the above } - buildID := buildInfo.buildID + actionID := buildInfo.actionID path := pkg.Path() if !isPrivate(path) { return true // only private packages are transformed } - if id := buildInfo.imports[path].buildID; id != "" { + if id := buildInfo.imports[path].actionID; len(id) > 0 { garbledPkg, err := garbledImport(path) if err != nil { panic(err) // shouldn't happen @@ -1044,12 +1106,12 @@ func transformGo(file *ast.File, info *types.Info, blacklist map[types.Object]st if garbledPkg.Scope().Lookup(obj.Name()) != nil { return true } - buildID = id + actionID = id } // The exported names cannot be shortened as counter synchronization between packages is not currently implemented if token.IsExported(node.Name) { - node.Name = hashWith(buildID, node.Name) + node.Name = hashWith(actionID, node.Name) return true } @@ -1071,7 +1133,7 @@ func transformGo(file *ast.File, info *types.Info, blacklist map[types.Object]st // orig := node.Name privateNameMap[fullName] = name node.Name = name - // log.Printf("%q hashed with %q to %q", orig, buildID, node.Name) + // log.Printf("%q hashed with %q to %q", orig, actionID, node.Name) return true } return astutil.Apply(file, pre, nil).(*ast.File) @@ -1117,11 +1179,9 @@ func isTestSignature(sign *types.Signature) bool { } func transformLink(args []string) ([]string, error) { - flags, paths := splitFlagsFromFiles(args, ".a") - if len(paths) == 0 { - // Nothing to transform; probably just ["-V=full"]. - return args, nil - } + // We can't split by the ".a" extension, because cached object files + // lack any extension. + flags, paths := splitFlagsFromArgs(args) if err := fillBuildInfo(flags); err != nil { return nil, err @@ -1168,7 +1228,7 @@ func transformLink(args []string) ([]string, error) { // the import config map. pkgPath = buildInfo.firstImport } - if id := buildInfo.imports[pkgPath].buildID; id != "" { + if id := buildInfo.imports[pkgPath].actionID; len(id) > 0 { // If the name is not in the map file, it means that the name was not obfuscated or is public newName, ok := privateNameMap[pkg+"."+name] if !ok { @@ -1342,3 +1402,46 @@ func flagSetValue(flags []string, name, value string) []string { } return append(flags, name+"="+value) } + +func ownContentID(toolID []byte) (string, error) { + // We can't rely on the module version to exist, because it's + // missing in local builds without 'go get'. + // For now, use 'go tool buildid' on the binary that's running. Just + // like Go's own cache, we use hex-encoded sha256 sums. + // Once https://github.com/golang/go/issues/37475 is fixed, we + // can likely just use that. + path, err := os.Executable() + if err != nil { + return "", err + } + buildID, err := buildidOf(path) + if err != nil { + return "", err + } + ownID := decodeHash(contentID(buildID)) + + // Join the two content IDs together into a single base64-encoded sha256 + // sum. This includes the original tool's content ID, and garble's own + // content ID. + h := sha256.New() + h.Write(toolID) + h.Write(ownID) + + // We also need to add the selected options to the full version string, + // because all of them result in different output. We use spaces to + // separate the env vars and flags, to reduce the chances of collisions. + if envGoPrivate != "" { + fmt.Fprintf(h, " GOPRIVATE=%s", envGoPrivate) + } + if envGarbleLiterals { + fmt.Fprintf(h, " -literals") + } + if envGarbleTiny { + fmt.Fprintf(h, " -tiny") + } + if envGarbleSeed != "" { + fmt.Fprintf(h, " -seed=%x", envGarbleSeed) + } + + return hashToString(h.Sum(nil)), nil +} diff --git a/testdata/scripts/basic.txt b/testdata/scripts/basic.txt index 64bc2c2..4f86b65 100644 --- a/testdata/scripts/basic.txt +++ b/testdata/scripts/basic.txt @@ -32,9 +32,11 @@ stdout 'unknown' stderr 'should be used alongside -trimpath' # Also check that the binary is reproducible. +# No packages should be rebuilt either, thanks to the build cache. cp main$exe main_old$exe rm main$exe -garble build main.go +garble build -v main.go +! stderr . bincmp main$exe main_old$exe # Check that the program works as expected without garble. No need to verify @@ -53,6 +55,8 @@ cmp stderr main.stderr binsubstr main$exe 'main.go' 'globalVar' 'globalFunc' $gofullversion [!windows] binsubstr main$exe ${WORK@R} +-- go.mod -- +module test/mainfoo -- main.go -- package main diff --git a/testdata/scripts/imports.txt b/testdata/scripts/imports.txt index 646c34c..47c9c10 100644 --- a/testdata/scripts/imports.txt +++ b/testdata/scripts/imports.txt @@ -10,9 +10,11 @@ cmp stdout main.stdout [short] stop # checking that the build is reproducible is slow # Also check that the binary is reproducible when many imports are involved. +# No packages should be rebuilt either, thanks to the build cache. cp main$exe main_old$exe rm main$exe -garble build -tags buildtag +garble build -tags buildtag -v +! stderr . bincmp main$exe main_old$exe go build -tags buildtag diff --git a/testdata/scripts/seed.txt b/testdata/scripts/seed.txt index c49d8de..ccd8592 100644 --- a/testdata/scripts/seed.txt +++ b/testdata/scripts/seed.txt @@ -9,19 +9,23 @@ cmp stderr main.stdout [short] stop # checking that the build is reproducible and random is slow # Also check that the binary is reproducible. +# No packages should be rebuilt either, thanks to the build cache. cp main$exe main_old$exe rm main$exe -garble -literals -seed=OQg9kACEECQ= build +garble -literals -seed=OQg9kACEECQ= build -v +! stderr . bincmp main$exe main_old$exe -# Also check that a different seed leads to a different binary +# Also check that a different seed leads to a different binary. +# We can't know if caching happens here, because of previous test runs. cp main$exe main_old$exe rm main$exe garble -literals -seed=NruiDmVz6/s build ! bincmp main$exe main_old$exe -# Check the random binary -garble -literals -seed=random build +# Use a random seed, which should always trigger a full build. +garble -literals -seed=random build -v +stderr . exec ./main$exe cmp stderr main.stdout ! binsubstr main$exe 'teststring' 'teststringVar' 'imported var value' 'ImportedVar' @@ -29,7 +33,8 @@ cmp stderr main.stdout # Also check that the random binary is not reproducible. cp main$exe main_old$exe rm main$exe -garble -literals -seed=random build +garble -literals -seed=random build -v +stderr . ! bincmp main$exe main_old$exe -- go.mod --