stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out each such file's final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
pull/423/head
Daniel Martí 3 years ago committed by GitHub
parent d5d1131b75
commit b5bef981ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -7,10 +7,13 @@ import (
"bytes" "bytes"
"crypto/sha256" "crypto/sha256"
"encoding/base64" "encoding/base64"
"encoding/hex"
"fmt" "fmt"
"go/token" "go/token"
"io" "io"
"os"
"os/exec" "os/exec"
"path/filepath"
"strings" "strings"
) )
@ -227,3 +230,30 @@ func hashWith(salt []byte, name string) string {
} }
return string(sum) return string(sum)
} }
// gocachePathForFile works out the path an object file will take in GOCACHE.
// At the moment, such an entry is named after the hex-encoded sha256 of the
// file's contents: "$GOCACHE/<first two hex digits>/<full hex digest>-d".
//
// We need this code because, in the importcfg files given to us during a build,
// some of the object files are in temporary "[...]/_pkg_.a" files.
// To be able to use the files again later, we need their final cache location.
//
// An error is returned if path cannot be opened or read, or if the computed
// cache entry does not exist on disk. Errors are wrapped with %w, so callers
// can still match the underlying cause via errors.Is or errors.As.
func gocachePathForFile(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", fmt.Errorf("locating build cache entry for object file: %w", err)
	}
	defer f.Close()

	// Stream the file through the hash rather than loading it all in memory;
	// object files can be large.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", fmt.Errorf("hashing object file %q: %w", path, err)
	}
	sum := hex.EncodeToString(h.Sum(nil))
	entry := filepath.Join(cache.GoEnv.GOCACHE, sum[:2], sum+"-d")

	// Ensure the file actually exists in the build cache.
	// If it doesn't, fail immediately, as that's likely a bug in our code.
	if _, err := os.Stat(entry); err != nil {
		return "", fmt.Errorf("object file %q has no build cache entry: %w", path, err)
	}
	return entry, nil
}

@ -115,12 +115,12 @@ var (
// Basic information about the package being currently compiled or linked. // Basic information about the package being currently compiled or linked.
curPkg *listedPackage curPkg *listedPackage
// These are pulled from -importcfg in the current obfuscated build. garbledImporter = importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) {
// As such, they contain export data for the dependencies which might be pkgfile := cachedOutput.KnownObjectFiles[path]
// themselves obfuscated, depending on GOPRIVATE. if pkgfile == "" {
importCfgEntries map[string]*importCfgEntry panic(fmt.Sprintf("missing in cachedOutput.KnownObjectFiles: %q", path))
garbledImporter = importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) { }
return os.Open(importCfgEntries[path].packagefile) return os.Open(pkgfile)
}).(types.ImporterFrom) }).(types.ImporterFrom)
opts *flagOptions opts *flagOptions
@ -143,79 +143,18 @@ func obfuscatedTypesPackage(path string) *types.Package {
if path == curPkg.ImportPath { if path == curPkg.ImportPath {
panic("called obfuscatedTypesPackage on the current package?") panic("called obfuscatedTypesPackage on the current package?")
} }
entry, ok := importCfgEntries[path] if pkg := cachedTypesPackages[path]; pkg != nil {
if !ok { return pkg
// Handle the case where the name is defined in an indirectly
// imported package. Since only direct imports show up in our
// importcfg, importCfgEntries will not initially contain the
// package path we want.
//
// This edge case can happen, for example, if package A imports
// package B and calls its API, and B's API returns C's struct.
// Suddenly, A can use struct field names defined in C, even
// though A never directly imports C.
//
// Another edge case is if A uses C's named type via a type
// alias in B.
//
// For this rare case, for now, do an extra "go list -toolexec"
// call to retrieve its export path.
//
// TODO: Think about ways to avoid this extra exec call. Perhaps
// avoid relying on importcfg to know whether an imported name
// was obfuscated. Or perhaps record indirect importcfg entries
// somehow.
goArgs := []string{
"list",
"-json",
"-export",
"-trimpath",
"-toolexec=" + cache.ExecPath,
}
goArgs = append(goArgs, cache.ForwardBuildFlags...)
goArgs = append(goArgs, path)
cmd := exec.Command("go", goArgs...)
cmd.Dir = opts.GarbleDir
out, err := cmd.Output()
if err != nil {
if err := err.(*exec.ExitError); err != nil {
panic(fmt.Sprintf("%v: %s", err, err.Stderr))
}
panic(err)
}
var pkg listedPackage
if err := json.Unmarshal(out, &pkg); err != nil {
panic(err) // shouldn't happen
}
if pkg.ImportPath != path {
panic(fmt.Sprintf("unexpected path: %q vs %q", pkg.ImportPath, path))
}
entry = &importCfgEntry{
packagefile: pkg.Export,
}
// Adding it to importCfgEntries allows us to reuse the
// "if" branch below. Plus, if this edge case triggers
// multiple times in a single package compile, we can
// call "go list" once and cache its result.
importCfgEntries[path] = entry
}
if entry.cachedPkg != nil {
return entry.cachedPkg
} }
pkg, err := garbledImporter.ImportFrom(path, opts.GarbleDir, 0) pkg, err := garbledImporter.ImportFrom(path, opts.GarbleDir, 0)
if err != nil { if err != nil {
return nil panic(err)
} }
entry.cachedPkg = pkg // cache for later use cachedTypesPackages[path] = pkg // cache for later use
return pkg return pkg
} }
type importCfgEntry struct { var cachedTypesPackages = make(map[string]*types.Package)
packagefile string
cachedPkg *types.Package
}
func main1() int { func main1() int {
if err := flagSet.Parse(os.Args[1:]); err != nil { if err := flagSet.Parse(os.Args[1:]); err != nil {
@ -633,21 +572,22 @@ func transformCompile(args []string) ([]string, error) {
flags = alterTrimpath(flags) flags = alterTrimpath(flags)
newImportCfg, err := processImportCfg(flags)
if err != nil {
return nil, err
}
// Note that if the file already exists in the cache from another build, // Note that if the file already exists in the cache from another build,
// we don't need to write to it again thanks to the hash. // we don't need to write to it again thanks to the hash.
// TODO: as an optimization, just load that one gob file. // TODO: as an optimization, just load that one gob file.
if err := loadKnownReflectAPIs(); err != nil { if err := loadCachedOutputs(); err != nil {
return nil, err return nil, err
} }
tf.findReflectFunctions(files) tf.findReflectFunctions(files)
newImportCfg, err := processImportCfg(flags)
if err != nil {
return nil, err
}
if err := writeGobExclusive( if err := writeGobExclusive(
garbleExportFile(curPkg), garbleExportFile(curPkg),
knownReflectAPIs, cachedOutput,
); err != nil && !errors.Is(err, fs.ErrExist) { ); err != nil && !errors.Is(err, fs.ErrExist) {
return nil, err return nil, err
} }
@ -855,9 +795,9 @@ func isPrivate(path string) bool {
return module.MatchPrefixPatterns(cache.GoEnv.GOPRIVATE, path) return module.MatchPrefixPatterns(cache.GoEnv.GOPRIVATE, path)
} }
// processImportCfg initializes importCfgEntries via the supplied flags, and // processImportCfg parses the importcfg file passed to a compile or link step,
// constructs a new importcfg with the obfuscated import paths changed as // adding missing entries to KnownObjectFiles to be stored in the build cache.
// necessary. // It also builds a new importcfg file to account for obfuscated import paths.
func processImportCfg(flags []string) (newImportCfg string, _ error) { func processImportCfg(flags []string) (newImportCfg string, _ error) {
importCfg := flagValue(flags, "-importcfg") importCfg := flagValue(flags, "-importcfg")
if importCfg == "" { if importCfg == "" {
@ -868,9 +808,7 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
return "", err return "", err
} }
// TODO: use slices rather than maps to generate a deterministic importcfg. var packagefiles, importmaps [][2]string
importCfgEntries = make(map[string]*importCfgEntry)
importMap := make(map[string]string)
for _, line := range strings.SplitAfter(string(data), "\n") { for _, line := range strings.SplitAfter(string(data), "\n") {
line = strings.TrimSpace(line) line = strings.TrimSpace(line)
@ -890,7 +828,7 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
continue continue
} }
beforePath, afterPath := args[:j], args[j+1:] beforePath, afterPath := args[:j], args[j+1:]
importMap[beforePath] = afterPath importmaps = append(importmaps, [2]string{beforePath, afterPath})
case "packagefile": case "packagefile":
args := strings.TrimSpace(line[i+1:]) args := strings.TrimSpace(line[i+1:])
j := strings.Index(args, "=") j := strings.Index(args, "=")
@ -899,8 +837,21 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
} }
importPath, objectPath := args[:j], args[j+1:] importPath, objectPath := args[:j], args[j+1:]
impPkg := &importCfgEntry{packagefile: objectPath} packagefiles = append(packagefiles, [2]string{importPath, objectPath})
importCfgEntries[importPath] = impPkg
if prev := cachedOutput.KnownObjectFiles[importPath]; prev != "" {
// Nothing to do; recorded by one of our dependencies.
} else if strings.HasSuffix(objectPath, "_pkg_.a") {
// The path is inside a temporary directory, to be deleted soon.
// Record the final location within the build cache instead.
finalObjectPath, err := gocachePathForFile(objectPath)
if err != nil {
return "", err
}
cachedOutput.KnownObjectFiles[importPath] = finalObjectPath
} else {
cachedOutput.KnownObjectFiles[importPath] = objectPath
}
} }
} }
// log.Printf("%#v", buildInfo) // log.Printf("%#v", buildInfo)
@ -913,7 +864,8 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
if err != nil { if err != nil {
return "", err return "", err
} }
for beforePath, afterPath := range importMap { for _, pair := range importmaps {
beforePath, afterPath := pair[0], pair[1]
if isPrivate(afterPath) { if isPrivate(afterPath) {
lpkg, err := listPackage(beforePath) lpkg, err := listPackage(beforePath)
if err != nil { if err != nil {
@ -930,7 +882,8 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
} }
fmt.Fprintf(newCfg, "importmap %s=%s\n", beforePath, afterPath) fmt.Fprintf(newCfg, "importmap %s=%s\n", beforePath, afterPath)
} }
for impPath, pkg := range importCfgEntries { for _, pair := range packagefiles {
impPath, pkgfile := pair[0], pair[1]
if isPrivate(impPath) { if isPrivate(impPath) {
lpkg, err := listPackage(impPath) lpkg, err := listPackage(impPath)
if err != nil { if err != nil {
@ -938,7 +891,7 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
} }
impPath = lpkg.obfuscatedImportPath() impPath = lpkg.obfuscatedImportPath()
} }
fmt.Fprintf(newCfg, "packagefile %s=%s\n", impPath, pkg.packagefile) fmt.Fprintf(newCfg, "packagefile %s=%s\n", impPath, pkgfile)
} }
// Uncomment to debug the transformed importcfg. Do not delete. // Uncomment to debug the transformed importcfg. Do not delete.
@ -956,14 +909,29 @@ type (
reflectParameterPosition = int reflectParameterPosition = int
) )
// knownReflectAPIs is a static record of what std APIs use reflection on their // cachedOutput contains information that will be stored as per garbleExportFile.
// parameters, so we can avoid obfuscating types used with them. var cachedOutput = struct {
// // KnownObjectFiles is filled from -importcfg in the current obfuscated build.
// TODO: we're not including fmt.Printf, as it would have many false positives, // As such, it records export data for the dependencies which might be
// unless we were smart enough to detect which arguments get used as %#v or %T. // themselves obfuscated, depending on GOPRIVATE.
var knownReflectAPIs = map[funcFullName][]reflectParameterPosition{ //
"reflect.TypeOf": {0}, // TODO: We rely on obfuscated type information to know what names we didn't
"reflect.ValueOf": {0}, // obfuscate. Instead, directly record what names we chose not to obfuscate,
// which should then avoid having to go through go/types.
KnownObjectFiles map[string]string
// KnownReflectAPIs is a static record of what std APIs use reflection on their
// parameters, so we can avoid obfuscating types used with them.
//
// TODO: we're not including fmt.Printf, as it would have many false positives,
// unless we were smart enough to detect which arguments get used as %#v or %T.
KnownReflectAPIs map[funcFullName][]reflectParameterPosition
}{
KnownObjectFiles: map[string]string{},
KnownReflectAPIs: map[funcFullName][]reflectParameterPosition{
"reflect.TypeOf": {0},
"reflect.ValueOf": {0},
},
} }
// garbleExportFile returns an absolute path to a build cache entry // garbleExportFile returns an absolute path to a build cache entry
@ -985,11 +953,11 @@ func garbleExportFile(pkg *listedPackage) string {
return trimmed + "-garble-" + hashToString(pkg.GarbleActionID) + "-d" return trimmed + "-garble-" + hashToString(pkg.GarbleActionID) + "-d"
} }
func loadKnownReflectAPIs() error { func loadCachedOutputs() error {
for _, path := range curPkg.Deps { for _, path := range curPkg.Deps {
pkg, err := listPackage(path) pkg, err := listPackage(path)
if err != nil { if err != nil {
return err panic(err) // shouldn't happen
} }
if pkg.Export == "" { if pkg.Export == "" {
continue // nothing to load continue // nothing to load
@ -1003,8 +971,8 @@ func loadKnownReflectAPIs() error {
} }
defer f.Close() defer f.Close()
// Decode appends new entries to the existing map // Decode appends new entries to the existing maps
if err := gob.NewDecoder(f).Decode(&knownReflectAPIs); err != nil { if err := gob.NewDecoder(f).Decode(&cachedOutput); err != nil {
return fmt.Errorf("gob decode: %w", err) return fmt.Errorf("gob decode: %w", err)
} }
return nil return nil
@ -1049,7 +1017,7 @@ func (tf *transformer) findReflectFunctions(files []*ast.File) {
fullName := fnType.FullName() fullName := fnType.FullName()
var identifiers []string var identifiers []string
for _, argPos := range knownReflectAPIs[fullName] { for _, argPos := range cachedOutput.KnownReflectAPIs[fullName] {
arg := call.Args[argPos] arg := call.Args[argPos]
ident, ok := arg.(*ast.Ident) ident, ok := arg.(*ast.Ident)
@ -1075,13 +1043,13 @@ func (tf *transformer) findReflectFunctions(files []*ast.File) {
} }
} }
} }
knownReflectAPIs[funcObj.FullName()] = argumentPosReflect cachedOutput.KnownReflectAPIs[funcObj.FullName()] = argumentPosReflect
return true return true
}) })
} }
lenPrevKnownReflectAPIs := len(knownReflectAPIs) lenPrevKnownReflectAPIs := len(cachedOutput.KnownReflectAPIs)
for _, file := range files { for _, file := range files {
for _, decl := range file.Decls { for _, decl := range file.Decls {
visitReflect(decl) visitReflect(decl)
@ -1089,7 +1057,7 @@ func (tf *transformer) findReflectFunctions(files []*ast.File) {
} }
// if a new reflectAPI is found we need to Re-evaluate all functions which might be using that API // if a new reflectAPI is found we need to Re-evaluate all functions which might be using that API
if len(knownReflectAPIs) > lenPrevKnownReflectAPIs { if len(cachedOutput.KnownReflectAPIs) > lenPrevKnownReflectAPIs {
tf.findReflectFunctions(files) tf.findReflectFunctions(files)
} }
} }
@ -1128,7 +1096,7 @@ func (tf *transformer) prefillIgnoreObjects(files []*ast.File) {
fullName := fnType.FullName() fullName := fnType.FullName()
// log.Printf("%s: %s", fset.Position(node.Pos()), fullName) // log.Printf("%s: %s", fset.Position(node.Pos()), fullName)
for _, argPos := range knownReflectAPIs[fullName] { for _, argPos := range cachedOutput.KnownReflectAPIs[fullName] {
arg := call.Args[argPos] arg := call.Args[argPos]
argType := tf.info.TypeOf(arg) argType := tf.info.TypeOf(arg)
tf.recordIgnore(argType, tf.pkg.Path()) tf.recordIgnore(argType, tf.pkg.Path())
@ -1851,7 +1819,7 @@ func flagSetValue(flags []string, name, value string) []string {
func fetchGoEnv() error { func fetchGoEnv() error {
out, err := exec.Command("go", "env", "-json", out, err := exec.Command("go", "env", "-json",
"GOPRIVATE", "GOMOD", "GOVERSION", "GOPRIVATE", "GOMOD", "GOVERSION", "GOCACHE",
).CombinedOutput() ).CombinedOutput()
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, `Can't find Go toolchain: %v fmt.Fprintf(os.Stderr, `Can't find Go toolchain: %v

@ -45,6 +45,7 @@ type sharedCache struct {
GOPRIVATE string // Set to the module path as a fallback. GOPRIVATE string // Set to the module path as a fallback.
GOMOD string GOMOD string
GOVERSION string GOVERSION string
GOCACHE string
} }
} }

Loading…
Cancel
Save