From e33179d48056660898d70841538e9898bfd992c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Mart=C3=AD?=
Date: Tue, 23 Feb 2021 20:11:15 +0000
Subject: [PATCH] reverse: support unexported names and package paths (#233)

Unexported names are a bit tricky, since they are not listed in the
export data file. Perhaps unsurprisingly, the export data is only meant
to expose exported objects.

One option would be to go back to adding an extra header to the export
data file, containing the unexported methods in a map[string]T or
[]string. However, we have an easier route: just parse the Go files and
look up the names directly.

This does mean that we parse the Go files every time "reverse" runs,
even if the build cache is warm, but that should not be an issue.
Parsing Go files without any typechecking is very cheap compared to
everything else we do. Plus, we save having to load go/types information
from the build cache, or having to load extra headers from export files.

It should be noted that the obfuscation process does need type
information, mainly to be careful about which names can be obfuscated
and how they should be obfuscated. Neither is a worry here; all names
belong to a single package, and it doesn't matter if some aren't
actually obfuscated, since the string replacements would simply never
trigger in practice.

The test includes an unexported func, to test the new feature. We also
start reversing the obfuscation of import paths. Now, the test's reverse
output is as follows:

    goroutine 1 [running]:
    runtime/debug.Stack(0x??, 0x??, 0x??)
        runtime/debug/stack.go:24 +0x??
    test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
        p.go:6 +0x??
    main.unexportedMainFunc(...)
        C.go:2
    main.main()
        z.go:3 +0x??

The only major missing feature is positions and filenames. A follow-up
PR will take care of those.

Updates #5.
--- reverse.go | 44 +++++++++++++++++++++++++----------- shared.go | 3 +++ testdata/scripts/reverse.txt | 32 ++++++++++++++++++-------- 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/reverse.go b/reverse.go index 88702ba..66b7b85 100644 --- a/reverse.go +++ b/reverse.go @@ -8,8 +8,12 @@ import ( "bytes" "encoding/json" "fmt" + "go/ast" + "go/parser" + "go/token" "io" "os" + "path/filepath" "strings" ) @@ -69,10 +73,8 @@ func commandReverse(args []string) error { if err != nil { return err } - // Adding it to buildInfo.imports allows us to reuse the - // "if" branch below. Plus, if this edge case triggers - // multiple times in a single package compile, we can - // call "go list" once and cache its result. + // The action ID, and possibly the export file, will be used + // later to reconstruct the mapping of obfuscated names. buildInfo.imports[pkg.ImportPath] = importedPkg{ packagefile: pkg.Export, actionID: decodeHash(splitActionID(buildID)), @@ -83,24 +85,40 @@ func commandReverse(args []string) error { return fmt.Errorf("go list error: %v: %s", err, stderr.Bytes()) } + // A package's names are generally hashed with the action ID of its + // obfuscated build. We recorded those action IDs above. + // Note that we parse Go files directly to obtain the names, since the + // export data only exposes exported names. Parsing Go files is cheap, + // so it's unnecessary to try to avoid this cost. var replaces []string + fset := token.NewFileSet() for _, pkgPath := range privatePkgPaths { ipkg := buildInfo.imports[pkgPath] + addReplace := func(str string) { + replaces = append(replaces, hashWith(ipkg.actionID, str), str) + } + + // Package paths are obfuscated, too. + addReplace(pkgPath) - // All original exported names names are hashed with the - // obfuscated package's action ID. 
- tpkg, err := origImporter.Import(pkgPath) + lpkg, err := listPackage(pkgPath) if err != nil { return err } - pkgScope := tpkg.Scope() - for _, name := range pkgScope.Names() { - obj := pkgScope.Lookup(name) - if !obj.Exported() { - continue + for _, goFile := range lpkg.GoFiles { + goFile = filepath.Join(lpkg.Dir, goFile) + file, err := parser.ParseFile(fset, goFile, nil, 0) + if err != nil { + return err + } + for _, decl := range file.Decls { + // TODO: Probably do type names too. What else? + switch decl := decl.(type) { + case *ast.FuncDecl: + addReplace(decl.Name.Name) + } } - replaces = append(replaces, hashWith(ipkg.actionID, name), name) } } repl := strings.NewReplacer(replaces...) diff --git a/shared.go b/shared.go index 7a30cdc..42304c8 100644 --- a/shared.go +++ b/shared.go @@ -146,6 +146,9 @@ type listedPackage struct { Deps []string ImportMap map[string]string + Dir string + GoFiles []string + // TODO(mvdan): reuse this field once TOOLEXEC_IMPORTPATH is used private bool } diff --git a/testdata/scripts/reverse.txt b/testdata/scripts/reverse.txt index c882ba2..0721d1c 100644 --- a/testdata/scripts/reverse.txt +++ b/testdata/scripts/reverse.txt @@ -13,11 +13,12 @@ exec cat main.stderr # This output is not reproducible between 'go test' runs, # so we can't use a static golden file. grep 'goroutine 1 \[running\]' main.stderr -! grep 'SomeFunc|test/main|main.go|lib.go' main.stderr +! 
grep 'ExportedLibFunc|unexportedMainFunc|test/main|main.go|lib.go' main.stderr stdin main.stderr garble reverse -stdout -count=1 'SomeFunc' +stdout -count=1 'test/main/lib\.ExportedLibFunc' +stdout -count=1 'main\.unexportedMainFunc' # TODO: this is what we want when "reverse" is finished # cmp stdout reverse.stdout @@ -33,21 +34,31 @@ go 1.15 -- main.go -- package main -import "test/main/lib" +import ( + "os" + + "test/main/lib" +) func main() { - lib.SomeFunc() + unexportedMainFunc() +} + +func unexportedMainFunc() { + if err := lib.ExportedLibFunc(os.Stderr); err != nil { + panic(err) + } } -- lib/lib.go -- package lib import ( - "os" + "io" "regexp" "runtime/debug" ) -func SomeFunc() { +func ExportedLibFunc(w io.Writer) error { // Panic outputs include "0xNN" pointers and offsets which change // between platforms. // Strip them out here, to have portable static stdout files. @@ -55,13 +66,16 @@ func SomeFunc() { stack := debug.Stack() stack = rxVariableSuffix.ReplaceAll(stack, []byte("0x??")) - os.Stderr.Write(stack) + _, err := w.Write(stack) + return err } -- reverse.stdout -- goroutine 1 [running]: runtime/debug.Stack(0x??, 0x??, 0x??) runtime/debug/stack.go:24 +0x?? -test/main/lib.SomeFunc() +test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??) test/main/lib/lib.go:15 +0x?? +main.unexportedMainFunc(...) + test/main/main.go:14 main.main() - test/main/main.go:6 +0x?? + test/main/main.go:10 +0x??