reverse: support unexported names and package paths (#233)

Unexported names are a bit tricky, since they are not listed in the
export data file. Perhaps unsurprisingly, that file is only meant to
expose exported objects.

One option would be to go back to adding an extra header to the export
data file, containing the unexported methods in a map[string]T or
[]string. However, we have an easier route: just parse the Go files and
look up the names directly.

This does mean that we parse the Go files every time "reverse" runs,
even if the build cache is warm, but that should not be an issue.
Parsing Go files without any typechecking is very cheap compared to
everything else we do. Plus, we save having to load go/types information
from the build cache, or having to load extra headers from export files.

It should be noted that the obfuscation process does need type
information, mainly to be careful about which names can be obfuscated
and how they should be obfuscated. Neither is a worry here; all names
belong to a single package, and it doesn't matter if some aren't
actually obfuscated, since the string replacements would simply never
trigger in practice.

The test includes an unexported func, to test the new feature. We also
start reversing the obfuscation of import paths. Now, the test's reverse
output is as follows:

	goroutine 1 [running]:
	runtime/debug.Stack(0x??, 0x??, 0x??)
		runtime/debug/stack.go:24 +0x??
	test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
		p.go:6 +0x??
	main.unexportedMainFunc(...)
		C.go:2
	main.main()
		z.go:3 +0x??

The only major missing feature is reversing positions and filenames. A
follow-up PR will take care of those.

Updates #5.
pull/234/head
Daniel Martí 3 years ago committed by GitHub
parent a499a6bcd7
commit e33179d480
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -8,8 +8,12 @@ import (
"bytes"
"encoding/json"
"fmt"
"go/ast"
"go/parser"
"go/token"
"io"
"os"
"path/filepath"
"strings"
)
@ -69,10 +73,8 @@ func commandReverse(args []string) error {
if err != nil {
return err
}
// Adding it to buildInfo.imports allows us to reuse the
// "if" branch below. Plus, if this edge case triggers
// multiple times in a single package compile, we can
// call "go list" once and cache its result.
// The action ID, and possibly the export file, will be used
// later to reconstruct the mapping of obfuscated names.
buildInfo.imports[pkg.ImportPath] = importedPkg{
packagefile: pkg.Export,
actionID: decodeHash(splitActionID(buildID)),
@ -83,24 +85,40 @@ func commandReverse(args []string) error {
return fmt.Errorf("go list error: %v: %s", err, stderr.Bytes())
}
// A package's names are generally hashed with the action ID of its
// obfuscated build. We recorded those action IDs above.
// Note that we parse Go files directly to obtain the names, since the
// export data only exposes exported names. Parsing Go files is cheap,
// so it's unnecessary to try to avoid this cost.
var replaces []string
fset := token.NewFileSet()
for _, pkgPath := range privatePkgPaths {
ipkg := buildInfo.imports[pkgPath]
addReplace := func(str string) {
replaces = append(replaces, hashWith(ipkg.actionID, str), str)
}
// Package paths are obfuscated, too.
addReplace(pkgPath)
// All original exported names are hashed with the
// obfuscated package's action ID.
tpkg, err := origImporter.Import(pkgPath)
lpkg, err := listPackage(pkgPath)
if err != nil {
return err
}
pkgScope := tpkg.Scope()
for _, name := range pkgScope.Names() {
obj := pkgScope.Lookup(name)
if !obj.Exported() {
continue
for _, goFile := range lpkg.GoFiles {
goFile = filepath.Join(lpkg.Dir, goFile)
file, err := parser.ParseFile(fset, goFile, nil, 0)
if err != nil {
return err
}
for _, decl := range file.Decls {
// TODO: Probably do type names too. What else?
switch decl := decl.(type) {
case *ast.FuncDecl:
addReplace(decl.Name.Name)
}
}
replaces = append(replaces, hashWith(ipkg.actionID, name), name)
}
}
repl := strings.NewReplacer(replaces...)

@ -146,6 +146,9 @@ type listedPackage struct {
Deps []string
ImportMap map[string]string
Dir string
GoFiles []string
// TODO(mvdan): reuse this field once TOOLEXEC_IMPORTPATH is used
private bool
}

@ -13,11 +13,12 @@ exec cat main.stderr
# This output is not reproducible between 'go test' runs,
# so we can't use a static golden file.
grep 'goroutine 1 \[running\]' main.stderr
! grep 'SomeFunc|test/main|main.go|lib.go' main.stderr
! grep 'ExportedLibFunc|unexportedMainFunc|test/main|main.go|lib.go' main.stderr
stdin main.stderr
garble reverse
stdout -count=1 'SomeFunc'
stdout -count=1 'test/main/lib\.ExportedLibFunc'
stdout -count=1 'main\.unexportedMainFunc'
# TODO: this is what we want when "reverse" is finished
# cmp stdout reverse.stdout
@ -33,21 +34,31 @@ go 1.15
-- main.go --
package main
import "test/main/lib"
import (
"os"
"test/main/lib"
)
func main() {
lib.SomeFunc()
unexportedMainFunc()
}
func unexportedMainFunc() {
if err := lib.ExportedLibFunc(os.Stderr); err != nil {
panic(err)
}
}
-- lib/lib.go --
package lib
import (
"os"
"io"
"regexp"
"runtime/debug"
)
func SomeFunc() {
func ExportedLibFunc(w io.Writer) error {
// Panic outputs include "0xNN" pointers and offsets which change
// between platforms.
// Strip them out here, to have portable static stdout files.
@ -55,13 +66,16 @@ func SomeFunc() {
stack := debug.Stack()
stack = rxVariableSuffix.ReplaceAll(stack, []byte("0x??"))
os.Stderr.Write(stack)
_, err := w.Write(stack)
return err
}
-- reverse.stdout --
goroutine 1 [running]:
runtime/debug.Stack(0x??, 0x??, 0x??)
runtime/debug/stack.go:24 +0x??
test/main/lib.SomeFunc()
test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
test/main/lib/lib.go:15 +0x??
main.unexportedMainFunc(...)
test/main/main.go:14
main.main()
test/main/main.go:6 +0x??
test/main/main.go:10 +0x??

Loading…
Cancel
Save