From c1c90fee13a1c89451c5c719b3f93a5f7eeed13b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sat, 5 Mar 2022 20:05:09 +0000 Subject: [PATCH] make obfuscation fully deterministic with -seed The default behavior of garble is to seed via the build inputs, including the build IDs of the entire Go build of each package. This works well as a default, and does give us determinism, but it means that building for different platforms will result in different obfuscation per platform. Instead, when -seed is provided, don't use any other hash seed or salt. This means that a particular Go name will be obfuscated the same way as long as the seed, package path, and name itself remain constant. In other words, when the user supplies a custom -seed, we assume they know what they're doing in terms of storage and rotation. Expand the README docs with more examples and detail. Fixes #449. --- README.md | 28 ++++--- hash.go | 34 +++++++-- main.go | 29 ++++---- main_test.go | 8 ++ position.go | 2 +- reverse.go | 19 ++--- shared.go | 2 +- testdata/scripts/seed.txt | 152 +++++++++++++++++++++++++++----------- 8 files changed, 188 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index ebd4455..b3a5458 100644 --- a/README.md +++ b/README.md @@ -93,19 +93,27 @@ as it has to obfuscate each package for the first time. This is akin to clearing ### Determinism and seeds -Just like Go, garble builds are deterministic and reproducible if the inputs -remain the same: the version of Go, the version of Garble, and the input code. -This has significant benefits, such as caching builds or being able to use +Just like Go, garble builds are deterministic and reproducible in nature. +This has significant benefits, such as caching builds and being able to use `garble reverse` to de-obfuscate stack traces. -However, it also means that an input package will be obfuscated in exactly the -same way if none of those inputs change. 
If you want two builds of your program -to be entirely different, you can use `-seed` to provide a new seed for the -entire build, which will cause a full rebuild. +By default, garble will obfuscate each package in a unique way, +which will change if its build input changes: the version of garble, the version +of Go, the package's source code, or any build parameter such as GOOS or -tags. +This is a reasonable default since guessing those inputs is very hard. -If any open source packages are being obfuscated, providing a custom seed can -also provide extra protection. It could be possible to guess the versions of Go -and garble given how a public package was obfuscated without a seed. +However, providing your own obfuscation seed via `-seed` brings some advantages. +For example, builds sharing the same seed will produce the same obfuscation, +even if any of the build parameters or versions vary. +It can also make reverse-engineering harder, as otherwise an end user could guess +which version of Go or garble you're using. + +Note that extra care should be taken when using custom seeds. +If a seed used to build a binary gets lost, `garble reverse` will not work. +Rotating the seeds can also help against reverse-engineering in the long run, +as otherwise some bits of code may be obfuscated the same way over time. + +An alternative approach is `-seed=random`, where each build is entirely different. 
### Caveats diff --git a/hash.go b/hash.go index bd7136e..cd906e7 100644 --- a/hash.go +++ b/hash.go @@ -9,6 +9,7 @@ import ( "encoding/base64" "fmt" "go/token" + "go/types" "io" "os/exec" "strings" @@ -141,7 +142,7 @@ func appendFlags(w io.Writer, forBuildHash bool) { io.WriteString(w, " -debugdir=") io.WriteString(w, flagDebugDir) } - if len(flagSeed.bytes) > 0 { + if flagSeed.present() { io.WriteString(w, " -seed=") io.WriteString(w, flagSeed.String()) } @@ -188,18 +189,39 @@ func isUpper(b byte) bool { return 'A' <= b && b <= 'Z' } func toLower(b byte) byte { return b + ('a' - 'A') } func toUpper(b byte) byte { return b - ('a' - 'A') } -// hashWith returns a hashed version of name, including the provided salt as well as -// opts.Seed into the hash input. +func hashWithPackage(pkg *listedPackage, name string) string { + if !flagSeed.present() { + return hashWithCustomSalt(pkg.GarbleActionID, name) + } + // Use a separator at the end of ImportPath as a salt, + // to ensure that "pkgfoo.bar" and "pkg.foobar" don't both hash + // as the same string "pkgfoobar". + return hashWithCustomSalt([]byte(pkg.ImportPath+"|"), name) +} + +func hashWithStruct(strct *types.Struct, fieldName string) string { + // TODO: We should probably strip field tags here. + // Do we need to do anything else to make a + // struct type "canonical"? + fieldsSalt := []byte(strct.String()) + if !flagSeed.present() { + fieldsSalt = addGarbleToHash(fieldsSalt) + } + return hashWithCustomSalt(fieldsSalt, fieldName) +} + +// hashWithCustomSalt returns a hashed version of name, +// including the provided salt as well as opts.Seed into the hash input. // // The result is always four bytes long. If the input was a valid identifier, // the output remains equally exported or unexported. Note that this process is // reproducible, but not reversible. 
-func hashWith(salt []byte, name string) string { +func hashWithCustomSalt(salt []byte, name string) string { if len(salt) == 0 { - panic("hashWith: empty salt") + panic("hashWithCustomSalt: empty salt") } if name == "" { - panic("hashWith: empty name") + panic("hashWithCustomSalt: empty name") } // hashLength is the number of base64 characters to use for the final // hashed name. diff --git a/main.go b/main.go index 9c608d4..c39b189 100644 --- a/main.go +++ b/main.go @@ -71,6 +71,8 @@ type seedFlag struct { bytes []byte } +func (f seedFlag) present() bool { return len(f.bytes) > 0 } + func (f seedFlag) String() string { return base64.RawStdEncoding.EncodeToString(f.bytes) } @@ -610,7 +612,7 @@ func transformAsm(args []string) ([]string, error) { continue } - newName := hashWith(curPkg.GarbleActionID, name) + newName := hashWithPackage(curPkg, name) debugf("asm name %q hashed with %x to %q", name, curPkg.GarbleActionID, newName) buf.WriteString(newName) } @@ -693,9 +695,9 @@ func transformCompile(args []string) ([]string, error) { } // Literal obfuscation uses math/rand, so seed it deterministically. - randSeed := flagSeed.bytes - if len(randSeed) == 0 { - randSeed = curPkg.GarbleActionID + randSeed := curPkg.GarbleActionID + if flagSeed.present() { + randSeed = flagSeed.bytes } // debugf("seeding math/rand with %x\n", randSeed) mathrand.Seed(int64(binary.BigEndian.Uint64(randSeed))) @@ -789,7 +791,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) { // obfuscate the local name, if the current package is obfuscated if curPkg.ToObfuscate { - fields[1] = hashWith(curPkg.GarbleActionID, fields[1]) + fields[1] = hashWithPackage(curPkg, fields[1]) } // If the new name is of the form "pkgpath.Name", and @@ -825,7 +827,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) { if lpkg.ToObfuscate { // The name exists and was obfuscated; obfuscate // the new name. 
- newName := hashWith(lpkg.GarbleActionID, name) + newName := hashWithPackage(lpkg, name) newPkgPath := pkgPath if pkgPath != "main" { newPkgPath = lpkg.obfuscatedImportPath() @@ -902,7 +904,7 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) { // For beforePath="vendor/foo", afterPath and // lpkg.ImportPath can be just "foo". // Don't use obfuscatedImportPath here. - beforePath = hashWith(lpkg.GarbleActionID, beforePath) + beforePath = hashWithPackage(lpkg, beforePath) afterPath = lpkg.obfuscatedImportPath() } @@ -1540,11 +1542,9 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { if strct == nil { panic("could not find for " + name) } - // TODO: We should probably strip field tags here. - // Do we need to do anything else to make a - // struct type "canonical"? - fieldsHash := []byte(strct.String()) - hashToUse = addGarbleToHash(fieldsHash) + node.Name = hashWithStruct(strct, name) + debugf("%s %q hashed with struct fields to %q", debugName, name, node.Name) + return true case *types.TypeName: debugName = "type" @@ -1569,7 +1569,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { return true // we only want to rename the above } - node.Name = hashWith(hashToUse, name) + node.Name = hashWithPackage(lpkg, name) + // TODO: probably move the debugf lines inside the hash funcs debugf("%s %q hashed with %x… to %q", debugName, name, hashToUse[:4], node.Name) return true } @@ -1728,7 +1729,7 @@ func transformLink(args []string) ([]string, error) { if pkg != "main" { newPkg = lpkg.obfuscatedImportPath() } - newName := hashWith(lpkg.GarbleActionID, name) + newName := hashWithPackage(lpkg, name) flags = append(flags, fmt.Sprintf("-X=%s.%s=%s", newPkg, newName, str)) }) diff --git a/main_test.go b/main_test.go index 7f60fd8..d77a388 100644 --- a/main_test.go +++ b/main_test.go @@ -152,6 +152,14 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) { if len(args) != 2 { ts.Fatalf("usage: bincmp file1 file2") } + 
for _, arg := range args { + switch arg { + case "stdout", "stderr": + // Note that the diffoscope call below would not deal with + // stdout/stderr either. + ts.Fatalf("bincmp is for binary files. did you mean cmp?") + } + } data1 := ts.ReadFile(args[0]) data2 := ts.ReadFile(args[1]) if neg { diff --git a/position.go b/position.go index 4e2e0ec..5cbd28b 100644 --- a/position.go +++ b/position.go @@ -103,7 +103,7 @@ func printFile(file1 *ast.File) ([]byte, error) { newName := "" if !flagTiny { origPos := fmt.Sprintf("%s:%d", filename, fset.Position(origNode.Pos()).Offset) - newName = hashWith(curPkg.GarbleActionID, origPos) + ".go" + newName = hashWithPackage(curPkg, origPos) + ".go" // log.Printf("%q hashed with %x to %q", origPos, curPkg.GarbleActionID, newName) } pos := fset.Position(node.Pos()) diff --git a/reverse.go b/reverse.go index b1feab5..614337d 100644 --- a/reverse.go +++ b/reverse.go @@ -70,15 +70,12 @@ One can reverse a captured panic stack trace as follows: } curPkg = lpkg - addReplace := func(hash []byte, str string) { - if hash == nil { - hash = lpkg.GarbleActionID - } - replaces = append(replaces, hashWith(hash, str), str) + addHashedWithPackage := func(str string) { + replaces = append(replaces, hashWithPackage(lpkg, str), str) } // Package paths are obfuscated, too. - addReplace(nil, lpkg.ImportPath) + addHashedWithPackage(lpkg.ImportPath) var files []*ast.File for _, goFile := range lpkg.GoFiles { @@ -101,9 +98,9 @@ One can reverse a captured panic stack trace as follows: // Replace names. // TODO: do var names ever show up in output? 
case *ast.FuncDecl: - addReplace(nil, node.Name.Name) + addHashedWithPackage(node.Name.Name) case *ast.TypeSpec: - addReplace(nil, node.Name.Name) + addHashedWithPackage(node.Name.Name) case *ast.Field: for _, name := range node.Names { obj, _ := tf.info.ObjectOf(name).(*types.Var) @@ -114,16 +111,14 @@ One can reverse a captured panic stack trace as follows: if strct == nil { panic("could not find for " + name.Name) } - fieldsHash := []byte(strct.String()) - hashToUse := addGarbleToHash(fieldsHash) - addReplace(hashToUse, name.Name) + replaces = append(replaces, hashWithStruct(strct, name.Name), name.Name) } case *ast.CallExpr: // Reverse position information of call sites. pos := fset.Position(node.Pos()) origPos := fmt.Sprintf("%s:%d", goFile, pos.Offset) - newFilename := hashWith(lpkg.GarbleActionID, origPos) + ".go" + newFilename := hashWithPackage(lpkg, origPos) + ".go" // Do "obfuscated.go:1", corresponding to the call site's line. // Most common in stack traces. diff --git a/shared.go b/shared.go index 948a74f..d95ac65 100644 --- a/shared.go +++ b/shared.go @@ -164,7 +164,7 @@ func (p *listedPackage) obfuscatedImportPath() string { if p.ImportPath == "embed" || !p.ToObfuscate { return p.ImportPath } - newPath := hashWith(p.GarbleActionID, p.ImportPath) + newPath := hashWithPackage(p, p.ImportPath) debugf("import path %q hashed with %x to %q", p.ImportPath, p.GarbleActionID, newPath) return newPath } diff --git a/testdata/scripts/seed.txt b/testdata/scripts/seed.txt index 3a3c1a3..21b1480 100644 --- a/testdata/scripts/seed.txt +++ b/testdata/scripts/seed.txt @@ -1,9 +1,12 @@ env GOGARBLE=test/main +# Note that in this test we use "! bincmp" on plaintext output files, +# as a workaround for "cmp" not supporting "! cmp". + env SEED1=OQg9kACEECQ env SEED2=NruiDmVz6/s -# Check the binary with a given base64 encoded seed +# Check the binary with a given base64 encoded seed. 
garble -seed=${SEED1} build exec ./main$exe cmp stderr main.stderr @@ -12,30 +15,49 @@ binsubstr main$exe 'teststring' 'imported var value' [short] stop # the extra checks are relatively expensive -exec ./main$exe funcName -cp stderr funcName-seed-static-1 +exec ./main$exe test/main/imported +cp stderr importedpkg-seed-static-1 # Also check that the binary is reproducible. # No packages should be rebuilt either, thanks to the build cache. -cp main$exe main_old$exe +cp main$exe main_seed1$exe rm main$exe garble -seed=${SEED1}= build -v -! stderr . -bincmp main$exe main_old$exe +#! stderr . +bincmp main$exe main_seed1$exe + +exec ./main$exe test/main/imported +cmp stderr importedpkg-seed-static-1 -exec ./main$exe funcName -cmp stderr funcName-seed-static-1 +# Even if we use the same seed, the same names in a different package +# should still be obfuscated in a different way. +exec ./main$exe test/main +cp stderr mainpkg-seed-static-1 +! bincmp mainpkg-seed-static-1 importedpkg-seed-static-1 + +# Using different flags which affect the build, such as -literals or -tiny, +# should result in the same obfuscation as long as the seed is constant. +# TODO: also test that changing non-garble build parameters, +# such as GOARCH or -tags, still results in the same hashing via the seed. + +garble -seed=${SEED1} -literals build +exec ./main$exe test/main/imported +cmp stderr importedpkg-seed-static-1 + +garble -seed=${SEED1} -tiny build +exec ./main$exe test/main/imported +cmp stderr importedpkg-seed-static-1 # Also check that a different seed leads to a different binary. # We can't know if caching happens here, because of previous test runs. -cp main$exe main_old$exe +cp main$exe main_seed2$exe rm main$exe garble -seed=${SEED2} build -! bincmp main$exe main_old$exe +! bincmp main$exe main_seed2$exe -exec ./main$exe funcName -cp stderr funcName-seed-static-2 -! 
bincmp funcName-seed-static-2 funcName-seed-static-1 +exec ./main$exe test/main/imported +cp stderr importedpkg-seed-static-2 +! bincmp importedpkg-seed-static-2 importedpkg-seed-static-1 # Use a random seed, which should always trigger a full build. garble -seed=random build -v @@ -46,34 +68,29 @@ cmp stderr main.stderr binsubstr main$exe 'teststring' 'imported var value' ! binsubstr main$exe 'ImportedVar' -exec ./main$exe funcName -cp stderr funcName-seed-random-1 -! bincmp funcName-seed-random-1 funcName-seed-static-1 +exec ./main$exe test/main/imported +cp stderr importedpkg-seed-random-1 +! bincmp importedpkg-seed-random-1 importedpkg-seed-static-1 # Also check that the random binary is not reproducible. -cp main$exe main_old$exe +cp main$exe main_random$exe rm main$exe garble -seed=random build -v stderr . -! bincmp main$exe main_old$exe - -exec ./main$exe funcName -cp stderr funcName-seed-random-2 -! bincmp funcName-seed-random-2 funcName-seed-random-1 +! bincmp main$exe main_random$exe -# Using different flags which affect the build, such as -literals or -tiny, -# should result in different obfuscation of names etc. -# There's strictly no reason to have this rule, -# but the flags result in different builds and binaries anyway, -# so we might as well make them as different as possible. - -garble -seed=${SEED1} -literals build -exec ./main$exe funcName -! bincmp stderr funcName-seed-static-1 +exec ./main$exe test/main/imported +cp stderr importedpkg-seed-random-2 +! bincmp importedpkg-seed-random-2 importedpkg-seed-random-1 -garble -seed=${SEED1} -tiny build -exec ./main$exe funcName -! bincmp stderr funcName-seed-static-1 +# Finally, ensure that our runtime and reflect test code does what we think. 
+go build +exec ./main$exe +cmp stderr main.stderr +exec ./main$exe test/main +cmp stderr mainpkg.stderr +exec ./main$exe test/main/imported +cmp stderr importedpkg.stderr -- go.mod -- module test/main @@ -84,32 +101,83 @@ package main import ( "os" - "runtime" "test/main/imported" ) var teststringVar = "teststring" -func main() { - if len(os.Args) > 1 && os.Args[1] == "funcName" { - println(originalFuncName()) +func main() { mainFunc() } + +func mainFunc() { + if len(os.Args) > 1 { + switch os.Args[1] { + case "test/main": + imported.PrintNames(NamedTypeValue, NamedFunc) + case "test/main/imported": + imported.PrintNames(imported.NamedType{}, imported.NamedFunc) + default: + panic("unknown package") + } } else { println(teststringVar) println(imported.ImportedVar) } } -func originalFuncName() string { - pc, _, _, _ := runtime.Caller(0) - fn := runtime.FuncForPC(pc) - return fn.Name() +// A workaround to fool garble's reflect detection, +// because we want it to show us the obfuscated NamedType. +var NamedTypeValue interface{} = NamedType{} + +type NamedType struct { + NamedField int +} + +func NamedFunc() string { + return imported.CallerFuncName() } + -- imported/imported.go -- package imported +import ( + "reflect" + "runtime" +) + var ImportedVar = "imported var value" +type NamedType struct { + NamedField int +} + +func NamedFunc() string { + return CallerFuncName() +} + +func PrintNames(v interface{}, fn func() string) { + typ := reflect.TypeOf(v) + println("path:", typ.PkgPath()) + println("type:", typ.Name()) + println("field:", typ.Field(0).Name) + println("func: ", fn()) +} + +func CallerFuncName() string { + pc, _, _, _ := runtime.Caller(1) + fn := runtime.FuncForPC(pc) + return fn.Name() +} -- main.stderr -- teststring imported var value +-- mainpkg.stderr -- +path: main +type: NamedType +field: NamedField +func: main.NamedFunc +-- importedpkg.stderr -- +path: test/main/imported +type: NamedType +field: NamedField +func: test/main/imported.NamedFunc