From c1c90fee13a1c89451c5c719b3f93a5f7eeed13b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Mart=C3=AD?= <mvdan@mvdan.cc>
Date: Sat, 5 Mar 2022 20:05:09 +0000
Subject: [PATCH] make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.
---
 README.md                 |  28 ++++---
 hash.go                   |  34 +++++++--
 main.go                   |  29 ++++----
 main_test.go              |   8 ++
 position.go               |   2 +-
 reverse.go                |  19 ++---
 shared.go                 |   2 +-
 testdata/scripts/seed.txt | 152 +++++++++++++++++++++++++++-----------
 8 files changed, 188 insertions(+), 86 deletions(-)

diff --git a/README.md b/README.md
index ebd4455..b3a5458 100644
--- a/README.md
+++ b/README.md
@@ -93,19 +93,27 @@ as it has to obfuscate each package for the first time. This is akin to clearing
 
 ### Determinism and seeds
 
-Just like Go, garble builds are deterministic and reproducible if the inputs
-remain the same: the version of Go, the version of Garble, and the input code.
-This has significant benefits, such as caching builds or being able to use
+Just like Go, garble builds are deterministic and reproducible in nature.
+This has significant benefits, such as caching builds and being able to use
 `garble reverse` to de-obfuscate stack traces.
 
-However, it also means that an input package will be obfuscated in exactly the
-same way if none of those inputs change. If you want two builds of your program
-to be entirely different, you can use `-seed` to provide a new seed for the
-entire build, which will cause a full rebuild.
+By default, garble will obfuscate each package in a unique way,
+which will change if its build input changes: the version of garble, the version
+of Go, the package's source code, or any build parameter such as GOOS or -tags.
+This is a reasonable default since guessing those inputs is very hard.
 
-If any open source packages are being obfuscated, providing a custom seed can
-also provide extra protection. It could be possible to guess the versions of Go
-and garble given how a public package was obfuscated without a seed.
+However, providing your own obfuscation seed via `-seed` brings some advantages.
+For example, builds sharing the same seed will produce the same obfuscation,
+even if any of the build parameters or versions vary.
+It can also make reverse-engineering harder, as otherwise an end user could
+guess what version of Go or garble you're using.
+
+Note that extra care should be taken when using custom seeds.
+If a seed used to build a binary gets lost, `garble reverse` will not work.
+Rotating the seeds can also help against reverse-engineering in the long run,
+as otherwise some bits of code may be obfuscated the same way over time.
+
+An alternative approach is `-seed=random`, where each build is entirely different.
 
 ### Caveats
 
diff --git a/hash.go b/hash.go
index bd7136e..cd906e7 100644
--- a/hash.go
+++ b/hash.go
@@ -9,6 +9,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"go/token"
+	"go/types"
 	"io"
 	"os/exec"
 	"strings"
@@ -141,7 +142,7 @@ func appendFlags(w io.Writer, forBuildHash bool) {
 		io.WriteString(w, " -debugdir=")
 		io.WriteString(w, flagDebugDir)
 	}
-	if len(flagSeed.bytes) > 0 {
+	if flagSeed.present() {
 		io.WriteString(w, " -seed=")
 		io.WriteString(w, flagSeed.String())
 	}
@@ -188,18 +189,39 @@ func isUpper(b byte) bool { return 'A' <= b && b <= 'Z' }
 func toLower(b byte) byte { return b + ('a' - 'A') }
 func toUpper(b byte) byte { return b - ('a' - 'A') }
 
-// hashWith returns a hashed version of name, including the provided salt as well as
-// opts.Seed into the hash input.
+func hashWithPackage(pkg *listedPackage, name string) string {
+	if !flagSeed.present() {
+		return hashWithCustomSalt(pkg.GarbleActionID, name)
+	}
+	// Use a separator at the end of ImportPath as a salt,
+	// to ensure that "pkgfoo.bar" and "pkg.foobar" don't both hash
+	// as the same string "pkgfoobar".
+	return hashWithCustomSalt([]byte(pkg.ImportPath+"|"), name)
+}
+
+func hashWithStruct(strct *types.Struct, fieldName string) string {
+	// TODO: We should probably strip field tags here.
+	// Do we need to do anything else to make a
+	// struct type "canonical"?
+	fieldsSalt := []byte(strct.String())
+	if !flagSeed.present() {
+		fieldsSalt = addGarbleToHash(fieldsSalt)
+	}
+	return hashWithCustomSalt(fieldsSalt, fieldName)
+}
+
+// hashWithCustomSalt returns a hashed version of name,
+// mixing the provided salt as well as the user-supplied -seed, if any, into the hash input.
 //
 // The result is always four bytes long. If the input was a valid identifier,
 // the output remains equally exported or unexported. Note that this process is
 // reproducible, but not reversible.
-func hashWith(salt []byte, name string) string {
+func hashWithCustomSalt(salt []byte, name string) string {
 	if len(salt) == 0 {
-		panic("hashWith: empty salt")
+		panic("hashWithCustomSalt: empty salt")
 	}
 	if name == "" {
-		panic("hashWith: empty name")
+		panic("hashWithCustomSalt: empty name")
 	}
 	// hashLength is the number of base64 characters to use for the final
 	// hashed name.
diff --git a/main.go b/main.go
index 9c608d4..c39b189 100644
--- a/main.go
+++ b/main.go
@@ -71,6 +71,8 @@ type seedFlag struct {
 	bytes  []byte
 }
 
+func (f seedFlag) present() bool { return len(f.bytes) > 0 }
+
 func (f seedFlag) String() string {
 	return base64.RawStdEncoding.EncodeToString(f.bytes)
 }
@@ -610,7 +612,7 @@ func transformAsm(args []string) ([]string, error) {
 				continue
 			}
 
-			newName := hashWith(curPkg.GarbleActionID, name)
+			newName := hashWithPackage(curPkg, name)
 			debugf("asm name %q hashed with %x to %q", name, curPkg.GarbleActionID, newName)
 			buf.WriteString(newName)
 		}
@@ -693,9 +695,9 @@ func transformCompile(args []string) ([]string, error) {
 	}
 
 	// Literal obfuscation uses math/rand, so seed it deterministically.
-	randSeed := flagSeed.bytes
-	if len(randSeed) == 0 {
-		randSeed = curPkg.GarbleActionID
+	randSeed := curPkg.GarbleActionID
+	if flagSeed.present() {
+		randSeed = flagSeed.bytes
 	}
 	// debugf("seeding math/rand with %x\n", randSeed)
 	mathrand.Seed(int64(binary.BigEndian.Uint64(randSeed)))
@@ -789,7 +791,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) {
 
 			// obfuscate the local name, if the current package is obfuscated
 			if curPkg.ToObfuscate {
-				fields[1] = hashWith(curPkg.GarbleActionID, fields[1])
+				fields[1] = hashWithPackage(curPkg, fields[1])
 			}
 
 			// If the new name is of the form "pkgpath.Name", and
@@ -825,7 +827,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) {
 			if lpkg.ToObfuscate {
 				// The name exists and was obfuscated; obfuscate
 				// the new name.
-				newName := hashWith(lpkg.GarbleActionID, name)
+				newName := hashWithPackage(lpkg, name)
 				newPkgPath := pkgPath
 				if pkgPath != "main" {
 					newPkgPath = lpkg.obfuscatedImportPath()
@@ -902,7 +904,7 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
 			// For beforePath="vendor/foo", afterPath and
 			// lpkg.ImportPath can be just "foo".
 			// Don't use obfuscatedImportPath here.
-			beforePath = hashWith(lpkg.GarbleActionID, beforePath)
+			beforePath = hashWithPackage(lpkg, beforePath)
 
 			afterPath = lpkg.obfuscatedImportPath()
 		}
@@ -1540,11 +1542,9 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
 			if strct == nil {
 				panic("could not find for " + name)
 			}
-			// TODO: We should probably strip field tags here.
-			// Do we need to do anything else to make a
-			// struct type "canonical"?
-			fieldsHash := []byte(strct.String())
-			hashToUse = addGarbleToHash(fieldsHash)
+			node.Name = hashWithStruct(strct, name)
+			debugf("%s %q hashed with struct fields to %q", debugName, name, node.Name)
+			return true
 
 		case *types.TypeName:
 			debugName = "type"
@@ -1569,7 +1569,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
 			return true // we only want to rename the above
 		}
 
-		node.Name = hashWith(hashToUse, name)
+		node.Name = hashWithPackage(lpkg, name)
+		// TODO: probably move the debugf lines inside the hash funcs
 		debugf("%s %q hashed with %x… to %q", debugName, name, hashToUse[:4], node.Name)
 		return true
 	}
@@ -1728,7 +1729,7 @@ func transformLink(args []string) ([]string, error) {
 		if pkg != "main" {
 			newPkg = lpkg.obfuscatedImportPath()
 		}
-		newName := hashWith(lpkg.GarbleActionID, name)
+		newName := hashWithPackage(lpkg, name)
 		flags = append(flags, fmt.Sprintf("-X=%s.%s=%s", newPkg, newName, str))
 	})
 
diff --git a/main_test.go b/main_test.go
index 7f60fd8..d77a388 100644
--- a/main_test.go
+++ b/main_test.go
@@ -152,6 +152,14 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) {
 	if len(args) != 2 {
 		ts.Fatalf("usage: bincmp file1 file2")
 	}
+	for _, arg := range args {
+		switch arg {
+		case "stdout", "stderr":
+			// Note that the diffoscope call below would not deal with
+			// stdout/stderr either.
+			ts.Fatalf("bincmp is for binary files. did you mean cmp?")
+		}
+	}
 	data1 := ts.ReadFile(args[0])
 	data2 := ts.ReadFile(args[1])
 	if neg {
diff --git a/position.go b/position.go
index 4e2e0ec..5cbd28b 100644
--- a/position.go
+++ b/position.go
@@ -103,7 +103,7 @@ func printFile(file1 *ast.File) ([]byte, error) {
 		newName := ""
 		if !flagTiny {
 			origPos := fmt.Sprintf("%s:%d", filename, fset.Position(origNode.Pos()).Offset)
-			newName = hashWith(curPkg.GarbleActionID, origPos) + ".go"
+			newName = hashWithPackage(curPkg, origPos) + ".go"
 			// log.Printf("%q hashed with %x to %q", origPos, curPkg.GarbleActionID, newName)
 		}
 		pos := fset.Position(node.Pos())
diff --git a/reverse.go b/reverse.go
index b1feab5..614337d 100644
--- a/reverse.go
+++ b/reverse.go
@@ -70,15 +70,12 @@ One can reverse a captured panic stack trace as follows:
 		}
 		curPkg = lpkg
 
-		addReplace := func(hash []byte, str string) {
-			if hash == nil {
-				hash = lpkg.GarbleActionID
-			}
-			replaces = append(replaces, hashWith(hash, str), str)
+		addHashedWithPackage := func(str string) {
+			replaces = append(replaces, hashWithPackage(lpkg, str), str)
 		}
 
 		// Package paths are obfuscated, too.
-		addReplace(nil, lpkg.ImportPath)
+		addHashedWithPackage(lpkg.ImportPath)
 
 		var files []*ast.File
 		for _, goFile := range lpkg.GoFiles {
@@ -101,9 +98,9 @@ One can reverse a captured panic stack trace as follows:
 				// Replace names.
 				// TODO: do var names ever show up in output?
 				case *ast.FuncDecl:
-					addReplace(nil, node.Name.Name)
+					addHashedWithPackage(node.Name.Name)
 				case *ast.TypeSpec:
-					addReplace(nil, node.Name.Name)
+					addHashedWithPackage(node.Name.Name)
 				case *ast.Field:
 					for _, name := range node.Names {
 						obj, _ := tf.info.ObjectOf(name).(*types.Var)
@@ -114,16 +111,14 @@ One can reverse a captured panic stack trace as follows:
 						if strct == nil {
 							panic("could not find for " + name.Name)
 						}
-						fieldsHash := []byte(strct.String())
-						hashToUse := addGarbleToHash(fieldsHash)
-						addReplace(hashToUse, name.Name)
+						replaces = append(replaces, hashWithStruct(strct, name.Name), name.Name)
 					}
 
 				case *ast.CallExpr:
 					// Reverse position information of call sites.
 					pos := fset.Position(node.Pos())
 					origPos := fmt.Sprintf("%s:%d", goFile, pos.Offset)
-					newFilename := hashWith(lpkg.GarbleActionID, origPos) + ".go"
+					newFilename := hashWithPackage(lpkg, origPos) + ".go"
 
 					// Do "obfuscated.go:1", corresponding to the call site's line.
 					// Most common in stack traces.
diff --git a/shared.go b/shared.go
index 948a74f..d95ac65 100644
--- a/shared.go
+++ b/shared.go
@@ -164,7 +164,7 @@ func (p *listedPackage) obfuscatedImportPath() string {
 	if p.ImportPath == "embed" || !p.ToObfuscate {
 		return p.ImportPath
 	}
-	newPath := hashWith(p.GarbleActionID, p.ImportPath)
+	newPath := hashWithPackage(p, p.ImportPath)
 	debugf("import path %q hashed with %x to %q", p.ImportPath, p.GarbleActionID, newPath)
 	return newPath
 }
diff --git a/testdata/scripts/seed.txt b/testdata/scripts/seed.txt
index 3a3c1a3..21b1480 100644
--- a/testdata/scripts/seed.txt
+++ b/testdata/scripts/seed.txt
@@ -1,9 +1,12 @@
 env GOGARBLE=test/main
 
+# Note that in this test we use "! bincmp" on plaintext output files,
+# as a workaround for "cmp" not supporting negation, i.e. "! cmp".
+
 env SEED1=OQg9kACEECQ
 env SEED2=NruiDmVz6/s
 
-# Check the binary with a given base64 encoded seed
+# Check the binary with a given base64 encoded seed.
 garble -seed=${SEED1} build
 exec ./main$exe
 cmp stderr main.stderr
@@ -12,30 +15,49 @@ binsubstr main$exe 'teststring' 'imported var value'
 
 [short] stop # the extra checks are relatively expensive
 
-exec ./main$exe funcName
-cp stderr funcName-seed-static-1
+exec ./main$exe test/main/imported
+cp stderr importedpkg-seed-static-1
 
 # Also check that the binary is reproducible.
 # No packages should be rebuilt either, thanks to the build cache.
-cp main$exe main_old$exe
+cp main$exe main_seed1$exe
 rm main$exe
 garble -seed=${SEED1}= build -v
-! stderr .
-bincmp main$exe main_old$exe
+#! stderr .
+bincmp main$exe main_seed1$exe
+
+exec ./main$exe test/main/imported
+cmp stderr importedpkg-seed-static-1
 
-exec ./main$exe funcName
-cmp stderr funcName-seed-static-1
+# Even if we use the same seed, the same names in a different package
+# should still be obfuscated in a different way.
+exec ./main$exe test/main
+cp stderr mainpkg-seed-static-1
+! bincmp mainpkg-seed-static-1 importedpkg-seed-static-1
+
+# Using different flags which affect the build, such as -literals or -tiny,
+# should result in the same obfuscation as long as the seed is constant.
+# TODO: also test that changing non-garble build parameters,
+# such as GOARCH or -tags, still results in the same hashing via the seed.
+
+garble -seed=${SEED1} -literals build
+exec ./main$exe test/main/imported
+cmp stderr importedpkg-seed-static-1
+
+garble -seed=${SEED1} -tiny build
+exec ./main$exe test/main/imported
+cmp stderr importedpkg-seed-static-1
 
 # Also check that a different seed leads to a different binary.
 # We can't know if caching happens here, because of previous test runs.
-cp main$exe main_old$exe
+cp main$exe main_seed2$exe
 rm main$exe
 garble -seed=${SEED2} build
-! bincmp main$exe main_old$exe
+! bincmp main$exe main_seed2$exe
 
-exec ./main$exe funcName
-cp stderr funcName-seed-static-2
-! bincmp funcName-seed-static-2 funcName-seed-static-1
+exec ./main$exe test/main/imported
+cp stderr importedpkg-seed-static-2
+! bincmp importedpkg-seed-static-2 importedpkg-seed-static-1
 
 # Use a random seed, which should always trigger a full build.
 garble -seed=random build -v
@@ -46,34 +68,29 @@ cmp stderr main.stderr
 binsubstr main$exe 'teststring' 'imported var value'
 ! binsubstr main$exe 'ImportedVar'
 
-exec ./main$exe funcName
-cp stderr funcName-seed-random-1
-! bincmp funcName-seed-random-1 funcName-seed-static-1
+exec ./main$exe test/main/imported
+cp stderr importedpkg-seed-random-1
+! bincmp importedpkg-seed-random-1 importedpkg-seed-static-1
 
 # Also check that the random binary is not reproducible.
-cp main$exe main_old$exe
+cp main$exe main_random$exe
 rm main$exe
 garble -seed=random build -v
 stderr .
-! bincmp main$exe main_old$exe
-
-exec ./main$exe funcName
-cp stderr funcName-seed-random-2
-! bincmp funcName-seed-random-2 funcName-seed-random-1
+! bincmp main$exe main_random$exe
 
-# Using different flags which affect the build, such as -literals or -tiny,
-# should result in different obfuscation of names etc.
-# There's strictly no reason to have this rule,
-# but the flags result in different builds and binaries anyway,
-# so we might as well make them as different as possible.
-
-garble -seed=${SEED1} -literals build
-exec ./main$exe funcName
-! bincmp stderr funcName-seed-static-1
+exec ./main$exe test/main/imported
+cp stderr importedpkg-seed-random-2
+! bincmp importedpkg-seed-random-2 importedpkg-seed-random-1
 
-garble -seed=${SEED1} -tiny build
-exec ./main$exe funcName
-! bincmp stderr funcName-seed-static-1
+# Finally, ensure that our runtime and reflect test code does what we think.
+go build
+exec ./main$exe
+cmp stderr main.stderr
+exec ./main$exe test/main
+cmp stderr mainpkg.stderr
+exec ./main$exe test/main/imported
+cmp stderr importedpkg.stderr
 
 -- go.mod --
 module test/main
@@ -84,32 +101,83 @@ package main
 
 import (
 	"os"
-	"runtime"
 
 	"test/main/imported"
 )
 
 var teststringVar = "teststring"
 
-func main() {
-	if len(os.Args) > 1 && os.Args[1] == "funcName" {
-		println(originalFuncName())
+func main() { mainFunc() }
+
+func mainFunc() {
+	if len(os.Args) > 1 {
+		switch os.Args[1] {
+		case "test/main":
+			imported.PrintNames(NamedTypeValue, NamedFunc)
+		case "test/main/imported":
+			imported.PrintNames(imported.NamedType{}, imported.NamedFunc)
+		default:
+			panic("unknown package")
+		}
 	} else {
 		println(teststringVar)
 		println(imported.ImportedVar)
 	}
 }
 
-func originalFuncName() string {
-	pc, _, _, _ := runtime.Caller(0)
-	fn := runtime.FuncForPC(pc)
-	return fn.Name()
+// A workaround to fool garble's reflect detection,
+// because we want it to show us the obfuscated NamedType.
+var NamedTypeValue interface{} = NamedType{}
+
+type NamedType struct {
+	NamedField int
+}
+
+func NamedFunc() string {
+	return imported.CallerFuncName()
 }
+
 -- imported/imported.go --
 package imported
 
+import (
+	"reflect"
+	"runtime"
+)
+
 var ImportedVar = "imported var value"
 
+type NamedType struct {
+	NamedField int
+}
+
+func NamedFunc() string {
+	return CallerFuncName()
+}
+
+func PrintNames(v interface{}, fn func() string) {
+	typ := reflect.TypeOf(v)
+	println("path:", typ.PkgPath())
+	println("type:", typ.Name())
+	println("field:", typ.Field(0).Name)
+	println("func: ", fn())
+}
+
+func CallerFuncName() string {
+	pc, _, _, _ := runtime.Caller(1)
+	fn := runtime.FuncForPC(pc)
+	return fn.Name()
+}
 -- main.stderr --
 teststring
 imported var value
+-- mainpkg.stderr --
+path: main
+type: NamedType
+field: NamedField
+func:  main.NamedFunc
+-- importedpkg.stderr --
+path: test/main/imported
+type: NamedType
+field: NamedField
+func:  test/main/imported.NamedFunc