hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.
pull/326/head
Daniel Martí 3 years ago committed by lu4p
parent b3db7d6fa7
commit c9b0b07853

@ -60,7 +60,7 @@ func alterToolVersion(tool string, args []string) error {
toolID = []byte(line)
}
contentID := addGarbleToBuildIDComponent(toolID)
contentID := addGarbleToHash(toolID)
// The part of the build ID that matters is the last, since it's the
// "content ID" which is used to work out whether there is a need to redo
// the action (build) or not. Since cmd/go parses the last word in the
@ -72,13 +72,13 @@ func alterToolVersion(tool string, args []string) error {
return nil
}
// addGarbleToBuildIDComponent takes a build ID component hash, such as an
// action ID or a content ID, and returns a new hash which also contains
// garble's own deterministic inputs.
// addGarbleToHash takes some arbitrary input bytes,
// typically a hash such as an action ID or a content ID,
// and returns a new hash which also contains garble's own deterministic inputs.
//
// This includes garble's own version, obtained via its own binary's content ID,
// as well as any other options which affect a build, such as GOPRIVATE and -tiny.
func addGarbleToBuildIDComponent(inputHash []byte) []byte {
func addGarbleToHash(inputHash []byte) []byte {
// Join the two content IDs together into a single base64-encoded sha256
// sum. This includes the original tool's content ID, and garble's own
// content ID.

@ -595,7 +595,6 @@ func transformCompile(args []string) ([]string, error) {
var files []*ast.File
for _, path := range paths {
// Note that we want
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
if err != nil {
return nil, err
@ -611,18 +610,9 @@ func transformCompile(args []string) ([]string, error) {
// log.Printf("seeding math/rand with %x\n", randSeed)
mathrand.Seed(int64(binary.BigEndian.Uint64(randSeed)))
tf := &transformer{
info: &types.Info{
Types: make(map[ast.Expr]types.TypeAndValue),
Defs: make(map[*ast.Ident]types.Object),
Uses: make(map[*ast.Ident]types.Object),
},
}
origTypesConfig := types.Config{Importer: origImporter}
tf.pkg, err = origTypesConfig.Check(curPkg.ImportPath, fset, files, tf.info)
if err != nil {
return nil, fmt.Errorf("typecheck error: %v", err)
tf := newTransformer()
if err := tf.typecheck(files); err != nil {
return nil, err
}
tf.recordReflectArgs(files)
@ -1028,6 +1018,78 @@ type transformer struct {
// * Types or variables from external packages which were not
// obfuscated, for caching reasons; see transformGo.
ignoreObjects map[types.Object]bool
// These fields are used to locate struct types from any of their field
// objects. Useful when obfuscating field names.
fieldToStruct map[*types.Var]*types.Struct
recordTypeDone map[types.Type]bool
}
// newTransformer helps initialize some maps.
func newTransformer() *transformer {
return &transformer{
info: &types.Info{
Types: make(map[ast.Expr]types.TypeAndValue),
Defs: make(map[*ast.Ident]types.Object),
Uses: make(map[*ast.Ident]types.Object),
},
recordTypeDone: make(map[types.Type]bool),
fieldToStruct: make(map[*types.Var]*types.Struct),
}
}
func (tf *transformer) typecheck(files []*ast.File) error {
origTypesConfig := types.Config{Importer: origImporter}
pkg, err := origTypesConfig.Check(curPkg.ImportPath, fset, files, tf.info)
if err != nil {
return fmt.Errorf("typecheck error: %v", err)
}
tf.pkg = pkg
// Run recordType on all types reachable via types.Info.
// A bit hacky, but I could not find an easier way to do this.
for _, obj := range tf.info.Defs {
if obj != nil {
tf.recordType(obj.Type())
}
}
for _, obj := range tf.info.Uses {
if obj != nil {
tf.recordType(obj.Type())
}
}
for _, tv := range tf.info.Types {
tf.recordType(tv.Type)
}
return nil
}
// recordType visits every reachable type after typechecking a package.
// Right now, all it does is fill the fieldToStruct field.
// Since types can be recursive, we need a map to avoid cycles.
func (tf *transformer) recordType(t types.Type) {
if tf.recordTypeDone[t] {
return
}
tf.recordTypeDone[t] = true
switch t := t.(type) {
case interface{ Elem() types.Type }:
tf.recordType(t.Elem())
case *types.Named:
tf.recordType(t.Underlying())
}
strct, _ := t.(*types.Struct)
if strct == nil {
return
}
for i := 0; i < strct.NumFields(); i++ {
field := strct.Field(i)
tf.fieldToStruct[field] = strct
if field.Embedded() {
tf.recordType(field.Type())
}
}
}
// transformGo obfuscates the provided Go syntax file.
@ -1085,6 +1147,7 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
if !lpkg.Private {
return true // only private packages are transformed
}
hashToUse := lpkg.GarbleActionID
// log.Printf("%#v %T", node, obj)
parentScope := obj.Parent()
@ -1095,6 +1158,30 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
return true
}
// Fields don't get hashed with the package's action ID.
// They get hashed with the type of their parent struct.
// This is because one struct can be converted to another,
// as long as the underlying types are identical,
// even if the structs are defined in different packages.
//
// TODO: Consider only doing this for structs where all
// fields are exported. We only need this special case
// for cross-package conversions, which can't work if
// any field is unexported. If that is done, add a test
// that ensures unexported fields from different
// packages result in different obfuscated names.
if obj.IsField() {
strct := tf.fieldToStruct[obj]
if strct == nil {
panic("could not find for " + node.Name)
}
// TODO: We should probably strip field tags here.
// Do we need to do anything else to make a
// struct type "canonical"?
fieldsHash := []byte(strct.String())
hashToUse = addGarbleToHash(fieldsHash)
}
// If the struct of this field was not obfuscated, do not obfuscate
// any of that struct's fields.
if parentScope != tf.pkg.Scope() && obj.IsField() && !obj.Embedded() {
@ -1172,8 +1259,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
origName := node.Name
_ = origName // used for debug prints below
node.Name = hashWith(lpkg.GarbleActionID, node.Name)
// log.Printf("%q hashed with %x to %q", origName, lpkg.GarbleActionID, node.Name)
node.Name = hashWith(hashToUse, node.Name)
// log.Printf("%q hashed with %x to %q", origName, hashToUse, node.Name)
return true
}
post := func(cursor *astutil.Cursor) bool {

@ -9,7 +9,7 @@ import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"go/types"
"io"
"os"
"path/filepath"
@ -53,37 +53,60 @@ func commandReverse(args []string) error {
// export data only exposes exported names. Parsing Go files is cheap,
// so it's unnecessary to try to avoid this cost.
var replaces []string
fset := token.NewFileSet()
for _, lpkg := range cache.ListedPackages {
if !lpkg.Private {
continue
}
addReplace := func(str string) {
replaces = append(replaces, hashWith(lpkg.GarbleActionID, str), str)
curPkg = lpkg
addReplace := func(hash []byte, str string) {
if hash == nil {
hash = lpkg.GarbleActionID
}
replaces = append(replaces, hashWith(hash, str), str)
}
// Package paths are obfuscated, too.
addReplace(lpkg.ImportPath)
addReplace(nil, lpkg.ImportPath)
var files []*ast.File
for _, goFile := range lpkg.GoFiles {
fullGoFile := filepath.Join(lpkg.Dir, goFile)
file, err := parser.ParseFile(fset, fullGoFile, nil, 0)
if err != nil {
return err
}
files = append(files, file)
}
tf := newTransformer()
if err := tf.typecheck(files); err != nil {
return err
}
for i, file := range files {
goFile := lpkg.GoFiles[i]
ast.Inspect(file, func(node ast.Node) bool {
switch node := node.(type) {
// Replace names.
// TODO: do var names ever show up in output?
case *ast.FuncDecl:
addReplace(node.Name.Name)
addReplace(nil, node.Name.Name)
case *ast.TypeSpec:
addReplace(node.Name.Name)
addReplace(nil, node.Name.Name)
case *ast.Field:
for _, name := range node.Names {
addReplace(name.Name)
obj, _ := tf.info.ObjectOf(name).(*types.Var)
if obj == nil || !obj.IsField() {
continue
}
strct := tf.fieldToStruct[obj]
if strct == nil {
panic("could not find for " + name.Name)
}
fieldsHash := []byte(strct.String())
hashToUse := addGarbleToHash(fieldsHash)
addReplace(hashToUse, name.Name)
}
case *ast.CallExpr:

@ -223,7 +223,7 @@ func setListedPackages(patterns []string) error {
}
if pkg.Export != "" {
actionID := decodeHash(splitActionID(pkg.BuildID))
pkg.GarbleActionID = addGarbleToBuildIDComponent(actionID)
pkg.GarbleActionID = addGarbleToHash(actionID)
}
cache.ListedPackages[pkg.ImportPath] = &pkg
}

@ -6,6 +6,13 @@ cmp stderr main.stderr
! binsubstr main$exe 'unexportedMethod' 'privateIface'
[short] stop # no need to verify this with -short
# Check that the program works as expected without garble.
go build
exec ./main
cmp stderr main.stderr
-- go.mod --
module test/main
@ -13,7 +20,12 @@ go 1.16
-- main.go --
package main
import "test/main/tinyfmt"
import (
"test/main/lib1"
"test/main/lib2"
"test/main/lib3"
"test/main/tinyfmt"
)
type T string
@ -33,10 +45,53 @@ func (T) privateIface() {}
var _ privateInterface = T("")
type StructUnnamed = struct {
Foo int
Bar struct {
Nested *[]string
}
lib3.StructEmbed
}
var _ = lib1.Struct1(lib2.Struct2{})
var _ = StructUnnamed(lib2.Struct2{})
func main() {
tinyfmt.Println(T("foo"))
tinyfmt.Println(T("foo").unexportedMethod())
}
-- lib1/lib1.go --
package lib1
import "test/main/lib3"
type Struct1 struct {
Foo int
Bar struct {
Nested *[]string
}
lib3.StructEmbed
}
-- lib2/lib2.go --
package lib2
import "test/main/lib3"
type Struct2 struct {
Foo int
Bar struct {
Nested *[]string
}
lib3.StructEmbed
}
-- lib3/lib3.go --
package lib3
type StructEmbed struct {
Baz interface{}
}
-- tinyfmt/fmt.go --
package tinyfmt

Loading…
Cancel
Save