stop loading obfuscated type information from deps

If package P1 imports package P2, P1 needs to know which names from P2
weren't obfuscated. For instance, if P2 declares T2 and does
"reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and
neither should P1.

This information should flow from P2 to P1, as P2 builds before
P1. We do this via obfuscatedTypesPackage; P1 loads the type information
of the obfuscated version of P2, and does a lookup for T2. If T2 exists,
then it wasn't obfuscated.

This mechanism has served us well, but it has downsides:

1) It wastes CPU; we load the type information for the entire package.

2) It's complex; for instance, we need KnownObjectFiles as an extra.

3) It makes our code harder to understand, as we load both the original
   and obfuscated type informaiton.

Instead, we now have each package record what names were not obfuscated
as part of its cachedOuput file. Much like KnownObjectFiles, the map
records incrementally through the import graph, to avoid having to load
cachedOutput files for indirect dependencies.

We shouldn't need to worry about those maps getting large;
we only skip obfuscating declared names in a few uncommon scenarios,
such as the use of reflection or cgo's "//export".

Since go/types is relatively allocation-heavy, and the export files
contain a lot of data, we get a nice speed-up:

	name      old time/op         new time/op         delta
	Build-16          11.5s ± 2%          11.1s ± 3%  -3.77%  (p=0.008 n=5+5)

	name      old bin-B           new bin-B           delta
	Build-16          5.15M ± 0%          5.15M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          375ms ± 3%          341ms ± 6%  -8.96%  (p=0.008 n=5+5)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          283ms ±17%          289ms ±13%    ~     (p=0.841 n=5+5)

	name      old user-time/op    new user-time/op    delta
	Build-16          687ms ± 6%          664ms ± 7%    ~     (p=0.548 n=5+5)

Fixes #456.
Updates #475.
pull/477/head
Daniel Martí 3 years ago committed by lu4p
parent d49c2446ee
commit 4c3b90c051

@ -152,14 +152,6 @@ var (
// Basic information about the package being currently compiled or linked.
curPkg *listedPackage
garbledImporter = importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) {
pkgfile := cachedOutput.KnownObjectFiles[path]
if pkgfile == "" {
panic(fmt.Sprintf("missing in cachedOutput.KnownObjectFiles: %q", path))
}
return os.Open(pkgfile)
}).(types.ImporterFrom)
)
type importerWithMap func(path, dir string, mode types.ImportMode) (*types.Package, error)
@ -175,23 +167,6 @@ func (fn importerWithMap) ImportFrom(path, dir string, mode types.ImportMode) (*
return fn(path, dir, mode)
}
func obfuscatedTypesPackage(path string) *types.Package {
if path == curPkg.ImportPath {
panic("called obfuscatedTypesPackage on the current package?")
}
if pkg := cachedTypesPackages[path]; pkg != nil {
return pkg
}
pkg, err := garbledImporter.ImportFrom(path, parentWorkDir, 0)
if err != nil {
panic(err)
}
cachedTypesPackages[path] = pkg // cache for later use
return pkg
}
var cachedTypesPackages = make(map[string]*types.Package)
// uniqueLineWriter sits underneath log.SetOutput to deduplicate log lines.
// We log bits of useful information for debugging,
// and logging the same detail twice is not going to help the user.
@ -711,13 +686,6 @@ func transformCompile(args []string) ([]string, error) {
return nil, err
}
if err := writeGobExclusive(
garbleExportFile(curPkg),
cachedOutput,
); err != nil && !errors.Is(err, fs.ErrExist) {
return nil, err
}
// Literal obfuscation uses math/rand, so seed it deterministically.
randSeed := flagSeed.bytes
if len(randSeed) == 0 {
@ -739,17 +707,14 @@ func transformCompile(args []string) ([]string, error) {
newPaths := make([]string, 0, len(files))
for i, file := range files {
name := filepath.Base(paths[i])
filename := filepath.Base(paths[i])
debugf("obfuscating %s", filename)
if curPkg.ImportPath == "runtime" && flagTiny {
// strip unneeded runtime code
stripRuntime(name, file)
stripRuntime(filename, file)
}
if strings.HasPrefix(name, "_cgo_") {
// Don't obfuscate cgo code, since it's generated and it gets messy.
} else {
tf.handleDirectives(file.Comments)
file = tf.transformGo(file)
}
file = tf.transformGo(filename, file)
if newPkgPath != "" {
file.Name.Name = newPkgPath
}
@ -761,10 +726,10 @@ func transformCompile(args []string) ([]string, error) {
// Uncomment for some quick debugging. Do not delete.
// if curPkg.ToObfuscate {
// fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, name, src)
// fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, filename, src)
// }
if path, err := writeTemp(name, src); err != nil {
if path, err := writeTemp(filename, src); err != nil {
return nil, err
} else {
newPaths = append(newPaths, path)
@ -776,7 +741,7 @@ func transformCompile(args []string) ([]string, error) {
return nil, err
}
debugFilePath := filepath.Join(pkgDebugDir, name)
debugFilePath := filepath.Join(pkgDebugDir, filename)
if err := os.WriteFile(debugFilePath, src, 0o666); err != nil {
return nil, err
}
@ -784,6 +749,13 @@ func transformCompile(args []string) ([]string, error) {
}
flags = flagSetValue(flags, "-importcfg", newImportCfg)
if err := writeGobExclusive(
garbleExportFile(curPkg),
cachedOutput,
); err != nil && !errors.Is(err, fs.ErrExist) {
return nil, err
}
return append(flags, newPaths...), nil
}
@ -816,7 +788,10 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) {
newName := fields[2]
dotCnt := strings.Count(newName, ".")
if dotCnt < 1 {
// probably a malformed linkname directive
// cgo-generated code uses linknames to made up symbol names,
// which do not have a package path at all.
// Replace the comment in case the local name was obfuscated.
comment.Text = strings.Join(fields, " ")
continue
}
switch newName {
@ -833,8 +808,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) {
lpkg, err := listPackage(pkgPath)
if err != nil {
// probably a made up symbol name, replace the comment
// in case the local name was obfuscated.
// Probably a made up name like above, but with a dot.
comment.Text = strings.Join(fields, " ")
continue
}
@ -919,8 +893,7 @@ func toObfuscate(path string) bool {
return module.MatchPrefixPatterns(cache.GOGARBLE, path)
}
// processImportCfg parses the importcfg file passed to a compile or link step,
// adding missing entries to KnownObjectFiles to be stored in the build cache.
// processImportCfg parses the importcfg file passed to a compile or link step.
// It also builds a new importcfg file to account for obfuscated import paths.
func processImportCfg(flags []string) (newImportCfg string, _ error) {
importCfg := flagValue(flags, "-importcfg")
@ -962,20 +935,6 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) {
importPath, objectPath := args[:j], args[j+1:]
packagefiles = append(packagefiles, [2]string{importPath, objectPath})
if prev := cachedOutput.KnownObjectFiles[importPath]; prev != "" {
// Nothing to do; recorded by one of our dependencies.
} else if strings.HasSuffix(objectPath, "_pkg_.a") {
// The path is inside a temporary directory, to be deleted soon.
// Record the final location within the build cache instead.
finalObjectPath, err := gocachePathForFile(objectPath)
if err != nil {
return "", err
}
cachedOutput.KnownObjectFiles[importPath] = finalObjectPath
} else {
cachedOutput.KnownObjectFiles[importPath] = objectPath
}
}
}
@ -1034,27 +993,26 @@ type (
// cachedOutput contains information that will be stored as per garbleExportFile.
var cachedOutput = struct {
// KnownObjectFiles is filled from -importcfg in the current obfuscated build.
// As such, it records export data for the dependencies which might be
// themselves obfuscated, depending on GOGARBLE.
//
// TODO: We rely on obfuscated type information to know what names we didn't
// obfuscate. Instead, directly record what names we chose not to obfuscate,
// which should then avoid having to go through go/types.
KnownObjectFiles map[string]string
// KnownReflectAPIs is a static record of what std APIs use reflection on their
// parameters, so we can avoid obfuscating types used with them.
//
// TODO: we're not including fmt.Printf, as it would have many false positives,
// unless we were smart enough to detect which arguments get used as %#v or %T.
KnownReflectAPIs map[funcFullName][]reflectParameterPosition
// KnownCannotObfuscate is filled with the fully qualified names from each
// package that we could not obfuscate as per cannotObfuscateNames.
// This record is necessary for knowing what names from imported packages
// weren't obfuscated, so we can obfuscate their local uses accordingly.
//
// TODO: merge cannotObfuscateNames into this directly
KnownCannotObfuscate map[string]struct{}
}{
KnownObjectFiles: map[string]string{},
KnownReflectAPIs: map[funcFullName][]reflectParameterPosition{
"reflect.TypeOf": {0},
"reflect.ValueOf": {0},
},
KnownCannotObfuscate: map[string]struct{}{},
}
// garbleExportFile returns an absolute path to a build cache entry
@ -1381,8 +1339,76 @@ func (tf *transformer) recordType(t types.Type) {
}
}
func recordedObjectString(obj types.Object) string {
if !obj.Exported() {
// Unexported names will never be used by other packages,
// so we don't need to bother recording them.
return ""
}
if obj, ok := obj.(*types.Var); ok && obj.IsField() {
// For exported fields, "pkgpath.Field" is not unique,
// because two exported top-level types could share "Field".
//
// Moreover, note that not all fields belong to named struct types;
// an API could be exposing:
//
// var usedInReflection = struct{Field string}
//
// For now, a hack: assume that packages don't declare the same field
// more than once in the same line. This works in practice, but one
// could craft Go code to break this assumption.
// Also note that the compiler's object files include filenames and line
// numbers, but not column numbers nor byte offsets.
// TODO(mvdan): give this another think, and add tests involving anon types.
pos := fset.Position(obj.Pos())
return fmt.Sprintf("%s.%s - %s:%d", obj.Pkg().Path(), obj.Name(),
filepath.Base(pos.Filename), pos.Line)
}
// Names which are not at the top level cannot be imported,
// so we don't need to record them either.
// Note that this doesn't apply to fields, which are never top-level.
if obj.Pkg().Scope().Lookup(obj.Name()) != obj {
return ""
}
// For top-level exported names, "pkgpath.Name" is unique.
return fmt.Sprintf("%s.%s", obj.Pkg().Path(), obj.Name())
}
func recordAsNotObfuscated(obj types.Object) bool {
if obj.Pkg().Path() != curPkg.ImportPath {
panic("called recordedAsNotObfuscated with a foreign object")
}
if !obj.Exported() {
// Unexported names will never be used by other packages,
// so we don't need to bother recording them.
return true
}
if objStr := recordedObjectString(obj); objStr != "" {
cachedOutput.KnownCannotObfuscate[objStr] = struct{}{}
}
return true // to simplify early returns in astutil.ApplyFunc
}
func recordedAsNotObfuscated(obj types.Object) bool {
if obj.Pkg().Path() == curPkg.ImportPath {
// The current package knows what names it's not obfuscating.
return false
}
if !obj.Exported() {
// Not recorded, as per recordAsNotObfuscated.
return false
}
objStr := recordedObjectString(obj)
if objStr == "" {
return false
}
_, ok := cachedOutput.KnownCannotObfuscate[objStr]
return ok
}
// transformGo obfuscates the provided Go syntax file.
func (tf *transformer) transformGo(file *ast.File) *ast.File {
func (tf *transformer) transformGo(filename string, file *ast.File) *ast.File {
// Only obfuscate the literals here if the flag is on
// and if the package in question is to be obfuscated.
//
@ -1398,12 +1424,10 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
if !ok {
return true
}
// TODO(mvdan): use "name := node.Name"
if node.Name == "_" {
return true // unnamed remains unnamed
}
if strings.HasPrefix(node.Name, "_C") || strings.Contains(node.Name, "_cgo") {
return true // don't mess with cgo-generated code
}
obj := tf.info.ObjectOf(node)
if obj == nil {
_, isImplicit := tf.info.Defs[node]
@ -1484,6 +1508,16 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
// We don't want to obfuscate this object name.
if tf.cannotObfuscateNames[obj] {
return recordAsNotObfuscated(obj)
}
// The imported package that declared this object did not obfuscate it.
if recordedAsNotObfuscated(obj) {
return true
}
// TODO(mvdan): investigate obfuscating these too.
filename := fset.Position(obj.Pos()).Filename
if strings.HasPrefix(filename, "_cgo_") || strings.Contains(filename, ".cgo1.") {
return true
}
@ -1530,43 +1564,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File {
fieldsHash := []byte(strct.String())
hashToUse = addGarbleToHash(fieldsHash)
// If the struct of this field was not obfuscated, do not obfuscate
// any of that struct's fields.
parent, ok := cursor.Parent().(*ast.SelectorExpr)
if !ok {
break
}
named := namedType(tf.info.TypeOf(parent.X))
if named == nil {
break // TODO(mvdan): add a test
}
if name := named.Obj().Name(); strings.HasPrefix(name, "_Ctype") {
// A field accessor on a cgo type, such as a C struct.
// We're not obfuscating cgo names.
return true
}
if path != curPkg.ImportPath {
obfPkg := obfuscatedTypesPackage(path)
if obfPkg.Scope().Lookup(named.Obj().Name()) != nil {
tf.recordIgnore(named, path)
return true
}
}
case *types.TypeName:
debugName = "type"
// If the type was not obfuscated in the package were it was defined,
// do not obfuscate it here.
if path != curPkg.ImportPath {
named := namedType(obj.Type())
if named == nil {
break // TODO(mvdan): add a test
}
obfPkg := obfuscatedTypesPackage(path)
if obfPkg.Scope().Lookup(obj.Name()) != nil {
tf.recordIgnore(named, path)
return true
}
}
case *types.Func:
sign := obj.Type().(*types.Signature)
if sign.Recv() == nil {

@ -5,7 +5,7 @@ env GOGARBLE=test/main
garble build
exec ./main
cmp stdout main.stdout
binsubstr main$exe 'privateAdd'
! binsubstr main$exe 'PortedField'
[short] stop # no need to verify this with -short
@ -13,17 +13,16 @@ env GOGARBLE=*
garble build
exec ./main
cmp stdout main.stdout
binsubstr main$exe 'privateAdd'
env GOGARBLE=test/main
garble -tiny build
exec ./main
cmp stdout main.stdout
binsubstr main$exe 'privateAdd'
go build
exec ./main
cmp stdout main.stdout
binsubstr main$exe 'privateAdd'
-- go.mod --
module test/main
@ -56,7 +55,8 @@ func main() {
fmt.Println(C.privateAdd(C.int(1), C.int(2)))
_, _ = user.Current()
fmt.Printf("%#v\n", C.struct_portedStruct{})
st := C.struct_portedStruct{}
fmt.Println(st.PortedField == nil)
C.callGoCallback()
}
@ -67,5 +67,5 @@ func goCallback() {
}
-- main.stdout --
3
main._Ctype_struct_portedStruct{PortedField:(*main._Ctype_char)(nil)}
true
go callback

@ -89,7 +89,9 @@ func main() {
// Using type aliases as both regular fields, and embedded fields.
var emb EmbeddingIndirect
emb.With.IndirectUnobfuscated = "indirect-with"
emb.With.DuplicateFieldName = 3
emb.Without.IndirectObfuscated = "indirect-without"
emb.Without.DuplicateFieldName = 4
fmt.Printf("%v\n", emb)
printfWithoutPackage("%#v\n", emb.With)
@ -231,10 +233,14 @@ var _ = reflect.TypeOf(IndirectNamedWithReflect{})
type IndirectNamedWithReflect struct {
IndirectUnobfuscated string
DuplicateFieldName int
}
type IndirectNamedWithoutReflect struct {
IndirectObfuscated string
DuplicateFieldName int
}
-- main.stdout --
@ -249,7 +255,7 @@ Hello Dave.
{"InnerField":3,"Anon":{"AnonField":0}}
{downstream}
{sibling}
{{indirect-with} {indirect-without} {}}
IndirectNamedWithReflect{IndirectUnobfuscated:"indirect-with"}
{{indirect-with 3} {indirect-without 4} { 0}}
IndirectNamedWithReflect{IndirectUnobfuscated:"indirect-with", DuplicateFieldName:3}
ReflectionField
{0}

Loading…
Cancel
Save