|
|
|
// Copyright (c) 2019, The Garble Authors.
|
|
|
|
// See LICENSE for licensing information.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"flag"
|
|
|
|
"fmt"
|
reverse: support unexported names and package paths (#233)
Unexported names are a bit tricky, since they are not listed in the
export data file. Perhaps unsurprisingly, it's only meant to expose
exported objects.
One option would be to go back to adding an extra header to the export
data file, containing the unexported methods in a map[string]T or
[]string. However, we have an easier route: just parse the Go files and
look up the names directly.
This does mean that we parse the Go files every time "reverse" runs,
even if the build cache is warm, but that should not be an issue.
Parsing Go files without any typechecking is very cheap compared to
everything else we do. Plus, we save having to load go/types information
from the build cache, or having to load extra headers from export files.
It should be noted that the obfuscation process does need type
information, mainly to be careful about which names can be obfuscated
and how they should be obfuscated. Neither is a worry here; all names
belong to a single package, and it doesn't matter if some aren't
actually obfuscated, since the string replacements would simply never
trigger in practice.
The test includes an unexported func, to test the new feature. We also
start reversing the obfuscation of import paths. Now, the test's reverse
output is as follows:
goroutine 1 [running]:
runtime/debug.Stack(0x??, 0x??, 0x??)
runtime/debug/stack.go:24 +0x??
test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
p.go:6 +0x??
main.unexportedMainFunc(...)
C.go:2
main.main()
z.go:3 +0x??
The only major missing feature is positions and filenames. A follow-up
PR will take care of those.
Updates #5.
4 years ago
|
|
|
"go/ast"
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
"go/types"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
// commandReverse implements "garble reverse".
|
|
|
|
func commandReverse(args []string) error {
|
|
|
|
flags, args := splitFlagsFromArgs(args)
|
|
|
|
if hasHelpFlag(flags) || len(args) == 0 {
|
|
|
|
fmt.Fprint(os.Stderr, `
|
|
|
|
usage: garble [garble flags] reverse [build flags] package [files]
|
|
|
|
|
|
|
|
For example, after building an obfuscated program as follows:
|
|
|
|
|
|
|
|
garble -literals build -tags=mytag ./cmd/mycmd
|
|
|
|
|
|
|
|
One can reverse a captured panic stack trace as follows:
|
|
|
|
|
|
|
|
garble -literals reverse -tags=mytag ./cmd/mycmd panic-output.txt
|
|
|
|
`[1:])
|
|
|
|
return errJustExit(2)
|
|
|
|
}
|
|
|
|
|
|
|
|
pkg, args := args[0], args[1:]
|
|
|
|
listArgs := []string{
|
|
|
|
"-json",
|
|
|
|
"-deps",
|
|
|
|
"-export",
|
|
|
|
}
|
|
|
|
listArgs = append(listArgs, flags...)
|
|
|
|
listArgs = append(listArgs, pkg)
|
avoid one more call to 'go tool buildid' (#253)
We use it to get the content ID of garble's binary, which is used for
both the garble action IDs, as well as 'go tool compile -V=full'.
Since those two happen in separate processes, both used to call 'go tool
buildid' separately. Store it in the gob cache the first time, and reuse
it the second time.
Since each call to cmd/go costs about 10ms (new process, running its
many init funcs, etc), this results in a nice speed-up for our small
benchmark. Most builds will take many seconds though, so note that a
~15ms speedup there will likely not be noticeable.
While at it, simplify the buildInfo global, as now it just contains a
map representation of the -importcfg contents. It now has better names,
docs, and a simpler representation.
We also stop using the term "garbled import", as it was a bit confusing.
"obfuscated types.Package" is a much better description.
name old time/op new time/op delta
Build-8 106ms ± 1% 92ms ± 0% -14.07% (p=0.010 n=6+4)
name old bin-B new bin-B delta
Build-8 6.60M ± 0% 6.60M ± 0% -0.01% (p=0.002 n=6+6)
name old sys-time/op new sys-time/op delta
Build-8 208ms ± 5% 149ms ± 3% -28.27% (p=0.004 n=6+5)
name old user-time/op new user-time/op delta
Build-8 433ms ± 3% 384ms ± 3% -11.35% (p=0.002 n=6+6)
4 years ago
|
|
|
// TODO: We most likely no longer need this "list -toolexec" call, since
|
|
|
|
// we use the original build IDs.
|
actually remove temporary directories after obfuscation
Back in February 2021, we changed the obfuscation logic so that the
entire `garble build` process would use one shared temporary directory
across all package builds, reducing the amount of files we created in
the top-level system temporary directory.
However, we made one mistake: we didn't swap os.Remove for os.RemoveAll.
Ever since then, we've been leaving temporary files behind.
Add regression tests, which failed before the fix, and fix the bug.
Note that we need to test `garble reverse` as well, as it calls
toolexecCmd separately, so it needs its own cleanup as well.
The cleanup happens via the env var, which doesn't feel worse than
having toolexecCmd return an extra string or cleanup func.
While here, also test that we support TMPDIRs with special characters.
3 years ago
|
|
|
_, err := toolexecCmd("list", listArgs)
|
|
|
|
defer os.RemoveAll(os.Getenv("GARBLE_SHARED"))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// We don't actually run a main Go command with all flags,
|
|
|
|
// so if the user gave a non-build flag,
|
|
|
|
// we need this check to not silently ignore it.
|
fail if we are unexpectedly overwriting files (#418)
While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.
To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.
First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:
[...]/asm -gensymabis [...] foo.s bar.s
[...]/asm [...] foo.s bar.s
That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.
However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.
This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.
Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.
This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.
Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.
Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:
go list -export -toolexec=garble -a <dependency>
This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:
go build -toolexec=garble -a <main>
The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.
Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.
CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
3 years ago
|
|
|
if _, firstUnknown := filterForwardBuildFlags(flags); firstUnknown != "" {
|
|
|
|
// A bit of a hack to get a normal flag.Parse error.
|
|
|
|
// Longer term, "reverse" might have its own FlagSet.
|
|
|
|
return flag.NewFlagSet("", flag.ContinueOnError).Parse([]string{firstUnknown})
|
|
|
|
}
|
|
|
|
|
reverse: support unexported names and package paths (#233)
Unexported names are a bit tricky, since they are not listed in the
export data file. Perhaps unsurprisingly, it's only meant to expose
exported objects.
One option would be to go back to adding an extra header to the export
data file, containing the unexported methods in a map[string]T or
[]string. However, we have an easier route: just parse the Go files and
look up the names directly.
This does mean that we parse the Go files every time "reverse" runs,
even if the build cache is warm, but that should not be an issue.
Parsing Go files without any typechecking is very cheap compared to
everything else we do. Plus, we save having to load go/types information
from the build cache, or having to load extra headers from export files.
It should be noted that the obfuscation process does need type
information, mainly to be careful about which names can be obfuscated
and how they should be obfuscated. Neither is a worry here; all names
belong to a single package, and it doesn't matter if some aren't
actually obfuscated, since the string replacements would simply never
trigger in practice.
The test includes an unexported func, to test the new feature. We also
start reversing the obfuscation of import paths. Now, the test's reverse
output is as follows:
goroutine 1 [running]:
runtime/debug.Stack(0x??, 0x??, 0x??)
runtime/debug/stack.go:24 +0x??
test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
p.go:6 +0x??
main.unexportedMainFunc(...)
C.go:2
main.main()
z.go:3 +0x??
The only major missing feature is positions and filenames. A follow-up
PR will take care of those.
Updates #5.
4 years ago
|
|
|
// A package's names are generally hashed with the action ID of its
|
|
|
|
// obfuscated build. We recorded those action IDs above.
|
|
|
|
// Note that we parse Go files directly to obtain the names, since the
|
|
|
|
// export data only exposes exported names. Parsing Go files is cheap,
|
|
|
|
// so it's unnecessary to try to avoid this cost.
|
|
|
|
var replaces []string
|
|
|
|
|
|
|
|
for _, lpkg := range sharedCache.ListedPackages {
|
deprecate using GOPRIVATE in favor of GOGARBLE (#427)
Piggybacking off of GOPRIVATE is great for a number of reasons:
* People tend to obfuscate private code, whose package paths will
generally be in GOPRIVATE already
* Its meaning and syntax are well understood
* It allows all the flexibility we need without adding our own env var
or config option
However, using GOPRIVATE directly has one main drawback.
It's fairly common to also want to obfuscate public dependencies,
to make the code in private packages even harder to follow.
However, using "GOPRIVATE=*" will result in two main downsides:
* GONOPROXY defaults to GOPRIVATE, so the proxy would be entirely disabled.
Downloading modules, such as when adding or updating dependencies,
or when the local cache is cold, can be less reliable.
* GONOSUMDB defaults to GOPRIVATE, so the sumdb would be entirely disabled.
Adding entries to go.sum, such as when adding or updating dependencies,
can be less secure.
We will continue to consume GOPRIVATE as a fallback,
but we now expect users to set GOGARBLE instead.
The new logic is documented in the README.
While here, rewrite some uses of "private" with "to obfuscate",
to make the code easier to follow and harder to misunderstand.
Fixes #276.
3 years ago
|
|
|
if !lpkg.ToObfuscate {
|
refactor "current package" with TOOLEXEC_IMPORTPATH (#266)
Now that we've dropped support for Go 1.15.x, we can finally rely on
this environment variable for toolexec calls, present in Go 1.16.
Before, we had hacky ways of trying to figure out the current package's
import path, mostly from the -p flag. The biggest rough edge there was
that, for main packages, that was simply the package name, and not its
full import path.
To work around that, we had a restriction on a single main package, so
we could work around that issue. That restriction is now gone.
The new code is simpler, especially because we can set curPkg in a
single place for all toolexec transform funcs.
Since we can always rely on curPkg not being nil now, we can also start
reusing listedPackage.Private and avoid the majority of repeated calls
to isPrivate. The function is cheap, but still not free.
isPrivate itself can also get simpler. We no longer have to worry about
the "main" edge case. Plus, the sanity check for invalid package paths
is now unnecessary; we only got malformed paths from goobj2, and we now
require exact matches with the ImportPath field from "go list -json".
Another effect of clearing up the "main" edge case is that -debugdir now
uses the right directory for main packages. We also start using
consistent debugdir paths in the tests, for the sake of being easier to
read and maintain.
Finally, note that commandReverse did not need the extra call to "go
list -toolexec", as the "shared" call stored in the cache is enough. We
still call toolexecCmd to get said cache, which should probably be
simplified in a future PR.
While at it, replace the use of the "-std" compiler flag with the
Standard field from "go list -json".
4 years ago
|
|
|
continue
|
start using original action IDs (#251)
When we obfuscate a name, what we do is hash the name with the action ID
of the package that contains the name. To ensure that the hash changes
if the garble tool changes, we used the action ID of the obfuscated
build, which is different than the original action ID, as we include
garble's own content ID in "go tool compile -V=full" via -toolexec.
Let's call that the "obfuscated action ID". Remember that a content ID
is roughly the hash of a binary or object file, and an action ID
contains the hash of a package's source code plus the content IDs of its
dependencies.
This had the advantage that it did what we wanted. However, it had one
massive drawback: when we compile a package, we only have the obfuscated
action IDs of its dependencies. This is because one can't have the
content ID of dependent packages before they are built.
Usually, this is not a problem, because hashing a foreign name means it
comes from a dependency, where we already have the obfuscated action ID.
However, that's not always the case.
First, go:linkname directives can point to any symbol that ends up in
the binary, even if the package is not a dependency. So garble could
only support linkname targets belonging to dependencies. This is at the
root of why we could not obfuscate the runtime; it contains linkname
directives targeting the net package, for example, which depends on runtime.
Second, some other places did not have an easy access to obfuscated
action IDs, like transformAsm, which had to recover it from a temporary
file stored by transformCompile.
Plus, this was all pretty expensive, as each toolexec sub-process had to
make repeated calls to buildidOf with the object files of dependencies.
We even had to use extra calls to "go list" in the case of indirect
dependencies, as their export files do not appear in importcfg files.
All in all, the old method was complex and expensive. A better mechanism
is to use the original action IDs directly, as listed by "go list"
without garble in the picture.
This would mean that the hashing does not change if garble changes,
meaning weaker obfuscation. To regain that property, we define the
"garble action ID", which is just the original action ID hashed together
with garble's own content ID.
This is practically the same as the obfuscated build ID we used before,
but since it doesn't go through "go tool compile -V=full" and the
obfuscated build itself, we can work out *all* the garble action IDs
upfront, before the obfuscated build even starts.
This fixes all of our problems. Now we know all garble build IDs
upfront, so a bunch of hacks can be entirely removed. Plus, since we
know them upfront, we can also cache them and avoid repeated calls to
"go tool buildid".
While at it, make use of the new BuildID field in Go 1.16's "list -json
-export". This avoids the vast majority of "go tool buildid" calls, as
the only ones that remain are 2 on the garble binary itself.
The numbers for Go 1.16 look very good:
name old time/op new time/op delta
Build-8 146ms ± 4% 101ms ± 1% -31.01% (p=0.002 n=6+6)
name old bin-B new bin-B delta
Build-8 6.61M ± 0% 6.60M ± 0% -0.09% (p=0.002 n=6+6)
name old sys-time/op new sys-time/op delta
Build-8 321ms ± 7% 202ms ± 6% -37.11% (p=0.002 n=6+6)
name old user-time/op new user-time/op delta
Build-8 538ms ± 4% 414ms ± 4% -23.12% (p=0.002 n=6+6)
4 years ago
|
|
|
}
|
|
|
|
addHashedWithPackage := func(str string) {
|
obfuscate all names used in reflection
Go code can retrieve and use field and method names via the `reflect` package.
For that reason, historically we did not obfuscate names of fields and methods
underneath types that we detected as used for reflection, via e.g. `reflect.TypeOf`.
However, that caused a number of issues. Since we obfuscate and build one package
at a time, we could only detect when types were used for reflection in their own package
or in upstream packages. Use of reflection in downstream packages would be detected
too late, causing one package to obfuscate the names and the other not to, leading to a build failure.
A different approach is implemented here. All names are obfuscated now, but we collect
those types used for reflection, and at the end of a build in `package main`,
we inject a function into the runtime's `internal/abi` package to reverse the obfuscation
for those names which can be used for reflection.
This does mean that the obfuscation for these names is very weak, as the binary
contains a one-to-one mapping to their original names, but they cannot be obfuscated
without breaking too many Go packages out in the wild. There is also some amount
of overhead in `internal/abi` due to this, but we aim to make the overhead insignificant.
Fixes #884, #799, #817, #881, #858, #843, #842
Closes #406
4 months ago
|
|
|
replaces = append(replaces, hashWithPackage(lpkg, str), str)
|
reverse: support unexported names and package paths (#233)
Unexported names are a bit tricky, since they are not listed in the
export data file. Perhaps unsurprisingly, it's only meant to expose
exported objects.
One option would be to go back to adding an extra header to the export
data file, containing the unexported methods in a map[string]T or
[]string. However, we have an easier route: just parse the Go files and
look up the names directly.
This does mean that we parse the Go files every time "reverse" runs,
even if the build cache is warm, but that should not be an issue.
Parsing Go files without any typechecking is very cheap compared to
everything else we do. Plus, we save having to load go/types information
from the build cache, or having to load extra headers from export files.
It should be noted that the obfuscation process does need type
information, mainly to be careful about which names can be obfuscated
and how they should be obfuscated. Neither is a worry here; all names
belong to a single package, and it doesn't matter if some aren't
actually obfuscated, since the string replacements would simply never
trigger in practice.
The test includes an unexported func, to test the new feature. We also
start reversing the obfuscation of import paths. Now, the test's reverse
output is as follows:
goroutine 1 [running]:
runtime/debug.Stack(0x??, 0x??, 0x??)
runtime/debug/stack.go:24 +0x??
test/main/lib.ExportedLibFunc(0x??, 0x??, 0x??, 0x??)
p.go:6 +0x??
main.unexportedMainFunc(...)
C.go:2
main.main()
z.go:3 +0x??
The only major missing feature is positions and filenames. A follow-up
PR will take care of those.
Updates #5.
4 years ago
|
|
|
}
|
|
|
|
|
|
|
|
// Package paths are obfuscated, too.
|
|
|
|
addHashedWithPackage(lpkg.ImportPath)
|
|
|
|
|
obfuscate all names used in reflection
Go code can retrieve and use field and method names via the `reflect` package.
For that reason, historically we did not obfuscate names of fields and methods
underneath types that we detected as used for reflection, via e.g. `reflect.TypeOf`.
However, that caused a number of issues. Since we obfuscate and build one package
at a time, we could only detect when types were used for reflection in their own package
or in upstream packages. Use of reflection in downstream packages would be detected
too late, causing one package to obfuscate the names and the other not to, leading to a build failure.
A different approach is implemented here. All names are obfuscated now, but we collect
those types used for reflection, and at the end of a build in `package main`,
we inject a function into the runtime's `internal/abi` package to reverse the obfuscation
for those names which can be used for reflection.
This does mean that the obfuscation for these names is very weak, as the binary
contains a one-to-one mapping to their original names, but they cannot be obfuscated
without breaking too many Go packages out in the wild. There is also some amount
of overhead in `internal/abi` due to this, but we aim to make the overhead insignificant.
Fixes #884, #799, #817, #881, #858, #843, #842
Closes #406
4 months ago
|
|
|
files, err := parseFiles(lpkg, lpkg.Dir, lpkg.CompiledGoFiles)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
}
|
|
|
|
origImporter := importerForPkg(lpkg)
|
|
|
|
_, info, err := typecheck(lpkg.ImportPath, files, origImporter)
|
|
|
|
if err != nil {
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
return err
|
|
|
|
}
|
|
|
|
fieldToStruct := computeFieldToStruct(info)
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
for i, file := range files {
|
|
|
|
goFile := lpkg.CompiledGoFiles[i]
|
|
|
|
ast.Inspect(file, func(node ast.Node) bool {
|
|
|
|
switch node := node.(type) {
|
|
|
|
|
|
|
|
// Replace names.
|
|
|
|
// TODO: do var names ever show up in output?
|
|
|
|
case *ast.FuncDecl:
|
|
|
|
addHashedWithPackage(node.Name.Name)
|
|
|
|
case *ast.TypeSpec:
|
|
|
|
addHashedWithPackage(node.Name.Name)
|
|
|
|
case *ast.Field:
|
|
|
|
for _, name := range node.Names {
|
|
|
|
obj, _ := info.ObjectOf(name).(*types.Var)
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
if obj == nil || !obj.IsField() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
strct := fieldToStruct[obj]
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
if strct == nil {
|
|
|
|
panic("could not find struct for field " + name.Name)
|
hash field names equally in all packages
Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.
The spec defines two identical struct types as:
Two struct types are identical if they have the same sequence of
fields, and if corresponding fields have the same names, and
identical types, and identical tags. Non-exported field names
from different packages are always different.
Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.
To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.
Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.
This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.
Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.
Fixes #310.
4 years ago
|
|
|
}
|
|
|
|
replaces = append(replaces, hashWithStruct(strct, obj), name.Name)
|
|
|
|
}
|
|
|
|
|
|
|
|
case *ast.CallExpr:
|
|
|
|
// Reverse position information of call sites.
|
|
|
|
pos := fset.Position(node.Pos())
|
|
|
|
origPos := fmt.Sprintf("%s:%d", goFile, pos.Offset)
|
obfuscate all names used in reflection
Go code can retrieve and use field and method names via the `reflect` package.
For that reason, historically we did not obfuscate names of fields and methods
underneath types that we detected as used for reflection, via e.g. `reflect.TypeOf`.
However, that caused a number of issues. Since we obfuscate and build one package
at a time, we could only detect when types were used for reflection in their own package
or in upstream packages. Use of reflection in downstream packages would be detected
too late, causing one package to obfuscate the names and the other not to, leading to a build failure.
A different approach is implemented here. All names are obfuscated now, but we collect
those types used for reflection, and at the end of a build in `package main`,
we inject a function into the runtime's `internal/abi` package to reverse the obfuscation
for those names which can be used for reflection.
This does mean that the obfuscation for these names is very weak, as the binary
contains a one-to-one mapping to their original names, but they cannot be obfuscated
without breaking too many Go packages out in the wild. There is also some amount
of overhead in `internal/abi` due to this, but we aim to make the overhead insignificant.
Fixes #884, #799, #817, #881, #858, #843, #842
Closes #406
4 months ago
|
|
|
newFilename := hashWithPackage(lpkg, origPos) + ".go"
|
|
|
|
|
|
|
|
// Do "obfuscated.go:1", corresponding to the call site's line.
|
|
|
|
// Most common in stack traces.
|
|
|
|
replaces = append(replaces,
|
|
|
|
newFilename+":1",
|
|
|
|
fmt.Sprintf("%s/%s:%d", lpkg.ImportPath, goFile, pos.Line),
|
|
|
|
)
|
|
|
|
|
|
|
|
// Do "obfuscated.go" as a fallback.
|
|
|
|
// Most useful in build errors in obfuscated code,
|
|
|
|
// since those might land on any line.
|
|
|
|
// Any ":N" line number will end up being useless,
|
|
|
|
// but at least the filename will be correct.
|
|
|
|
replaces = append(replaces,
|
|
|
|
newFilename,
|
|
|
|
fmt.Sprintf("%s/%s", lpkg.ImportPath, goFile),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
repl := strings.NewReplacer(replaces...)
|
|
|
|
|
|
|
|
if len(args) == 0 {
|
|
|
|
modified, err := reverseContent(os.Stdout, os.Stdin, repl)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !modified {
|
|
|
|
return errJustExit(1)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// TODO: cover this code in the tests too
|
|
|
|
anyModified := false
|
|
|
|
for _, path := range args {
|
|
|
|
f, err := os.Open(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
modified, err := reverseContent(os.Stdout, f, repl)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
anyModified = anyModified || modified
|
|
|
|
f.Close() // since we're in a loop
|
|
|
|
}
|
|
|
|
if !anyModified {
|
|
|
|
return errJustExit(1)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func reverseContent(w io.Writer, r io.Reader, repl *strings.Replacer) (bool, error) {
|
|
|
|
// Read line by line.
|
|
|
|
// Reading the entire content at once wouldn't be interactive,
|
|
|
|
// nor would it support large files well.
|
|
|
|
// Reading entire lines ensures we don't cut words in half.
|
|
|
|
// We use bufio.Reader instead of bufio.Scanner,
|
|
|
|
// to also obtain the newline characters themselves.
|
|
|
|
br := bufio.NewReader(r)
|
|
|
|
modified := false
|
|
|
|
for {
|
|
|
|
// Note that ReadString can return a line as well as an error if
|
|
|
|
// we hit EOF without a newline.
|
|
|
|
// In that case, we still want to process the string.
|
|
|
|
line, readErr := br.ReadString('\n')
|
|
|
|
|
|
|
|
newLine := repl.Replace(line)
|
|
|
|
if newLine != line {
|
|
|
|
modified = true
|
|
|
|
}
|
|
|
|
if _, err := io.WriteString(w, newLine); err != nil {
|
|
|
|
return modified, err
|
|
|
|
}
|
|
|
|
if readErr == io.EOF {
|
|
|
|
return modified, nil
|
|
|
|
}
|
|
|
|
if readErr != nil {
|
|
|
|
return modified, readErr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|