|
|
|
// Copyright (c) 2020, The Garble Authors.
|
|
|
|
// See LICENSE for licensing information.
|
|
|
|
|
|
|
|
package literals
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"go/ast"
|
|
|
|
"go/constant"
|
|
|
|
"go/token"
|
|
|
|
"go/types"
|
|
|
|
mathrand "math/rand"
|
|
|
|
|
|
|
|
"golang.org/x/tools/go/ast/astutil"
|
|
|
|
ah "mvdan.cc/garble/internal/asthelper"
|
|
|
|
)
|
|
|
|
|
|
|
|
// MinSize is the lower bound limit, of the size of string-like literals
// which we will obfuscate. This is needed in order for binary size to stay relatively
// moderate, this also decreases the likelihood for performance slowdowns.
const MinSize = 8

// maxSize is the upper limit of the size of string-like literals
// which we will obfuscate with any of the available obfuscators.
// Beyond that we apply only a subset of obfuscators which are guaranteed to run efficiently.
const maxSize = 2 << 10 // 2 KiB
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
|
|
|
|
// Obfuscate replaces literals with obfuscated anonymous functions.
|
|
|
|
func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkStrings map[*types.Var]string) *ast.File {
|
|
|
|
obfRand := newObfRand(rand, file)
|
|
|
|
pre := func(cursor *astutil.Cursor) bool {
|
avoid obfuscating literals set via -ldflags=-X
The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.
The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.
Both of these features work fine separately,
but when intersecting, they break. For example, given:
var myVar = "original"
[...]
-ldflags=-X=main.myVar=replaced
The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:
var myVar = "replaced"
func init() { myVar = func() string { ... } }
Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.
We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.
Fixes #323.
3 years ago
|
|
|
switch node := cursor.Node().(type) {
|
|
|
|
case *ast.GenDecl:
|
|
|
|
// constants are obfuscated by replacing all references with the obfuscated value
|
avoid obfuscating literals set via -ldflags=-X
The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.
The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.
Both of these features work fine separately,
but when intersecting, they break. For example, given:
var myVar = "original"
[...]
-ldflags=-X=main.myVar=replaced
The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:
var myVar = "replaced"
func init() { myVar = func() string { ... } }
Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.
We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.
Fixes #323.
3 years ago
|
|
|
if node.Tok == token.CONST {
|
|
|
|
return false
|
|
|
|
}
|
avoid obfuscating literals set via -ldflags=-X
The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.
The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.
Both of these features work fine separately,
but when intersecting, they break. For example, given:
var myVar = "original"
[...]
-ldflags=-X=main.myVar=replaced
The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:
var myVar = "replaced"
func init() { myVar = func() string { ... } }
Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.
We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.
Fixes #323.
3 years ago
|
|
|
case *ast.ValueSpec:
|
|
|
|
for _, name := range node.Names {
|
slight simplifications and alloc reductions
Reuse a buffer and a map across loop iterations, because we can.
Make recordTypeDone only track named types, as that is enough to detect
type cycles. Without named types, there can be no cycles.
These two reduce allocs by a fraction of a percent:
name old time/op new time/op delta
Build-16 10.4s ± 2% 10.4s ± 1% ~ (p=0.739 n=10+10)
name old bin-B new bin-B delta
Build-16 5.51M ± 0% 5.51M ± 0% ~ (all equal)
name old cached-time/op new cached-time/op delta
Build-16 391ms ± 9% 407ms ± 7% ~ (p=0.095 n=10+9)
name old mallocs/op new mallocs/op delta
Build-16 34.5M ± 0% 34.4M ± 0% -0.12% (p=0.000 n=10+10)
name old sys-time/op new sys-time/op delta
Build-16 5.87s ± 5% 5.82s ± 5% ~ (p=0.182 n=10+9)
It doesn't seem like much, but remember that these stats are for the
entire set of processes, where garble only accounts for about 10% of the
total wall time when compared to the compiler or linker. So a ~0.1%
decrease globally is still significant.
linkerVariableStrings is also indexed by *types.Var rather than types.Object,
since -ldflags=-X only supports setting the string value of variables.
This shouldn't make a significant difference in terms of allocs,
but at least the map is less prone to confusion with other object types.
To ensure the new code doesn't trip up on non-variables, we add test cases.
Finally, for the sake of clarity, index into the types.Info maps like
Defs and Uses rather than calling ObjectOf if we know whether the
identifier we have is a definition of a name or the use of a defined name.
This isn't better in terms of performance, as ObjectOf is a tiny method,
but just like with linkerVariableStrings before, the new code is clearer.
3 years ago
|
|
|
obj := info.Defs[name].(*types.Var)
|
avoid obfuscating literals set via -ldflags=-X
The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.
The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.
Both of these features work fine separately,
but when intersecting, they break. For example, given:
var myVar = "original"
[...]
-ldflags=-X=main.myVar=replaced
The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:
var myVar = "replaced"
func init() { myVar = func() string { ... } }
Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.
We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.
Fixes #323.
3 years ago
|
|
|
if _, e := linkStrings[obj]; e {
|
|
|
|
// Skip this entire ValueSpec to not break -ldflags=-X.
|
|
|
|
// TODO: support obfuscating those injected strings, too.
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
post := func(cursor *astutil.Cursor) bool {
|
|
|
|
node, ok := cursor.Node().(ast.Expr)
|
|
|
|
if !ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
typeAndValue := info.Types[node]
|
|
|
|
if !typeAndValue.IsValue() {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
if typeAndValue.Type == types.Typ[types.String] && typeAndValue.Value != nil {
|
|
|
|
value := constant.StringVal(typeAndValue.Value)
|
|
|
|
if len(value) < MinSize {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
avoid using math/rand's global funcs like Seed and Intn
Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:
Deprecated: Programs that call Seed and then expect a specific sequence
of results from the global random source (using functions such as Int)
can be broken when a dependency changes how much it consumes from the
global random source. To avoid such breakages, programs that need a
specific result sequence should use NewRand(NewSource(seed)) to obtain a
random generator that other packages cannot access.
Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.
However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.
Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.
Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.
2 years ago
|
|
|
cursor.Replace(withPos(obfuscateString(obfRand, value), node.Pos()))
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
switch node := node.(type) {
|
|
|
|
case *ast.UnaryExpr:
|
|
|
|
// Account for the possibility of address operators like
|
|
|
|
// &[]byte used inline with function arguments.
|
|
|
|
//
|
|
|
|
// See issue #520.
|
|
|
|
|
|
|
|
if node.Op != token.AND {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
if child, ok := node.X.(*ast.CompositeLit); ok {
|
avoid using math/rand's global funcs like Seed and Intn
Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:
Deprecated: Programs that call Seed and then expect a specific sequence
of results from the global random source (using functions such as Int)
can be broken when a dependency changes how much it consumes from the
global random source. To avoid such breakages, programs that need a
specific result sequence should use NewRand(NewSource(seed)) to obtain a
random generator that other packages cannot access.
Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.
However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.
Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.
Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.
2 years ago
|
|
|
newnode := handleCompositeLiteral(obfRand, true, child, info)
|
|
|
|
if newnode != nil {
|
|
|
|
cursor.Replace(newnode)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
case *ast.CompositeLit:
|
|
|
|
// We replaced the &[]byte{...} case above. Here we account for the
|
|
|
|
// standard []byte{...} or [4]byte{...} value form.
|
|
|
|
//
|
|
|
|
// We need two separate calls to cursor.Replace, as it only supports
|
|
|
|
// replacing the node we're currently visiting, and the pointer variant
|
|
|
|
// requires us to move the ampersand operator.
|
|
|
|
|
|
|
|
parent, ok := cursor.Parent().(*ast.UnaryExpr)
|
|
|
|
if ok && parent.Op == token.AND {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
avoid using math/rand's global funcs like Seed and Intn
Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:
Deprecated: Programs that call Seed and then expect a specific sequence
of results from the global random source (using functions such as Int)
can be broken when a dependency changes how much it consumes from the
global random source. To avoid such breakages, programs that need a
specific result sequence should use NewRand(NewSource(seed)) to obtain a
random generator that other packages cannot access.
Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.
However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.
Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.
Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.
2 years ago
|
|
|
newnode := handleCompositeLiteral(obfRand, false, node, info)
|
|
|
|
if newnode != nil {
|
|
|
|
cursor.Replace(newnode)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return astutil.Apply(file, pre, post).(*ast.File)
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleCompositeLiteral checks if the input node is []byte or [...]byte and
|
|
|
|
// calls the appropriate obfuscation method, returning a new node that should
|
|
|
|
// be used to replace it.
|
|
|
|
//
|
|
|
|
// If the input node cannot be obfuscated nil is returned.
|
|
|
|
func handleCompositeLiteral(obfRand *obfRand, isPointer bool, node *ast.CompositeLit, info *types.Info) ast.Node {
|
|
|
|
if len(node.Elts) < MinSize {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
byteType := types.Universe.Lookup("byte").Type()
|
|
|
|
|
|
|
|
var arrayLen int64
|
|
|
|
switch y := info.TypeOf(node.Type).(type) {
|
|
|
|
case *types.Array:
|
|
|
|
if y.Elem() != byteType {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
arrayLen = y.Len()
|
|
|
|
|
|
|
|
case *types.Slice:
|
|
|
|
if y.Elem() != byteType {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
data := make([]byte, 0, len(node.Elts))
|
|
|
|
|
|
|
|
for _, el := range node.Elts {
|
|
|
|
elType := info.Types[el]
|
|
|
|
|
|
|
|
if elType.Value == nil || elType.Value.Kind() != constant.Int {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
value, ok := constant.Uint64Val(elType.Value)
|
|
|
|
if !ok {
|
|
|
|
panic(fmt.Sprintf("cannot parse byte value: %v", elType.Value))
|
|
|
|
}
|
|
|
|
|
|
|
|
data = append(data, byte(value))
|
|
|
|
}
|
|
|
|
|
|
|
|
if arrayLen > 0 {
|
avoid using math/rand's global funcs like Seed and Intn
Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:
Deprecated: Programs that call Seed and then expect a specific sequence
of results from the global random source (using functions such as Int)
can be broken when a dependency changes how much it consumes from the
global random source. To avoid such breakages, programs that need a
specific result sequence should use NewRand(NewSource(seed)) to obtain a
random generator that other packages cannot access.
Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.
However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.
Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.
Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.
2 years ago
|
|
|
return withPos(obfuscateByteArray(obfRand, isPointer, data, arrayLen), node.Pos())
|
|
|
|
}
|
|
|
|
|
avoid using math/rand's global funcs like Seed and Intn
Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:
Deprecated: Programs that call Seed and then expect a specific sequence
of results from the global random source (using functions such as Int)
can be broken when a dependency changes how much it consumes from the
global random source. To avoid such breakages, programs that need a
specific result sequence should use NewRand(NewSource(seed)) to obtain a
random generator that other packages cannot access.
Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.
However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.
Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.
Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.
2 years ago
|
|
|
return withPos(obfuscateByteSlice(obfRand, isPointer, data), node.Pos())
|
|
|
|
}
|
|
|
|
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
// withPos sets any token.Pos fields under node which affect printing to pos.
|
|
|
|
// Note that we can't set all token.Pos fields, since some affect the semantics.
|
|
|
|
//
|
|
|
|
// This function is useful so that go/printer doesn't try to estimate position
|
|
|
|
// offsets, which can end up in printing comment directives too early.
|
|
|
|
//
|
|
|
|
// We don't set any "end" or middle positions, because they seem irrelevant.
|
|
|
|
func withPos(node ast.Node, pos token.Pos) ast.Node {
|
|
|
|
for node := range ast.Preorder(node) {
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
switch node := node.(type) {
|
|
|
|
case *ast.BasicLit:
|
|
|
|
node.ValuePos = pos
|
|
|
|
case *ast.Ident:
|
|
|
|
node.NamePos = pos
|
|
|
|
case *ast.CompositeLit:
|
|
|
|
node.Lbrace = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
node.Rbrace = pos
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
case *ast.ArrayType:
|
|
|
|
node.Lbrack = pos
|
|
|
|
case *ast.FuncType:
|
|
|
|
node.Func = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
case *ast.BinaryExpr:
|
|
|
|
node.OpPos = pos
|
|
|
|
case *ast.StarExpr:
|
|
|
|
node.Star = pos
|
|
|
|
case *ast.CallExpr:
|
|
|
|
node.Lparen = pos
|
|
|
|
node.Rparen = pos
|
|
|
|
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
case *ast.GenDecl:
|
|
|
|
node.TokPos = pos
|
|
|
|
case *ast.ReturnStmt:
|
|
|
|
node.Return = pos
|
|
|
|
case *ast.ForStmt:
|
|
|
|
node.For = pos
|
|
|
|
case *ast.RangeStmt:
|
|
|
|
node.For = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. As a result, type names declared in dependencies
were incorrectly left unobfuscated when referenced from the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
case *ast.BranchStmt:
|
|
|
|
node.TokPos = pos
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
}
|
|
|
|
}
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
return node
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr {
|
|
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
block := obf.obfuscate(obfRand.Rand, []byte(data))
|
|
|
|
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(ah.CallExpr(ast.NewIdent("string"), ast.NewIdent("data"))))
|
|
|
|
|
|
|
|
return ah.LambdaCall(ast.NewIdent("string"), block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateByteSlice(obfRand *obfRand, isPointer bool, data []byte) *ast.CallExpr {
|
|
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
block := obf.obfuscate(obfRand.Rand, data)
|
|
|
|
|
|
|
|
if isPointer {
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(&ast.UnaryExpr{
|
|
|
|
Op: token.AND,
|
|
|
|
X: ast.NewIdent("data"),
|
|
|
|
}))
|
|
|
|
return ah.LambdaCall(&ast.StarExpr{
|
|
|
|
X: &ast.ArrayType{Elt: ast.NewIdent("byte")},
|
|
|
|
}, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(ast.NewIdent("data")))
|
|
|
|
return ah.LambdaCall(&ast.ArrayType{Elt: ast.NewIdent("byte")}, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length int64) *ast.CallExpr {
|
|
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
block := obf.obfuscate(obfRand.Rand, data)
|
|
|
|
|
|
|
|
arrayType := &ast.ArrayType{
|
|
|
|
Len: ah.IntLit(int(length)),
|
|
|
|
Elt: ast.NewIdent("byte"),
|
|
|
|
}
|
|
|
|
|
|
|
|
sliceToArray := []ast.Stmt{
|
|
|
|
&ast.DeclStmt{
|
|
|
|
Decl: &ast.GenDecl{
|
|
|
|
Tok: token.VAR,
|
|
|
|
Specs: []ast.Spec{&ast.ValueSpec{
|
|
|
|
Names: []*ast.Ident{ast.NewIdent("newdata")},
|
|
|
|
Type: arrayType,
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
&ast.RangeStmt{
|
|
|
|
Key: ast.NewIdent("i"),
|
|
|
|
Tok: token.DEFINE,
|
|
|
|
X: ast.NewIdent("data"),
|
|
|
|
Body: &ast.BlockStmt{List: []ast.Stmt{
|
|
|
|
&ast.AssignStmt{
|
|
|
|
Lhs: []ast.Expr{ah.IndexExpr("newdata", ast.NewIdent("i"))},
|
|
|
|
Tok: token.ASSIGN,
|
|
|
|
Rhs: []ast.Expr{ah.IndexExpr("data", ast.NewIdent("i"))},
|
|
|
|
},
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
var retexpr ast.Expr = ast.NewIdent("newdata")
|
|
|
|
if isPointer {
|
|
|
|
retexpr = &ast.UnaryExpr{X: retexpr, Op: token.AND}
|
|
|
|
}
|
|
|
|
|
|
|
|
sliceToArray = append(sliceToArray, ah.ReturnStmt(retexpr))
|
|
|
|
block.List = append(block.List, sliceToArray...)
|
|
|
|
|
|
|
|
if isPointer {
|
|
|
|
return ah.LambdaCall(&ast.StarExpr{X: arrayType}, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ah.LambdaCall(arrayType, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func getNextObfuscator(obfRand *obfRand, size int) obfuscator {
|
|
|
|
if size <= maxSize {
|
|
|
|
return obfRand.nextObfuscator()
|
|
|
|
} else {
|
|
|
|
return obfRand.nextLinearTimeObfuscator()
|
|
|
|
}
|
|
|
|
}
|