do not try to obfuscate huge literals (#204)

It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.

I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.

The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.

This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.

We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.

As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.

And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.

I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.

Fixes #178.
pull/205/head
Daniel Martí 4 years ago committed by GitHub
parent 249501b5e9
commit ba19a1d49c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -15,6 +15,19 @@ import (
ah "mvdan.cc/garble/internal/asthelper"
)
// maxSizeBytes is the limit, in bytes, of the size of string-like literals
// which we will obfuscate. This is important, because otherwise garble can take
// a very long time to obfuscate huge code-generated literals, such as those
// corresponding to large assets.
//
// Note that this is the size of the literal in source code. For example, "\xab"
// counts as four bytes.
//
// If someone truly wants to obfuscate those, they should do that when they
// generate the code, not at build time. Plus, with Go 1.16 that technique
// should largely stop being used.
const maxSizeBytes = 2 << 10 // KiB
func randObfuscator() obfuscator {
randPos := mathrand.Intn(len(obfuscators))
return obfuscators[randPos]
@ -76,6 +89,9 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli
if y.Elem() != byteType {
return true
}
if y.Len() > maxSizeBytes {
return true
}
data := make([]byte, y.Len())
@ -98,6 +114,9 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli
if y.Elem() != byteType {
return true
}
if len(x.Elts) > maxSizeBytes {
return true
}
data := make([]byte, 0, len(x.Elts))
@ -125,23 +144,26 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli
return true // we don't want to obfuscate imports etc.
}
switch x.Kind {
case token.STRING:
typeInfo := info.TypeOf(x)
if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] {
return true
}
value, err := strconv.Unquote(x.Value)
if err != nil {
panic(fmt.Sprintf("cannot unquote string: %v", err))
}
if len(value) == 0 {
return true
}
if x.Kind != token.STRING {
return true
}
if len(x.Value) > maxSizeBytes {
return true
}
typeInfo := info.TypeOf(x)
if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] {
return true
}
value, err := strconv.Unquote(x.Value)
if err != nil {
panic(fmt.Sprintf("cannot unquote string: %v", err))
}
cursor.Replace(obfuscateString(value))
if len(value) == 0 {
return true
}
cursor.Replace(obfuscateString(value))
}
return true

@ -262,8 +262,8 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) {
}
}
func generateStringLit() *ast.BasicLit {
buffer := make([]byte, 1+mathrand.Intn(255))
func generateStringLit(size int) *ast.BasicLit {
buffer := make([]byte, size)
_, err := mathrand.Read(buffer)
if err != nil {
panic(err)
@ -276,34 +276,41 @@ func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {
if neg {
ts.Fatalf("unsupported: ! generate-literals")
}
if len(args) != 3 {
ts.Fatalf("usage: generate-literals file literalCount funcName")
if len(args) != 1 {
ts.Fatalf("usage: generate-literals file")
}
codePath, funcName := args[0], args[2]
literalCount, err := strconv.Atoi(args[1])
if err != nil {
ts.Fatalf("%v", err)
}
codePath := args[0]
// Add 100 randomly small literals.
var statements []ast.Stmt
for i := 0; i < literalCount; i++ {
literal := generateStringLit()
statements = append(statements, ah.ExprStmt(ah.CallExpr(ast.NewIdent("println"), literal)))
for i := 0; i < 100; i++ {
literal := generateStringLit(1 + mathrand.Intn(255))
statements = append(statements, &ast.AssignStmt{
Lhs: []ast.Expr{ast.NewIdent("_")},
Tok: token.ASSIGN,
Rhs: []ast.Expr{literal},
})
}
// Add 5 huge literals, to make sure we don't try to obfuscate them.
// 5 * 128KiB is large enough that it would take a very, very long time
// to obfuscate those literals with our simple code.
for i := 0; i < 5; i++ {
literal := generateStringLit(128 << 10)
statements = append(statements, &ast.AssignStmt{
Lhs: []ast.Expr{ast.NewIdent("_")},
Tok: token.ASSIGN,
Rhs: []ast.Expr{literal},
})
}
file := &ast.File{
Name: ast.NewIdent("main"),
Decls: []ast.Decl{
&ast.FuncDecl{
Name: ast.NewIdent(funcName),
Type: &ast.FuncType{
Params: &ast.FieldList{},
},
Body: ah.BlockStmt(statements...),
},
},
Decls: []ast.Decl{&ast.FuncDecl{
Name: ast.NewIdent("extraLiterals"),
Type: &ast.FuncType{Params: &ast.FieldList{}},
Body: ah.BlockStmt(statements...),
}},
}
codeFile := createFile(ts, codePath)

@ -21,8 +21,11 @@ exec ./main$exe
cmp stderr main.stderr
binsubstr main$exe 'Lorem' 'dolor' 'second assign' 'First Line' 'Second Line' 'map value' 'to obfuscate' 'also obfuscate' 'stringTypeField String'
# Generate and write random literals into a separate file
generate-literals extraLiterals.go 100 printExtraLiterals
# Generate and write random literals into a separate file.
# Some of them will be huge; assuming that we don't try to obfuscate them, the
# test should generally run in under a second. If this test hangs for over ten
# seconds, it means we're trying to obfuscate them.
generate-literals extra_literals.go
# Also check that the binary is different from previous builds.
rm main$exe
@ -35,19 +38,19 @@ cmp stderr main.stderr
# Check obfuscators
# Xor obfuscator. Detect a[i] = a[i] (^|-|+) b[i]
grep '^\s+\w+\[\w+\] = \w+\[\w+\] [\^\-+] \w+$' .obf-src/main/extraLiterals.go
grep '^\s+\w+\[\w+\] = \w+\[\w+\] [\^\-+] \w+$' .obf-src/main/extra_literals.go
# Swap obfuscator. Detect [...]byte|uint16|uint32|uint64{...}
grep '^\s+\w+ := \[\.{3}\](byte|uint16|uint32|uint64)\{[0-9\s,]+\}$' .obf-src/main/extraLiterals.go
grep '^\s+\w+ := \[\.{3}\](byte|uint16|uint32|uint64)\{[0-9\s,]+\}$' .obf-src/main/extra_literals.go
# Split obfuscator. Detect decryptKey ^= i * counter
grep '^\s+\w+ \^= \w+ \* \w+$' .obf-src/main/extraLiterals.go
grep '^\s+\w+ \^= \w+ \* \w+$' .obf-src/main/extra_literals.go
# XorShuffle obfuscator. Detect data = append(data, x (^|-|+) y...)
grep '^\s+\w+ = append\(\w+,(\s+\w+\[\d+\][\^\-+]\w+\[\d+\],?)+\)$' .obf-src/main/extraLiterals.go
grep '^\s+\w+ = append\(\w+,(\s+\w+\[\d+\][\^\-+]\w+\[\d+\],?)+\)$' .obf-src/main/extra_literals.go
# XorSeed obfuscator. Detect type decFunc func(byte) decFunc
grep '^\s+type \w+ func\(byte\) \w+$' .obf-src/main/extraLiterals.go
grep '^\s+type \w+ func\(byte\) \w+$' .obf-src/main/extra_literals.go
-- go.mod --
module test/main

Loading…
Cancel
Save