From ba19a1d49cde5a5fb82c30b4c335133f4627d69a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Thu, 14 Jan 2021 18:38:19 +0000 Subject: [PATCH] do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178. --- internal/literals/literals.go | 52 +++++++++++++++++++++++++---------- main_test.go | 51 +++++++++++++++++++--------------- testdata/scripts/literals.txt | 17 +++++++----- 3 files changed, 76 insertions(+), 44 deletions(-) diff --git a/internal/literals/literals.go b/internal/literals/literals.go index b1f793b..a44762b 100644 --- a/internal/literals/literals.go +++ b/internal/literals/literals.go @@ -15,6 +15,19 @@ import ( ah "mvdan.cc/garble/internal/asthelper" ) +// maxSizeBytes is the limit, in bytes, of the size of string-like literals +// which we will obfuscate. This is important, because otherwise garble can take +// a very long time to obfuscate huge code-generated literals, such as those +// corresponding to large assets. +// +// Note that this is the size of the literal in source code. For example, "\xab" +// counts as four bytes. +// +// If someone truly wants to obfuscate those, they should do that when they +// generate the code, not at build time. Plus, with Go 1.16 that technique +// should largely stop being used. +const maxSizeBytes = 2 << 10 // KiB + func randObfuscator() obfuscator { randPos := mathrand.Intn(len(obfuscators)) return obfuscators[randPos] @@ -76,6 +89,9 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli if y.Elem() != byteType { return true } + if y.Len() > maxSizeBytes { + return true + } data := make([]byte, y.Len()) @@ -98,6 +114,9 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli if y.Elem() != byteType { return true } + if len(x.Elts) > maxSizeBytes { + return true + } data := make([]byte, 0, len(x.Elts)) @@ -125,23 +144,26 @@ func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, blackli return true // we don't want to obfuscate imports etc. } - switch x.Kind { - case token.STRING: - typeInfo := info.TypeOf(x) - if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] { - return true - } - value, err := strconv.Unquote(x.Value) - if err != nil { - panic(fmt.Sprintf("cannot unquote string: %v", err)) - } - - if len(value) == 0 { - return true - } + if x.Kind != token.STRING { + return true + } + if len(x.Value) > maxSizeBytes { + return true + } + typeInfo := info.TypeOf(x) + if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] { + return true + } + value, err := strconv.Unquote(x.Value) + if err != nil { + panic(fmt.Sprintf("cannot unquote string: %v", err)) + } - cursor.Replace(obfuscateString(value)) + if len(value) == 0 { + return true } + + cursor.Replace(obfuscateString(value)) } return true diff --git a/main_test.go b/main_test.go index 3a70645..e190753 100644 --- a/main_test.go +++ b/main_test.go @@ -262,8 +262,8 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) { } } -func generateStringLit() *ast.BasicLit { - buffer := make([]byte, 1+mathrand.Intn(255)) +func generateStringLit(size int) *ast.BasicLit { + buffer := make([]byte, size) _, err := mathrand.Read(buffer) if err != nil { panic(err) @@ -276,34 +276,41 @@ func generateLiterals(ts *testscript.TestScript, neg bool, args []string) { if neg { ts.Fatalf("unsupported: ! generate-literals") } - if len(args) != 3 { - ts.Fatalf("usage: generate-literals file literalCount funcName") + if len(args) != 1 { + ts.Fatalf("usage: generate-literals file") } - codePath, funcName := args[0], args[2] - - literalCount, err := strconv.Atoi(args[1]) - if err != nil { - ts.Fatalf("%v", err) - } + codePath := args[0] + // Add 100 randomly small literals. var statements []ast.Stmt - for i := 0; i < literalCount; i++ { - literal := generateStringLit() - statements = append(statements, ah.ExprStmt(ah.CallExpr(ast.NewIdent("println"), literal))) + for i := 0; i < 100; i++ { + literal := generateStringLit(1 + mathrand.Intn(255)) + statements = append(statements, &ast.AssignStmt{ + Lhs: []ast.Expr{ast.NewIdent("_")}, + Tok: token.ASSIGN, + Rhs: []ast.Expr{literal}, + }) + } + // Add 5 huge literals, to make sure we don't try to obfuscate them. + // 5 * 128KiB is large enough that it would take a very, very long time + // to obfuscate those literals with our simple code. + for i := 0; i < 5; i++ { + literal := generateStringLit(128 << 10) + statements = append(statements, &ast.AssignStmt{ + Lhs: []ast.Expr{ast.NewIdent("_")}, + Tok: token.ASSIGN, + Rhs: []ast.Expr{literal}, + }) } file := &ast.File{ Name: ast.NewIdent("main"), - Decls: []ast.Decl{ - &ast.FuncDecl{ - Name: ast.NewIdent(funcName), - Type: &ast.FuncType{ - Params: &ast.FieldList{}, - }, - Body: ah.BlockStmt(statements...), - }, - }, + Decls: []ast.Decl{&ast.FuncDecl{ + Name: ast.NewIdent("extraLiterals"), + Type: &ast.FuncType{Params: &ast.FieldList{}}, + Body: ah.BlockStmt(statements...), + }}, } codeFile := createFile(ts, codePath) diff --git a/testdata/scripts/literals.txt b/testdata/scripts/literals.txt index 6a7d944..4d36ac4 100644 --- a/testdata/scripts/literals.txt +++ b/testdata/scripts/literals.txt @@ -21,8 +21,11 @@ exec ./main$exe cmp stderr main.stderr binsubstr main$exe 'Lorem' 'dolor' 'second assign' 'First Line' 'Second Line' 'map value' 'to obfuscate' 'also obfuscate' 'stringTypeField String' -# Generate and write random literals into a separate file -generate-literals extraLiterals.go 100 printExtraLiterals +# Generate and write random literals into a separate file. +# Some of them will be huge; assuming that we don't try to obfuscate them, the +# test should generally run in under a second. If this test hangs for over ten +# seconds, it means we're trying to obfuscate them. +generate-literals extra_literals.go # Also check that the binary is different from previous builds. rm main$exe @@ -35,19 +38,19 @@ cmp stderr main.stderr # Check obfuscators # Xor obfuscator. Detect a[i] = a[i] (^|-|+) b[i] -grep '^\s+\w+\[\w+\] = \w+\[\w+\] [\^\-+] \w+$' .obf-src/main/extraLiterals.go +grep '^\s+\w+\[\w+\] = \w+\[\w+\] [\^\-+] \w+$' .obf-src/main/extra_literals.go # Swap obfuscator. Detect [...]byte|uint16|uint32|uint64{...} -grep '^\s+\w+ := \[\.{3}\](byte|uint16|uint32|uint64)\{[0-9\s,]+\}$' .obf-src/main/extraLiterals.go +grep '^\s+\w+ := \[\.{3}\](byte|uint16|uint32|uint64)\{[0-9\s,]+\}$' .obf-src/main/extra_literals.go # Split obfuscator. Detect decryptKey ^= i * counter -grep '^\s+\w+ \^= \w+ \* \w+$' .obf-src/main/extraLiterals.go +grep '^\s+\w+ \^= \w+ \* \w+$' .obf-src/main/extra_literals.go # XorShuffle obfuscator. Detect data = append(data, x (^|-|+) y...) -grep '^\s+\w+ = append\(\w+,(\s+\w+\[\d+\][\^\-+]\w+\[\d+\],?)+\)$' .obf-src/main/extraLiterals.go +grep '^\s+\w+ = append\(\w+,(\s+\w+\[\d+\][\^\-+]\w+\[\d+\],?)+\)$' .obf-src/main/extra_literals.go # XorSeed obfuscator. Detect type decFunc func(byte) decFunc -grep '^\s+type \w+ func\(byte\) \w+$' .obf-src/main/extraLiterals.go +grep '^\s+type \w+ func\(byte\) \w+$' .obf-src/main/extra_literals.go -- go.mod -- module test/main