From b587d8c01a510ec2f40a01a974fcadbfa67e7142 Mon Sep 17 00:00:00 2001 From: Dominic Breuker Date: Sun, 30 Apr 2023 12:09:31 +0200 Subject: [PATCH] use the "simple" obfuscator for large literals Changes literal obfuscation such that literals of any size will be obfuscated, but beyond `maxSize` we only use the `simple` obfuscator. This one seems to apply AND, OR, or XOR operators byte-wise and should be safe to use, unlike some of the other obfuscators which are quadratic on the literal size or worse. The test for literals is changed a bit to verify that obfuscation is applied. The code written to the `extra_literals.go` file by the test helper now ensures that Go does not optimize the literals away when we build the binary. We also append a unique string to all literals so that we can test that an unobfuscated build contains this string while an obfuscated build does not. --- internal/literals/literals.go | 35 +++++++------ internal/literals/obfuscators.go | 16 +++++- main_test.go | 84 +++++++++++++++++++++++--------- testdata/script/literals.txtar | 6 +++ 4 files changed, 100 insertions(+), 41 deletions(-) diff --git a/internal/literals/literals.go b/internal/literals/literals.go index 0549bad..f5f2ae4 100644 --- a/internal/literals/literals.go +++ b/internal/literals/literals.go @@ -20,14 +20,9 @@ import ( // moderate, this also decreases the likelihood for performance slowdowns. const minSize = 8 -// maxSize is the upper bound limit, of the size of string-like literals -// which we will obfuscate. This is important, because otherwise garble can take -// a very long time to obfuscate huge code-generated literals, such as those -// corresponding to large assets. -// -// If someone truly wants to obfuscate those, they should do that when they -// generate the code, not at build time. Plus, with Go 1.16 that technique -// should largely stop being used. 
+// maxSize is the upper limit of the size of string-like literals +// which we will obfuscate with any of the available obfuscators. +// Beyond that we apply only a subset of obfuscators which are guaranteed to run efficiently. const maxSize = 2 << 10 // KiB // Obfuscate replaces literals with obfuscated anonymous functions. @@ -66,7 +61,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString if typeAndValue.Type == types.Typ[types.String] && typeAndValue.Value != nil { value := constant.StringVal(typeAndValue.Value) - if len(value) < minSize || len(value) > maxSize { + if len(value) < minSize { return true } @@ -124,7 +119,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString // // If the input node cannot be obfuscated nil is returned. func handleCompositeLiteral(obfRand *obfRand, isPointer bool, node *ast.CompositeLit, info *types.Info) ast.Node { - if len(node.Elts) < minSize || len(node.Elts) > maxSize { + if len(node.Elts) < minSize { return nil } @@ -218,8 +213,8 @@ func withPos(node ast.Node, pos token.Pos) ast.Node { } func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr { - obfuscator := obfRand.nextObfuscator() - block := obfuscator.obfuscate(obfRand.Rand, []byte(data)) + obf := getNextObfuscator(obfRand, len(data)) + block := obf.obfuscate(obfRand.Rand, []byte(data)) block.List = append(block.List, ah.ReturnStmt(ah.CallExpr(ast.NewIdent("string"), ast.NewIdent("data")))) @@ -227,8 +222,8 @@ func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr { } func obfuscateByteSlice(obfRand *obfRand, isPointer bool, data []byte) *ast.CallExpr { - obfuscator := obfRand.nextObfuscator() - block := obfuscator.obfuscate(obfRand.Rand, data) + obf := getNextObfuscator(obfRand, len(data)) + block := obf.obfuscate(obfRand.Rand, data) if isPointer { block.List = append(block.List, ah.ReturnStmt(&ast.UnaryExpr{ @@ -245,8 +240,8 @@ func obfuscateByteSlice(obfRand *obfRand, isPointer bool, 
data []byte) *ast.Call } func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length int64) *ast.CallExpr { - obfuscator := obfRand.nextObfuscator() - block := obfuscator.obfuscate(obfRand.Rand, data) + obf := getNextObfuscator(obfRand, len(data)) + block := obf.obfuscate(obfRand.Rand, data) arrayType := &ast.ArrayType{ Len: ah.IntLit(int(length)), @@ -291,3 +286,11 @@ func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length in return ah.LambdaCall(arrayType, block) } + +func getNextObfuscator(obfRand *obfRand, size int) obfuscator { + if size <= maxSize { + return obfRand.nextObfuscator() + } else { + return obfRand.nextLinearTimeObfuscator() + } +} diff --git a/internal/literals/obfuscators.go b/internal/literals/obfuscators.go index 228fe6c..10de7d8 100644 --- a/internal/literals/obfuscators.go +++ b/internal/literals/obfuscators.go @@ -16,15 +16,22 @@ type obfuscator interface { } var ( + simpleObfuscator = simple{} + // Obfuscators contains all types which implement the obfuscator Interface Obfuscators = []obfuscator{ - simple{}, + simpleObfuscator, swap{}, split{}, shuffle{}, seed{}, } + // LinearTimeObfuscators contains all types which implement the obfuscator Interface and can safely be used on large literals + LinearTimeObfuscators = []obfuscator{ + simpleObfuscator, + } + TestObfuscator string testPkgToObfuscatorMap map[string]obfuscator ) @@ -84,6 +91,13 @@ func (r *obfRand) nextObfuscator() obfuscator { return Obfuscators[r.Intn(len(Obfuscators))] } +func (r *obfRand) nextLinearTimeObfuscator() obfuscator { + if r.testObfuscator != nil { + return r.testObfuscator + } + return LinearTimeObfuscators[r.Intn(len(LinearTimeObfuscators))] +} + func newObfRand(rand *mathrand.Rand, file *ast.File) *obfRand { testObf := testPkgToObfuscatorMap[file.Name.Name] return &obfRand{rand, testObf} diff --git a/main_test.go b/main_test.go index 002f39b..5300c7e 100644 --- a/main_test.go +++ b/main_test.go @@ -241,16 +241,23 @@ func bincmp(ts 
*testscript.TestScript, neg bool, args []string) { var testRand = mathrand.New(mathrand.NewSource(time.Now().UnixNano())) -func generateStringLit(size int) *ast.BasicLit { - buffer := make([]byte, size) +func generateStringLit(minSize int) *ast.BasicLit { + buffer := make([]byte, minSize) _, err := testRand.Read(buffer) if err != nil { panic(err) } - return ah.StringLit(string(buffer)) + return ah.StringLit(string(buffer) + "a_unique_string_that_is_part_of_all_extra_literals") } +// generateLiterals creates a new source code file with a few random literals inside. +// All literals contain the string "a_unique_string_that_is_part_of_all_extra_literals" +// so we can later check if they are all obfuscated by looking for this substring. +// The code is designed such that the Go compiler does not optimize away the literals, +// which would destroy the test. +// This is achieved by defining a global variable `var x = ""` and an `init` function +// which appends all literals to `x`. func generateLiterals(ts *testscript.TestScript, neg bool, args []string) { if neg { ts.Fatalf("unsupported: ! generate-literals") @@ -261,35 +268,64 @@ func generateLiterals(ts *testscript.TestScript, neg bool, args []string) { codePath := args[0] - // Add 100 randomly small literals. 
+ // Global string variable to which we append string literals: `var x = ""` + globalVar := &ast.GenDecl{ + Tok: token.VAR, + Specs: []ast.Spec{ + &ast.ValueSpec{ + Names: []*ast.Ident{ast.NewIdent("x")}, + Values: []ast.Expr{ + &ast.BasicLit{Kind: token.STRING, Value: `""`}, + }, + }, + }, + } + var statements []ast.Stmt + + // Assignments which append 100 random small literals to x: `x += "the_small_random_literal"` for i := 0; i < 100; i++ { - literal := generateStringLit(1 + testRand.Intn(255)) - statements = append(statements, &ast.AssignStmt{ - Lhs: []ast.Expr{ast.NewIdent("_")}, - Tok: token.ASSIGN, - Rhs: []ast.Expr{literal}, - }) - } - // Add 5 huge literals, to make sure we don't try to obfuscate them. + statements = append( + statements, + &ast.AssignStmt{ + Lhs: []ast.Expr{ast.NewIdent("x")}, + Tok: token.ADD_ASSIGN, + Rhs: []ast.Expr{generateStringLit(1 + testRand.Intn(255))}, + }, + ) + } + + // Assignments which append 5 random huge literals to x: `x += "the_huge_random_literal"` + // We add huge literals to make sure we obfuscate them fast. // 5 * 128KiB is large enough that it would take a very, very long time - // to obfuscate those literals with our simple code. + // to obfuscate those literals if overly complex obfuscators are used. 
for i := 0; i < 5; i++ { - literal := generateStringLit(128 << 10) - statements = append(statements, &ast.AssignStmt{ - Lhs: []ast.Expr{ast.NewIdent("_")}, - Tok: token.ASSIGN, - Rhs: []ast.Expr{literal}, - }) + statements = append( + statements, + &ast.AssignStmt{ + Lhs: []ast.Expr{ast.NewIdent("x")}, + Tok: token.ADD_ASSIGN, + Rhs: []ast.Expr{generateStringLit(128 << 10)}, + }, + ) + } + + // An `init` function which includes all assignments from above + initFunc := &ast.FuncDecl{ + Name: &ast.Ident{ + Name: "init", + }, + Type: &ast.FuncType{}, + Body: ah.BlockStmt(statements...), } + // A file with the global string variable and init function file := &ast.File{ Name: ast.NewIdent("main"), - Decls: []ast.Decl{&ast.FuncDecl{ - Name: ast.NewIdent("extraLiterals"), - Type: &ast.FuncType{Params: &ast.FieldList{}}, - Body: ah.BlockStmt(statements...), - }}, + Decls: []ast.Decl{ + globalVar, + initFunc, + }, } codeFile := createFile(ts, codePath) diff --git a/testdata/script/literals.txtar b/testdata/script/literals.txtar index b4813c9..ff65467 100644 --- a/testdata/script/literals.txtar +++ b/testdata/script/literals.txtar @@ -25,9 +25,15 @@ binsubstr main$exe 'Lorem Ipsum' 'dolor sit amet' 'second assign' 'First Line' ' # seconds, it means we're trying to obfuscate them. generate-literals extra_literals.go +# ensure we find the extra literals in an unobfuscated build +go build +binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals' + +# ensure we don't find the extra literals in an obfuscated build garble -literals -debugdir=debug1 build exec ./main$exe cmp stderr main.stderr +! binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals' # Check obfuscators.