From b587d8c01a510ec2f40a01a974fcadbfa67e7142 Mon Sep 17 00:00:00 2001
From: Dominic Breuker <dominic.breuker@protonmail.com>
Date: Sun, 30 Apr 2023 12:09:31 +0200
Subject: [PATCH] use the "simple" obfuscator for large literals

Changes literal obfuscation such that literals of any size will be obfuscated,
but beyond `maxSize` we only use the `simple` obfuscator.
This one seems to apply AND, OR, or XOR operators byte-wise and should be safe to use,
unlike some of the other obfuscators which are quadratic on the literal size or worse.

The test for literals is changed a bit to verify that obfuscation is applied.
The code written to the `extra_literals.go` file by the test helper now ensures
that Go does not optimize the literals away when we build the binary.
We also append a unique string to all literals so that we can test that
an unobfuscated build contains this string while an obfuscated build does not.
---
 internal/literals/literals.go    | 35 +++++++------
 internal/literals/obfuscators.go | 16 +++++-
 main_test.go                     | 84 +++++++++++++++++++++++---------
 testdata/script/literals.txtar   |  6 +++
 4 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/internal/literals/literals.go b/internal/literals/literals.go
index 0549bad..f5f2ae4 100644
--- a/internal/literals/literals.go
+++ b/internal/literals/literals.go
@@ -20,14 +20,9 @@ import (
 // moderate, this also decreases the likelihood for performance slowdowns.
 const minSize = 8
 
-// maxSize is the upper bound limit, of the size of string-like literals
-// which we will obfuscate. This is important, because otherwise garble can take
-// a very long time to obfuscate huge code-generated literals, such as those
-// corresponding to large assets.
-//
-// If someone truly wants to obfuscate those, they should do that when they
-// generate the code, not at build time. Plus, with Go 1.16 that technique
-// should largely stop being used.
+// maxSize is the upper limit of the size of string-like literals
+// which we will obfuscate with any of the available obfuscators.
+// Beyond that we apply only a subset of obfuscators which are guaranteed to run efficiently.
 const maxSize = 2 << 10 // KiB
 
 // Obfuscate replaces literals with obfuscated anonymous functions.
@@ -66,7 +61,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString
 
 		if typeAndValue.Type == types.Typ[types.String] && typeAndValue.Value != nil {
 			value := constant.StringVal(typeAndValue.Value)
-			if len(value) < minSize || len(value) > maxSize {
+			if len(value) < minSize {
 				return true
 			}
 
@@ -124,7 +119,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString
 //
 // If the input node cannot be obfuscated nil is returned.
 func handleCompositeLiteral(obfRand *obfRand, isPointer bool, node *ast.CompositeLit, info *types.Info) ast.Node {
-	if len(node.Elts) < minSize || len(node.Elts) > maxSize {
+	if len(node.Elts) < minSize {
 		return nil
 	}
 
@@ -218,8 +213,8 @@ func withPos(node ast.Node, pos token.Pos) ast.Node {
 }
 
 func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr {
-	obfuscator := obfRand.nextObfuscator()
-	block := obfuscator.obfuscate(obfRand.Rand, []byte(data))
+	obf := getNextObfuscator(obfRand, len(data))
+	block := obf.obfuscate(obfRand.Rand, []byte(data))
 
 	block.List = append(block.List, ah.ReturnStmt(ah.CallExpr(ast.NewIdent("string"), ast.NewIdent("data"))))
 
@@ -227,8 +222,8 @@ func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr {
 }
 
 func obfuscateByteSlice(obfRand *obfRand, isPointer bool, data []byte) *ast.CallExpr {
-	obfuscator := obfRand.nextObfuscator()
-	block := obfuscator.obfuscate(obfRand.Rand, data)
+	obf := getNextObfuscator(obfRand, len(data))
+	block := obf.obfuscate(obfRand.Rand, data)
 
 	if isPointer {
 		block.List = append(block.List, ah.ReturnStmt(&ast.UnaryExpr{
@@ -245,8 +240,8 @@ func obfuscateByteSlice(obfRand *obfRand, isPointer bool, data []byte) *ast.Call
 }
 
 func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length int64) *ast.CallExpr {
-	obfuscator := obfRand.nextObfuscator()
-	block := obfuscator.obfuscate(obfRand.Rand, data)
+	obf := getNextObfuscator(obfRand, len(data))
+	block := obf.obfuscate(obfRand.Rand, data)
 
 	arrayType := &ast.ArrayType{
 		Len: ah.IntLit(int(length)),
@@ -291,3 +286,11 @@ func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length in
 
 	return ah.LambdaCall(arrayType, block)
 }
+
+// getNextObfuscator returns any obfuscator for data of up to maxSize bytes, else one from nextLinearTimeObfuscator.
+func getNextObfuscator(obfRand *obfRand, size int) obfuscator {
+	if size > maxSize {
+		return obfRand.nextLinearTimeObfuscator()
+	}
+	return obfRand.nextObfuscator()
+}
diff --git a/internal/literals/obfuscators.go b/internal/literals/obfuscators.go
index 228fe6c..10de7d8 100644
--- a/internal/literals/obfuscators.go
+++ b/internal/literals/obfuscators.go
@@ -16,15 +16,22 @@ type obfuscator interface {
 }
 
 var (
+	simpleObfuscator = simple{}
+
 	// Obfuscators contains all types which implement the obfuscator Interface
 	Obfuscators = []obfuscator{
-		simple{},
+		simpleObfuscator,
 		swap{},
 		split{},
 		shuffle{},
 		seed{},
 	}
 
+	// LinearTimeObfuscators contains all types which implement the obfuscator Interface and can safely be used on large literals
+	LinearTimeObfuscators = []obfuscator{
+		simpleObfuscator,
+	}
+
 	TestObfuscator         string
 	testPkgToObfuscatorMap map[string]obfuscator
 )
@@ -84,6 +91,13 @@ func (r *obfRand) nextObfuscator() obfuscator {
 	return Obfuscators[r.Intn(len(Obfuscators))]
 }
 
+func (r *obfRand) nextLinearTimeObfuscator() obfuscator {
+	if r.testObfuscator != nil {
+		return r.testObfuscator // when set, the test obfuscator takes precedence over the random pick
+	}
+	return LinearTimeObfuscators[r.Intn(len(LinearTimeObfuscators))] // index the same slice whose length bounds Intn
+}
+
 func newObfRand(rand *mathrand.Rand, file *ast.File) *obfRand {
 	testObf := testPkgToObfuscatorMap[file.Name.Name]
 	return &obfRand{rand, testObf}
diff --git a/main_test.go b/main_test.go
index 002f39b..5300c7e 100644
--- a/main_test.go
+++ b/main_test.go
@@ -241,16 +241,23 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) {
 
 var testRand = mathrand.New(mathrand.NewSource(time.Now().UnixNano()))
 
-func generateStringLit(size int) *ast.BasicLit {
-	buffer := make([]byte, size)
+func generateStringLit(minSize int) *ast.BasicLit {
+	buffer := make([]byte, minSize) // random prefix; the marker suffix appended below makes the literal longer than minSize
 	_, err := testRand.Read(buffer)
 	if err != nil {
 		panic(err)
 	}
 
-	return ah.StringLit(string(buffer))
+	return ah.StringLit(string(buffer) + "a_unique_string_that_is_part_of_all_extra_literals")
 }
 
+// generateLiterals creates a new source code file with a few random literals inside.
+// All literals contain the string "a_unique_string_that_is_part_of_all_extra_literals"
+// so we can later check if they are all obfuscated by looking for this substring.
+// The code is designed such that the Go compiler does not optimize away the literals,
+// which would destroy the test.
+// This is achieved by defining a global variable `var x = ""` and an `init` function
+// which appends all literals to `x`.
 func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {
 	if neg {
 		ts.Fatalf("unsupported: ! generate-literals")
@@ -261,35 +268,64 @@ func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {
 
 	codePath := args[0]
 
-	// Add 100 randomly small literals.
+	// Global string variable to which we append string literals: `var x = ""`
+	globalVar := &ast.GenDecl{
+		Tok: token.VAR,
+		Specs: []ast.Spec{
+			&ast.ValueSpec{
+				Names: []*ast.Ident{ast.NewIdent("x")},
+				Values: []ast.Expr{
+					&ast.BasicLit{Kind: token.STRING, Value: `""`},
+				},
+			},
+		},
+	}
+
 	var statements []ast.Stmt
+
+	// Assignments which append 100 random small literals to x: `x += "the_small_random_literal"`
 	for i := 0; i < 100; i++ {
-		literal := generateStringLit(1 + testRand.Intn(255))
-		statements = append(statements, &ast.AssignStmt{
-			Lhs: []ast.Expr{ast.NewIdent("_")},
-			Tok: token.ASSIGN,
-			Rhs: []ast.Expr{literal},
-		})
-	}
-	// Add 5 huge literals, to make sure we don't try to obfuscate them.
+		statements = append(
+			statements,
+			&ast.AssignStmt{
+				Lhs: []ast.Expr{ast.NewIdent("x")},
+				Tok: token.ADD_ASSIGN,
+				Rhs: []ast.Expr{generateStringLit(1 + testRand.Intn(255))},
+			},
+		)
+	}
+
+	// Assignments which append 5 random huge literals to x: `x += "the_huge_random_literal"`
+	// We add huge literals to make sure we obfuscate them fast.
 	// 5 * 128KiB is large enough that it would take a very, very long time
-	// to obfuscate those literals with our simple code.
+	// to obfuscate those literals with obfuscators that are super-linear in the literal size.
 	for i := 0; i < 5; i++ {
-		literal := generateStringLit(128 << 10)
-		statements = append(statements, &ast.AssignStmt{
-			Lhs: []ast.Expr{ast.NewIdent("_")},
-			Tok: token.ASSIGN,
-			Rhs: []ast.Expr{literal},
-		})
+		statements = append(
+			statements,
+			&ast.AssignStmt{
+				Lhs: []ast.Expr{ast.NewIdent("x")},
+				Tok: token.ADD_ASSIGN,
+				Rhs: []ast.Expr{generateStringLit(128 << 10)},
+			},
+		)
+	}
+
+	// An `init` function which includes all assignments from above
+	initFunc := &ast.FuncDecl{
+		Name: &ast.Ident{
+			Name: "init",
+		},
+		Type: &ast.FuncType{},
+		Body: ah.BlockStmt(statements...),
 	}
 
+	// A file with the global string variable and init function
 	file := &ast.File{
 		Name: ast.NewIdent("main"),
-		Decls: []ast.Decl{&ast.FuncDecl{
-			Name: ast.NewIdent("extraLiterals"),
-			Type: &ast.FuncType{Params: &ast.FieldList{}},
-			Body: ah.BlockStmt(statements...),
-		}},
+		Decls: []ast.Decl{
+			globalVar,
+			initFunc,
+		},
 	}
 
 	codeFile := createFile(ts, codePath)
diff --git a/testdata/script/literals.txtar b/testdata/script/literals.txtar
index b4813c9..ff65467 100644
--- a/testdata/script/literals.txtar
+++ b/testdata/script/literals.txtar
@@ -25,9 +25,15 @@ binsubstr main$exe 'Lorem Ipsum' 'dolor sit amet' 'second assign' 'First Line' '
 # seconds, it means we're trying to obfuscate them.
 generate-literals extra_literals.go
 
+# ensure we find the extra literals in an unobfuscated build
+go build
+binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals'
+
+# ensure we don't find the extra literals in an obfuscated build
 garble -literals -debugdir=debug1 build
 exec ./main$exe
 cmp stderr main.stderr
+! binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals'
 
 # Check obfuscators.