You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
garble/internal/literals/literals.go

277 lines
6.5 KiB
Go

// Copyright (c) 2020, The Garble Authors.
// See LICENSE for licensing information.
package literals
import (
"fmt"
"go/ast"
"go/token"
"go/types"
mathrand "math/rand"
"strconv"
"golang.org/x/tools/go/ast/astutil"
ah "mvdan.cc/garble/internal/asthelper"
)
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
// maxSizeBytes is the limit, in bytes, of the size of string-like literals
// which we will obfuscate. This is important, because otherwise garble can take
// a very long time to obfuscate huge code-generated literals, such as those
// corresponding to large assets.
//
// Note that for string literals this is the size of the literal in source
// code. For example, "\xab" counts as four bytes, plus the surrounding quotes.
// For byte arrays and byte slices the limit is compared against the array
// length and the number of elements respectively, not their source size.
//
// If someone truly wants to obfuscate those, they should do that when they
// generate the code, not at build time. Plus, with Go 1.16 that technique
// should largely stop being used.
const maxSizeBytes = 2 << 10 // 2 KiB
// randObfuscator returns a pseudo-randomly chosen entry of the package-level
// obfuscators list, using the default math/rand source.
//
// It panics if the list is empty, since mathrand.Intn requires a positive
// argument.
func randObfuscator() obfuscator {
	return obfuscators[mathrand.Intn(len(obfuscators))]
}
// Obfuscate replace literals with obfuscated lambda functions
func Obfuscate(files []*ast.File, info *types.Info, fset *token.FileSet, ignoreObj map[types.Object]bool) []*ast.File {
pre := func(cursor *astutil.Cursor) bool {
switch x := cursor.Node().(type) {
case *ast.GenDecl:
if x.Tok != token.CONST {
return true
}
for _, spec := range x.Specs {
spec, ok := spec.(*ast.ValueSpec)
if !ok {
return false
}
for _, name := range spec.Names {
obj := info.ObjectOf(name)
basic, ok := obj.Type().(*types.Basic)
if !ok {
// skip the block if it contains non basic types
return false
}
if basic.Info()&types.IsUntyped != 0 {
// skip the block if it contains untyped constants
return false
}
// The object cannot be obfuscated, e.g. a value that needs to be constant
if ignoreObj[obj] {
return false
}
}
}
x.Tok = token.VAR
// constants are not possible if we want to obfuscate literals, therefore
// move all constant blocks which only contain strings to variables
}
return true
}
post := func(cursor *astutil.Cursor) bool {
switch x := cursor.Node().(type) {
case *ast.CompositeLit:
byteType := types.Universe.Lookup("byte").Type()
if len(x.Elts) == 0 {
return true
}
switch y := info.TypeOf(x.Type).(type) {
case *types.Array:
if y.Elem() != byteType {
return true
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
if y.Len() > maxSizeBytes {
return true
}
data := make([]byte, y.Len())
for i, el := range x.Elts {
lit, ok := el.(*ast.BasicLit)
if !ok {
return true
}
value, err := strconv.Atoi(lit.Value)
if err != nil {
return true
}
data[i] = byte(value)
}
cursor.Replace(obfuscateByteArray(data, y.Len()))
case *types.Slice:
if y.Elem() != byteType {
return true
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
if len(x.Elts) > maxSizeBytes {
return true
}
data := make([]byte, 0, len(x.Elts))
for _, el := range x.Elts {
lit, ok := el.(*ast.BasicLit)
if !ok {
return true
}
value, err := strconv.Atoi(lit.Value)
if err != nil {
return true
}
data = append(data, byte(value))
}
cursor.Replace(obfuscateByteSlice(data))
}
case *ast.BasicLit:
switch cursor.Name() {
case "Values", "Rhs", "Value", "Args", "X", "Y", "Results":
default:
return true // we don't want to obfuscate imports etc.
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
if x.Kind != token.STRING {
return true
}
if len(x.Value) > maxSizeBytes {
return true
}
typeInfo := info.TypeOf(x)
if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] {
return true
}
value, err := strconv.Unquote(x.Value)
if err != nil {
panic(fmt.Sprintf("cannot unquote string: %v", err))
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
if len(value) == 0 {
return true
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
4 years ago
cursor.Replace(obfuscateString(value))
}
return true
}
for i := range files {
files[i] = astutil.Apply(files[i], pre, post).(*ast.File)
}
return files
}
// obfuscateString builds a call to a function literal returning string, whose
// body is produced by a randomly chosen obfuscator for the bytes of data and
// ends with "return string(data)".
//
// NOTE(review): this relies on the obfuscator's statements declaring a
// variable named "data" holding the decoded bytes; confirm against the
// obfuscator implementations.
func obfuscateString(data string) *ast.CallExpr {
	body := randObfuscator().obfuscate([]byte(data))

	toString := ah.CallExpr(ast.NewIdent("string"), ast.NewIdent("data"))
	body.List = append(body.List, ah.ReturnStmt(toString))

	return ah.LambdaCall(ast.NewIdent("string"), body)
}
// obfuscateByteSlice builds a call to a function literal returning []byte,
// whose body is produced by a randomly chosen obfuscator for data and ends
// with "return data".
//
// NOTE(review): this relies on the obfuscator's statements declaring a
// variable named "data" holding the decoded bytes; confirm against the
// obfuscator implementations.
func obfuscateByteSlice(data []byte) *ast.CallExpr {
	body := randObfuscator().obfuscate(data)
	body.List = append(body.List, ah.ReturnStmt(ast.NewIdent("data")))

	resultType := &ast.ArrayType{Elt: ast.NewIdent("byte")}
	return ah.LambdaCall(resultType, body)
}
// obfuscateByteArray builds a call to a function literal returning
// [length]byte. The body is produced by a randomly chosen obfuscator for data,
// followed by statements which copy the decoded bytes into a fixed-size array:
//
//	var newdata [length]byte
//	for i := range newdata {
//		newdata[i] = data[i]
//	}
//	return newdata
//
// NOTE(review): this relies on the obfuscator's statements declaring a
// variable named "data" with at least length elements; confirm against the
// obfuscator implementations.
func obfuscateByteArray(data []byte, length int64) *ast.CallExpr {
	body := randObfuscator().obfuscate(data)

	// The same type node is used for both the variable declaration and the
	// function literal's result.
	arrType := &ast.ArrayType{
		Len: ah.IntLit(int(length)),
		Elt: ast.NewIdent("byte"),
	}

	// var newdata [length]byte
	declareArray := &ast.DeclStmt{Decl: &ast.GenDecl{
		Tok: token.VAR,
		Specs: []ast.Spec{
			&ast.ValueSpec{
				Names: []*ast.Ident{ast.NewIdent("newdata")},
				Type:  arrType,
			},
		},
	}}

	// newdata[i] = data[i]
	copyElement := &ast.AssignStmt{
		Lhs: []ast.Expr{ah.IndexExpr("newdata", ast.NewIdent("i"))},
		Tok: token.ASSIGN,
		Rhs: []ast.Expr{ah.IndexExpr("data", ast.NewIdent("i"))},
	}

	// for i := range newdata { ... }
	copyLoop := &ast.RangeStmt{
		Key:  ast.NewIdent("i"),
		Tok:  token.DEFINE,
		X:    ast.NewIdent("newdata"),
		Body: &ast.BlockStmt{List: []ast.Stmt{copyElement}},
	}

	body.List = append(body.List,
		declareArray,
		copyLoop,
		ah.ReturnStmt(ast.NewIdent("newdata")),
	)
	return ah.LambdaCall(arrType, body)
}
// RecordUsedAsConstants records identifiers used in constant expressions.
//
// It inspects only node itself, not its children, so callers are expected to
// invoke it for every node of interest. Three kinds of node are handled:
//
//   - composite literals with an array or slice type expression, whose
//     explicit keys must be constant representable,
//   - array types, whose length must be constant representable,
//   - const declarations, whose values must be constant representable.
//
// For each identifier found in those positions, the object it resolves to
// according to info is added to ignoreObj with the value true. Identifiers
// which info cannot resolve are skipped, so ignoreObj never gains a nil key.
// Any other node is ignored.
func RecordUsedAsConstants(node ast.Node, info *types.Info, ignoreObj map[types.Object]bool) {
	visit := func(node ast.Node) bool {
		ident, ok := node.(*ast.Ident)
		if !ok {
			return true
		}
		// ObjectOf returns nil when the identifier is in neither info.Defs
		// nor info.Uses; there is nothing useful to record in that case.
		if obj := info.ObjectOf(ident); obj != nil {
			ignoreObj[obj] = true
		}
		return true
	}

	switch x := node.(type) {
	// in a slice or array composite literal all explicit keys must be constant representable
	case *ast.CompositeLit:
		if _, ok := x.Type.(*ast.ArrayType); !ok {
			break
		}
		for _, elt := range x.Elts {
			if kv, ok := elt.(*ast.KeyValueExpr); ok {
				ast.Inspect(kv.Key, visit)
			}
		}

	// in an array type the length must be a constant representable
	case *ast.ArrayType:
		if x.Len != nil {
			ast.Inspect(x.Len, visit)
		}

	// in a const declaration all values must be constant representable
	case *ast.GenDecl:
		if x.Tok != token.CONST {
			break
		}
		for _, spec := range x.Specs {
			// Const declarations only hold value specs; the checked
			// assertion merely avoids a panic on a malformed tree.
			valueSpec, ok := spec.(*ast.ValueSpec)
			if !ok {
				continue
			}
			for _, val := range valueSpec.Values {
				ast.Inspect(val, visit)
			}
		}
	}
}