From fc91758b4967e588bb48fa3925611da35474bede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Wed, 21 Sep 2022 10:54:53 +0100 Subject: [PATCH] obfuscate Go names in asm header files Assembly files can include header files within the same Go module, and those header files can include "defines" which refer to Go names. Since those Go names are likely being obfuscated, we need to replace them just like we do in assembly files. The added mechanism is rather basic; we add two TODOs to improve it. This should help when building projects like go-ethereum. Fixes #553. --- main.go | 171 ++++++++++++++++++++++++-------------- testdata/script/asm.txtar | 51 ++++++++++-- 2 files changed, 155 insertions(+), 67 deletions(-) diff --git a/main.go b/main.go index 4a8915d..a299826 100644 --- a/main.go +++ b/main.go @@ -584,6 +584,8 @@ var transformFuncs = map[string]func([]string) ([]string, error){ "link": transformLink, } +var rxIncludeHeader = regexp.MustCompile(`#include\s+"([^"]+)"`) + func transformAsm(args []string) ([]string, error) { if !curPkg.ToObfuscate { return args, nil // we're not obfuscating this package @@ -612,15 +614,8 @@ func transformAsm(args []string) ([]string, error) { return append(flags, newPaths...), nil } - // We need to replace all function references with their obfuscated name - // counterparts. - // Luckily, all func names in Go assembly files are immediately followed - // by the unicode "middle dot", like: - // - // TEXT ·privateAdd(SB),$0-24 - const middleDot = '·' - middleDotLen := utf8.RuneLen(middleDot) - + const missingHeader = "missing header path" + newHeaderPaths := make(map[string]string) var buf bytes.Buffer for _, path := range paths { // Read the entire file into memory. @@ -629,67 +624,57 @@ func transformAsm(args []string) ([]string, error) { if err != nil { return nil, err } - buf.Reset() - - // Find all middle-dot names, and replace them. 
- remaining := content - for { - i := bytes.IndexRune(remaining, middleDot) - if i < 0 { - buf.Write(remaining) - remaining = nil - break + offset := 0 + for _, match := range rxIncludeHeader.FindAllSubmatchIndex(content, -1) { + start, end := offset+match[2], offset+match[3] + path := string(content[start:end]) + if strings.ContainsAny(path, "\n\"") { + // If we failed to keep track of offsets, we could see a header + // path that contains quotes or newlines, which should not happen. + return nil, fmt.Errorf("bad offset tracking? %q", path) } - - // We want to replace "OP ·foo" and "OP $·foo", - // but not "OP somepkg·foo" just yet. - // "somepkg" is often runtime, syscall, etc. - // We don't obfuscate any of those for now. - // - // TODO: we'll likely need to deal with this - // when we start obfuscating the runtime. - // When we do, note that we can't hash with curPkg. - localName := false - if i >= 0 { - switch remaining[i-1] { - case ' ', '\t', '$': - localName = true + newPath := newHeaderPaths[path] + switch newPath { + case missingHeader: // no need to try again + continue + case "": // first time we see this header + buf.Reset() + content, err := os.ReadFile(path) + if errors.Is(err, fs.ErrNotExist) { + newHeaderPaths[path] = missingHeader + continue // a header file provided by Go or the system + } else if err != nil { + return nil, err } - } + replaceAsmNames(&buf, content) - i += middleDotLen - buf.Write(remaining[:i]) - remaining = remaining[i:] - - // The name ends at the first rune which cannot be part - // of a Go identifier, such as a comma or space. - nameEnd := 0 - for nameEnd < len(remaining) { - c, size := utf8.DecodeRune(remaining[nameEnd:]) - if !unicode.IsLetter(c) && c != '_' && !unicode.IsDigit(c) { - break - } - nameEnd += size - } - name := string(remaining[:nameEnd]) - remaining = remaining[nameEnd:] + // For now, we replace `foo.h` or `dir/foo.h` with `garbled_foo.h`. + // The different name ensures we don't use the unobfuscated file. 
+ // This is far from perfect, but does the job for the time being. + // In the future, use a randomized name. + newPath = "garbled_" + filepath.Base(path) - if !localName { - buf.WriteString(name) - continue - } + // Uncomment for some quick debugging. Do not delete. + // fmt.Fprintf(os.Stderr, "\n-- %s --\n%s", path, buf.Bytes()) - newName := hashWithPackage(curPkg, name) - if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed - log.Printf("asm name %q hashed with %x to %q", name, curPkg.GarbleActionID, newName) + if _, err := writeTemp(newPath, buf.Bytes()); err != nil { + return nil, err + } + newHeaderPaths[path] = newPath } - buf.WriteString(newName) + offset += len(newPath) - len(path) + // TODO: copying the bytes in a loop like this is far from optimal. + var newContent []byte + newContent = append(newContent, content[:start]...) + newContent = append(newContent, newPath...) + newContent = append(newContent, content[end:]...) + content = newContent } + buf.Reset() + replaceAsmNames(&buf, content) // Uncomment for some quick debugging. Do not delete. - // if curPkg.ToObfuscate { - // fmt.Fprintf(os.Stderr, "\n-- %s --\n%s", path, buf.Bytes()) - // } + // fmt.Fprintf(os.Stderr, "\n-- %s --\n%s", path, buf.Bytes()) name := filepath.Base(path) if path, err := writeTemp(name, buf.Bytes()); err != nil { @@ -702,6 +687,70 @@ func transformAsm(args []string) ([]string, error) { return append(flags, newPaths...), nil } +func replaceAsmNames(buf *bytes.Buffer, remaining []byte) { + // We need to replace all function references with their obfuscated name + // counterparts. 
+ // Luckily, all func names in Go assembly files are immediately followed + // by the unicode "middle dot", like: + // + // TEXT ·privateAdd(SB),$0-24 + const middleDot = '·' + middleDotLen := utf8.RuneLen(middleDot) + + for { + i := bytes.IndexRune(remaining, middleDot) + if i < 0 { + buf.Write(remaining) + remaining = nil + break + } + + // We want to replace "OP ·foo" and "OP $·foo", + // but not "OP somepkg·foo" just yet. + // "somepkg" is often runtime, syscall, etc. + // We don't obfuscate any of those for now. + // + // TODO: we'll likely need to deal with this + // when we start obfuscating the runtime. + // When we do, note that we can't hash with curPkg. + localName := false + if i > 0 { + switch remaining[i-1] { + case ' ', '\t', '$', ',', '(': + localName = true + } + } + + i += middleDotLen + buf.Write(remaining[:i]) + remaining = remaining[i:] + + // The name ends at the first rune which cannot be part + // of a Go identifier, such as a comma or space. + nameEnd := 0 + for nameEnd < len(remaining) { + c, size := utf8.DecodeRune(remaining[nameEnd:]) + if !unicode.IsLetter(c) && c != '_' && !unicode.IsDigit(c) { + break + } + nameEnd += size + } + name := string(remaining[:nameEnd]) + remaining = remaining[nameEnd:] + + if !localName { + buf.WriteString(name) + continue + } + + newName := hashWithPackage(curPkg, name) + if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 is fixed + log.Printf("asm name %q hashed with %x to %q", name, curPkg.GarbleActionID, newName) + } + buf.WriteString(newName) + } +} + // writeTemp is a mix between os.CreateTemp and os.WriteFile, as it writes a // named source file in sharedTempDir given an input buffer. // diff --git a/testdata/script/asm.txtar b/testdata/script/asm.txtar index 2768ec7..9f3842c 100644 --- a/testdata/script/asm.txtar +++ b/testdata/script/asm.txtar @@ -1,11 +1,14 @@ +# Note that it doesn't really matter if the assembly below is badly written.
+# We just care enough to see that it obfuscates and keeps the same behavior. # TODO: support arm64, at least -[!386] [!amd64] skip 'the assembly is only written for 386 and amd64' +[!amd64] skip 'the assembly is only written for amd64' env GOGARBLE=test/main garble build exec ./main cmp stderr main.stderr +# TODO: ! binsubstr main$exe 'test/main' 'privateAdd' 'PublicAdd' 'garble_main' 'garble_define' ! binsubstr main$exe 'privateAdd' 'PublicAdd' [short] stop # no need to verify this with -short @@ -33,26 +36,59 @@ import ( func privateAdd(x, y int32) int32 +// goData is used from both assembly and header files. +var goData = [4]uint64{1, 2, 3, 4} + +func modifyGoData() +func modifyGoData2() + func main() { println(privateAdd(1, 2)) + + println(goData[0], goData[1]) + modifyGoData() + println(goData[0], goData[1]) + modifyGoData2() + println(goData[0], goData[1]) + println(imported.PublicAdd(3, 4)) } --- main_x86.s -- -//go:build 386 || amd64 - +-- garble_main_amd64.s -- TEXT ·privateAdd(SB),$0-16 MOVL x+0(FP), BX MOVL y+4(FP), BP ADDL BP, BX MOVL BX, ret+8(FP) RET + +#include "garble_define_amd64.h" + +#include "extra/garble_define2_amd64.h" + +TEXT ·modifyGoData(SB),$0-16 + addGoDataTo($12) + ADDL $34, ·goData+8(SB) + RET + +TEXT ·modifyGoData2(SB),$0-16 + addGoDataTo2($12) + ADDL $34,·goData+8(SB) // note the lack of a space + RET + +-- garble_define_amd64.h -- +#define addGoDataTo(arg) \ + ADDL arg, ·goData+0(SB) + +-- extra/garble_define2_amd64.h -- +#define addGoDataTo2(arg) \ + ADDL arg, ·goData+0(SB) + -- imported/imported.go -- package imported func PublicAdd(x, y int32) int32 --- imported/imported_x86.s -- -//go:build 386 || amd64 +-- imported/imported_amd64.s -- TEXT ·PublicAdd(SB),$0-16 MOVL x+0(FP), BX MOVL y+4(FP), BP @@ -61,4 +97,7 @@ TEXT ·PublicAdd(SB),$0-16 RET -- main.stderr -- 3 +1 2 +13 36 +25 70 7