You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
garble/hash.go

164 lines
4.6 KiB
Go

// Copyright (c) 2019, The Garble Authors.
// See LICENSE for licensing information.
package main
import (
"bytes"
"crypto/sha256"
"encoding/base64"
"fmt"
"go/token"
"io"
"os"
"os/exec"
"strings"
)
const buildIDSeparator = "/"
// splitActionID returns the action ID half of a build ID, the first element.
func splitActionID(buildID string) string {
i := strings.Index(buildID, buildIDSeparator)
if i < 0 {
return buildID
}
return buildID[:i]
}
// splitContentID returns the content ID half of a build ID, the last element.
func splitContentID(buildID string) string {
return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
}
// decodeHash is the opposite of hashToString, but with a panic for error
// handling since it should never happen.
func decodeHash(str string) []byte {
h, err := base64.RawURLEncoding.DecodeString(str)
if err != nil {
panic(fmt.Sprintf("invalid hash %q: %v", str, err))
}
return h
}
func alterToolVersion(tool string, args []string) error {
cmd := exec.Command(args[0], args[1:]...)
out, err := cmd.Output()
if err != nil {
if err, _ := err.(*exec.ExitError); err != nil {
return fmt.Errorf("%v: %s", err, err.Stderr)
}
return err
}
line := string(bytes.TrimSpace(out)) // no trailing newline
f := strings.Fields(line)
if len(f) < 3 || f[0] != tool || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") {
return fmt.Errorf("%s -V=full: unexpected output:\n\t%s", args[0], line)
}
var toolID []byte
if f[2] == "devel" {
// On the development branch, use the content ID part of the build ID.
toolID = decodeHash(splitContentID(f[len(f)-1]))
} else {
// For a release, the output is like: "compile version go1.9.1 X:framepointer".
// Use the whole line.
toolID = []byte(line)
}
contentID, err := ownContentID(toolID)
if err != nil {
return fmt.Errorf("cannot obtain garble's own version: %v", err)
}
// The part of the build ID that matters is the last, since it's the
// "content ID" which is used to work out whether there is a need to redo
// the action (build) or not. Since cmd/go parses the last word in the
// output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}".
// The slashes let us imitate a full binary build ID, but we assume that
// the other components such as the action ID are not necessary, since the
// only reader here is cmd/go and it only consumes the content ID.
fmt.Printf("%s +garble buildID=_/_/_/%s\n", line, contentID)
return nil
}
func ownContentID(toolID []byte) (string, error) {
// We can't rely on the module version to exist, because it's
// missing in local builds without 'go get'.
// For now, use 'go tool buildid' on the binary that's running. Just
// like Go's own cache, we use hex-encoded sha256 sums.
// Once https://github.com/golang/go/issues/37475 is fixed, we
// can likely just use that.
path, err := os.Executable()
if err != nil {
return "", err
}
buildID, err := buildidOf(path)
if err != nil {
return "", err
}
ownID := decodeHash(splitContentID(buildID))
// Join the two content IDs together into a single base64-encoded sha256
// sum. This includes the original tool's content ID, and garble's own
// content ID.
h := sha256.New()
h.Write(toolID)
h.Write(ownID)
// We also need to add the selected options to the full version string,
// because all of them result in different output. We use spaces to
// separate the env vars and flags, to reduce the chances of collisions.
if envGoPrivate != "" {
fmt.Fprintf(h, " GOPRIVATE=%s", envGoPrivate)
}
if opts.GarbleLiterals {
fmt.Fprintf(h, " -literals")
}
if opts.Tiny {
fmt.Fprintf(h, " -tiny")
}
if len(opts.Seed) > 0 {
fmt.Fprintf(h, " -seed=%x", opts.Seed)
}
return hashToString(h.Sum(nil)), nil
}
// hashToString encodes the first 120 bits of a sha256 sum in base64, the same
// format used for elements in a build ID.
func hashToString(h []byte) string {
return base64.RawURLEncoding.EncodeToString(h[:15])
}
func buildidOf(path string) (string, error) {
cmd := exec.Command("go", "tool", "buildid", path)
out, err := cmd.Output()
if err != nil {
if err, _ := err.(*exec.ExitError); err != nil {
return "", fmt.Errorf("%v: %s", err, err.Stderr)
}
return "", err
}
return string(out), nil
}
func hashWith(salt []byte, name string) string {
obfuscate unexported names like exported ones (#227) In 90fa325da7, the obfuscation logic was changed to use hashes for exported names, but incremental names starting at just one letter for unexported names. Presumably, this was done for the sake of binary size. I argue that this is not a good idea for the default mode for a number of reasons: 1) It makes reversing of stack traces nearly impossible for unexported names, since replacing an obfuscated name "c" with "originalName" would trigger too many false positives by matching single characters. 2) Exported and unexported names aren't different. We need to know how names were obfuscated at a later time in both cases, thanks to use cases like -ldflags=-X. Using short names for one but not the other doesn't make a lot of sense, and makes the logic inconsistent. 3) Shaving off three bytes for unexported names doesn't seem like a huge deal for the default mode, when we already have -tiny to optimize for size. This saves us a bit of work, but most importantly, simplifies the obfuscation state as we no longer need to carry privateNameMap between the compile and link stages. name old time/op new time/op delta Build-8 153ms ± 2% 150ms ± 2% ~ (p=0.065 n=6+6) name old bin-B new bin-B delta Build-8 7.09M ± 0% 7.08M ± 0% -0.24% (p=0.002 n=6+6) name old sys-time/op new sys-time/op delta Build-8 296ms ± 5% 277ms ± 6% -6.50% (p=0.026 n=6+6) name old user-time/op new user-time/op delta Build-8 562ms ± 1% 558ms ± 3% ~ (p=0.329 n=5+6) Note that I do not oppose using short names for both exported and unexported names in the future for -tiny, since reversing of stack traces will by design not work there. The code can be resurrected from the git history if we want to improve -tiny that way in the future, as we'd need to store state in header files again. Another major cleanup we can do here is to no longer use the garbledImports map. From a look at obfuscateImports, we hash a package's import path with its action ID, much like exported names, so we can simply re-do that hashing for the linker's -X flag. garbledImports does have some logic to handle duplicate package names, but it's worth noting that should not affect package paths, as they are always unique. That area of code could probably do with some simplification in the future, too. While at it, make hashWith panic if either parameter is empty. obfuscateImports was hashing the main package path without a salt due to a bug, so we want to catch those in the future. Finally, make some tiny spacing and typo tweaks to the README.
3 years ago
if len(salt) == 0 {
panic("hashWith: empty salt")
}
if name == "" {
panic("hashWith: empty name")
}
const length = 4
d := sha256.New()
d.Write(salt)
d.Write(opts.Seed)
io.WriteString(d, name)
sum := b64.EncodeToString(d.Sum(nil))
if token.IsExported(name) {
return "Z" + sum[:length]
}
return "z" + sum[:length]
}