You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
garble/internal/linker/linker.go

283 lines
7.8 KiB
Go

// Copyright (c) 2022, The Garble Authors.
// See LICENSE for licensing information.
package linker
import (
"bytes"
"crypto/sha256"
"embed"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"github.com/bluekeyes/go-gitdiff/gitdiff"
"github.com/rogpeppe/go-internal/lockedfile"
)
const (
// Exported string constants. NOTE(review): judging by the names and values
// these are presumably environment variable names shared with the patched
// linker; they are not referenced in this part of the file — confirm.
MagicValueEnv = "GARBLE_LINK_MAGIC"
TinyEnv = "GARBLE_LINK_TINY"
EntryOffKeyEnv = "GARBLE_LINK_ENTRYOFF_KEY"
// cacheDirName is the directory appended to the base cache directory by cachePath.
cacheDirName = "garble"
// versionExt is the suffix appended to the linker path to name its version file.
versionExt = ".version"
// garbleCacheDir is the environment variable cachePath consults for the base
// cache directory before falling back to os.UserCacheDir.
garbleCacheDir = "GARBLE_CACHE_DIR"
// baseSrcSubdir is joined onto the Go root by PatchLinker to locate the sources to patch.
baseSrcSubdir = "src"
)
//go:embed patches/*/*.patch
use fewer build flags when building std or cmd When we use `go list` on the standard library, we need to be careful about what flags are passed from the top-level build command, because some flags are not going to be appropriate. In particular, GOFLAGS=-modfile=... resulted in a failure, reproduced via the GOFLAGS variable added to linker.txtar: go: inconsistent vendoring in /home/mvdan/tip/src: golang.org/x/crypto@v0.5.1-0.20230203195927-310bfa40f1e4: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod golang.org/x/net@v0.7.0: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod golang.org/x/sys@v0.5.1-0.20230208141308-4fee21c92339: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod golang.org/x/text@v0.7.1-0.20230207171107-30dadde3188b: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod To ignore the vendor directory, use -mod=readonly or -mod=mod. To sync the vendor directory, run: go mod vendor To work around this problem, reset the -mod and -modfile flags when calling "go list" on the standard library, as those are the only two flags which alter how we load the main module in a build. The code which builds a modified cmd/link has a similar problem; it already reset GOOS and GOARCH, but it could similarly run into problems if other env vars like GOFLAGS were set. To be on the safe side, we also disable GOENV and GOEXPERIMENT, which we borrow from Go's bootstrapping commands.
1 year ago
var linkerPatchesFS embed.FS
// loadLinkerPatches reads every embedded patch found under
// patches/<majorGoVersion> and returns:
//   - version: the unpadded base64 encoding of a SHA-256 digest computed over
//     the contents of all patches, in walk order;
//   - modFiles: the set of file names (the "old" name in each diff) that the
//     patches modify;
//   - patches: the raw bytes of each patch file.
//
// Any walk, read or parse failure is returned as err with the other results
// zeroed. It panics if a patch adds, deletes, copies or renames a file, as
// only in-place modifications are supported.
func loadLinkerPatches(majorGoVersion string) (version string, modFiles map[string]bool, patches [][]byte, err error) {
	digest := sha256.New()
	touched := make(map[string]bool)
	var rawPatches [][]byte

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}

		content, readErr := linkerPatchesFS.ReadFile(path)
		if readErr != nil {
			return readErr
		}
		if _, hashErr := digest.Write(content); hashErr != nil {
			return hashErr
		}

		parsed, _, parseErr := gitdiff.Parse(bytes.NewReader(content))
		if parseErr != nil {
			return parseErr
		}
		for _, f := range parsed {
			if unsupported := f.IsNew || f.IsDelete || f.IsCopy || f.IsRename; unsupported {
				panic("only modification patch is supported")
			}
			touched[f.OldName] = true
		}

		rawPatches = append(rawPatches, content)
		return nil
	}

	if walkErr := fs.WalkDir(linkerPatchesFS, "patches/"+majorGoVersion, visit); walkErr != nil {
		return "", nil, nil, walkErr
	}
	return base64.RawStdEncoding.EncodeToString(digest.Sum(nil)), touched, rawPatches, nil
}
// copyFile copies the contents of src into target, creating target's parent
// directories as needed and truncating target if it already exists.
//
// It returns the first error encountered, including an error from closing the
// written file: a failed close can mean the data never reached the disk, so it
// must not be reported as success.
func copyFile(src, target string) (err error) {
	targetDir := filepath.Dir(target)
	if err := os.MkdirAll(targetDir, 0o777); err != nil {
		return err
	}

	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	// The source is only read, so a close error carries no useful information.
	defer srcFile.Close()

	targetFile, err := os.Create(target)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier copy error if there is one; otherwise surface the close error.
		if closeErr := targetFile.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(targetFile, srcFile)
	return err
}
// fileExists reports whether path can be stat'ed and is not a directory.
// Any stat error, including "not found", is treated as "does not exist".
func fileExists(path string) bool {
	info, err := os.Stat(path)
	return err == nil && !info.IsDir()
}
// TODO(pagran): Remove git dependency in future
// more information in README.md

// applyPatches copies every file named in modFiles from srcDir into
// workingDir and then pipes all patches, joined by newlines, into a single
// "git apply" run inside workingDir.
//
// On success it returns a map from each original file path to the path of its
// patched copy. It fails if a copy fails, if git fails, or if the number of
// "Applied patch ... cleanly." lines in git's output differs from len(patches).
func applyPatches(srcDir, workingDir string, modFiles map[string]bool, patches [][]byte) (map[string]string, error) {
	overlay := make(map[string]string, len(modFiles))
	for name := range modFiles {
		original := filepath.Join(srcDir, name)
		patched := filepath.Join(workingDir, name)
		if err := copyFile(original, patched); err != nil {
			return nil, err
		}
		overlay[original] = patched
	}

	// When a parent directory of workingDir is a git repository, setting the
	// current directory alone is not enough: git would treat workingDir as a
	// subfolder of that repository and "git apply" would break. Passing
	// --git-dir prevents that.
	gitApply := exec.Command("git", "--git-dir", workingDir, "apply", "--verbose")
	gitApply.Dir = workingDir
	// Presumably pins the language of git's messages so the regexp below matches.
	gitApply.Env = append(gitApply.Env, "LANG=en_US")
	gitApply.Stdin = bytes.NewReader(bytes.Join(patches, []byte("\n")))

	output, runErr := gitApply.CombinedOutput()
	if runErr != nil {
		if _, exited := runErr.(*exec.ExitError); exited {
			// git ran but reported failure; include what it printed.
			return nil, fmt.Errorf("%v:\n%s", runErr, output)
		}
		return nil, runErr
	}

	// A zero exit status does not guarantee that every patch was applied,
	// so count the per-patch success lines in the verbose output.
	appliedLine := regexp.MustCompile(`(?m)^Applied patch .+ cleanly\.$`)
	applied := len(appliedLine.FindAllIndex(output, -1))
	if applied != len(patches) {
		return nil, fmt.Errorf("expected %d applied patches, actually %d:\n\n%s", len(patches), applied, string(output))
	}
	return overlay, nil
}
replace our caching inside GOCACHE with GARBLE_CACHE For each Go package we obfuscate, we need to store information about how we obfuscated it, which is needed when obfuscating its dependents. For example, if A depends on B to use the type B.Foo, A needs to know whether or not B.Foo was obfuscated; it depends on B's use of reflect. We record this information in a gob file, which is cached on disk. To avoid rolling our own custom cache, and since garble is so closely connected with cmd/go already, we piggybacked off of Go's GOCACHE. In particular, for each build cache entry per `go list`'s Export field, we would store a "garble" sibling file with that gob content. However, this was brittle for two reasons: 1) We were doing this without cmd/go's permission or knowledge. We were careful to use filename suffixes similar to Export files, meaning that `go clean` and other commands would treat them the same. However, this could confuse cmd/go at any point in the future. 2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of the build and test caches under control. Right now, this means that every 24h, any file not accessed in the last five days is deleted. However, that trimming heuristic is done per-file. If the trimming removed Garble's sibling file but not the original Export file, this could cause errors such as "cannot load garble export file" which users already ran into. Instead, start using github.com/rogpeppe/go-internal/cache, an exported copy of cmd/go's own cache implementation for GOCACHE. Since we need an entirely separate directory, we introduce GARBLE_CACHE, defaulting to the "garble" directory inside the user's cache directory. For example, on Linux this would be ~/.cache/garble. Inside GARBLE_CACHE, our gob file cache will be under "build", which helps clarify that this cache is used when obfuscating Go builds, and allows placing other kinds of caches inside GARBLE_CACHE. 
For example, we already have a need for storing linker binaries, which for now still use their own caching mechanism. This commit does not make our cache properly resistant to removed files. The proof is that our seed.txtar testscript still fails the second case. However, we do rewrite all of our caching logic away from Export files, which in itself is a considerable refactor, and we add a few TODOs. One notable change is how we load gob files from dependencies when building the cache entry for the current package. We used to load the gob files from all packages in the Deps field. However, that is the list of all _transitive_ dependencies. Since these gob files are already flat, meaning they contain information about all of their transitive dependencies as well, we need only load the gob files from the direct dependencies, the Imports field. Performance is largely unchanged, since the behavior is similar. However, the change from Deps to Imports saves us some work, which can be seen in the reduced mallocs per obfuscated build. It's unclear why the binary size isn't stable. When reverting the Deps to Imports change, it then settles at 5.386Mi, which is almost exactly in between the two measurements below. I'm not sure why, but that metric appears to be slightly unstable. goos: linux goarch: amd64 pkg: mvdan.cc/garble cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics │ old │ new │ │ sec/op │ sec/op vs base │ Build-8 11.09 ± 1% 11.08 ± 1% ~ (p=0.796 n=10) │ old │ new │ │ bin-B │ bin-B vs base │ Build-8 5.390Mi ± 0% 5.382Mi ± 0% -0.14% (p=0.000 n=10) │ old │ new │ │ cached-sec/op │ cached-sec/op vs base │ Build-8 415.5m ± 4% 421.6m ± 1% ~ (p=0.190 n=10) │ old │ new │ │ mallocs/op │ mallocs/op vs base │ Build-8 35.43M ± 0% 34.05M ± 0% -3.89% (p=0.000 n=10) │ old │ new │ │ sys-sec/op │ sys-sec/op vs base │ Build-8 5.662 ± 1% 5.701 ± 2% ~ (p=0.280 n=10)
1 year ago
// TODO: put linker binaries into fsCache in the main package

// cachePath returns the path at which the patched linker binary is cached,
// creating the containing directory if needed.
//
// The base directory is taken from the environment variable named by
// garbleCacheDir when it is set, and from os.UserCacheDir otherwise; the
// cacheDirName directory is then appended to it. The binary is named "link",
// with an ".exe" suffix on Windows.
//
// An error is returned if the user cache directory cannot be determined or if
// the cache directory cannot be created.
func cachePath() (string, error) {
	cacheDir, ok := os.LookupEnv(garbleCacheDir)
	if !ok {
		userCacheDir, err := os.UserCacheDir()
		if err != nil {
			return "", fmt.Errorf("cannot retrieve user cache directory: %w", err)
		}
		cacheDir = userCacheDir
	}

	cacheDir = filepath.Join(cacheDir, cacheDirName)
	if err := os.MkdirAll(cacheDir, 0o777); err != nil {
		return "", err
	}

	goExe := ""
	if runtime.GOOS == "windows" {
		goExe = ".exe"
	}

	// Note that we only keep one patched and built linker in the cache.
	// If the user switches between Go versions or garble versions often,
	// this may result in rebuilds since we don't keep multiple binaries in the cache.
	// We can consider keeping multiple versions of the binary in our cache in the future,
	// similar to how GOCACHE works with multiple built versions of the same package.
	return filepath.Join(cacheDir, "link"+goExe), nil
}
// getCurrentVersion builds the version string stored next to the cached
// linker: the Go version and the patches version separated by one space.
func getCurrentVersion(goVersion, patchesVer string) string {
	const separator = " "
	return goVersion + separator + patchesVer
}
// checkVersion reports whether the version file stored next to linkerPath
// (linkerPath plus versionExt) holds exactly the version string for goVersion
// and patchesVer. A missing version file yields (false, nil); any other read
// error is returned.
func checkVersion(linkerPath, goVersion, patchesVer string) (bool, error) {
	recorded, err := os.ReadFile(linkerPath + versionExt)
	switch {
	case os.IsNotExist(err):
		return false, nil
	case err != nil:
		return false, err
	}
	expected := getCurrentVersion(goVersion, patchesVer)
	return expected == string(recorded), nil
}
func writeVersion(linkerPath, goVersion, patchesVer string) error {
versionPath := linkerPath + versionExt
return os.WriteFile(versionPath, []byte(getCurrentVersion(goVersion, patchesVer)), 0o777)
}
// buildLinker compiles cmd/link into outputLinkPath, substituting source
// files according to overlay (original path -> replacement path).
//
// The overlay is written as JSON to overlay.json inside workingDir and handed
// to "go build" via -overlay. On failure the returned error wraps the
// underlying error and includes the combined output of the build.
func buildLinker(workingDir string, overlay map[string]string, outputLinkPath string) error {
	file, err := json.Marshal(&struct{ Replace map[string]string }{overlay})
	if err != nil {
		return err
	}
	overlayPath := filepath.Join(workingDir, "overlay.json")
	if err := os.WriteFile(overlayPath, file, 0o777); err != nil {
		return err
	}

	cmd := exec.Command("go", "build", "-overlay", overlayPath, "-o", outputLinkPath, "cmd/link")
	// Ignore any build settings from the environment or GOENV.
	// We want to build cmd/link like the rest of the toolchain,
	// regardless of what build options are set for the current build.
	//
	// TODO: a nicer way would be to use the same flags recorded in the current
	// cmd/link binary, which can be seen via:
	//
	//	go version -m ~/tip/pkg/tool/linux_amd64/link
	//
	// and which can be done from Go via debug/buildinfo.ReadFile.
	cmd.Env = append(os.Environ(),
		"GOENV=off", "GOOS=", "GOARCH=", "GOEXPERIMENT=", "GOFLAGS=",
	)
	// Building cmd/link is possible from anywhere, but to avoid any possible side effects build in a temp directory
	cmd.Dir = workingDir

	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("compiler compile error: %w\n\n%s", err, string(out))
	}
	return nil
}
// PatchLinker returns the path of a cmd/link binary built with the embedded
// patches for the Go release named in goVersion, building it first if the
// cached binary is missing or was built for a different Go or patch version.
//
// goRoot is the Go root whose "src" directory supplies the files to patch, and
// tempDir is a scratch directory in which the patched sources are prepared.
//
// Access to the cached binary is guarded by a lock file next to it. On success
// the lock is still held and the returned function releases it; the caller
// must call it once it is done with the linker. On failure the lock is
// released before returning and the returned function is nil.
func PatchLinker(goRoot, goVersion, tempDir string) (string, func(), error) {
	// rxVersion looks for a version like "go1.19" or "go1.20"
	rxVersion := regexp.MustCompile(`go\d+\.\d+`)
	majorGoVersion := rxVersion.FindString(goVersion)
	if majorGoVersion == "" {
		return "", nil, fmt.Errorf("cannot retrieve linker patches: no Go version found in %q", goVersion)
	}

	patchesVer, modFiles, patches, err := loadLinkerPatches(majorGoVersion)
	if err != nil {
		return "", nil, fmt.Errorf("cannot retrieve linker patches: %w", err)
	}

	outputLinkPath, err := cachePath()
	if err != nil {
		return "", nil, err
	}

	mutex := lockedfile.MutexAt(outputLinkPath + ".lock")
	unlock, err := mutex.Lock()
	if err != nil {
		return "", nil, err
	}

	// If build is successful, mutex unlocking must be on the caller's side
	successBuild := false
	defer func() {
		if !successBuild {
			unlock()
		}
	}()

	isCorrectVer, err := checkVersion(outputLinkPath, goVersion, patchesVer)
	if err != nil {
		return "", nil, err
	}
	// Reuse the cached binary only when both the version file matches and the
	// binary itself is present.
	if isCorrectVer && fileExists(outputLinkPath) {
		successBuild = true
		return outputLinkPath, unlock, nil
	}

	srcDir := filepath.Join(goRoot, baseSrcSubdir)
	workingDir := filepath.Join(tempDir, "linker-src")

	overlay, err := applyPatches(srcDir, workingDir, modFiles, patches)
	if err != nil {
		return "", nil, err
	}
	if err := buildLinker(workingDir, overlay, outputLinkPath); err != nil {
		return "", nil, err
	}
	// The version file is written last, so an interrupted build is not
	// mistaken for a valid cache entry on the next run.
	if err := writeVersion(outputLinkPath, goVersion, patchesVer); err != nil {
		return "", nil, err
	}

	successBuild = true
	return outputLinkPath, unlock, nil
}