// Copyright (c) 2020, The Garble Authors.
// See LICENSE for licensing information.

package main

import (
_ "embed"
|
|
|
|
"flag"
|
|
|
|
"fmt"
|
|
|
|
"math/rand/v2"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
"path/filepath"
|
"regexp"
|
|
|
|
"strconv"
|
"strings"
|
|
|
|
"testing"
|
"time"
|
|
|
|
|
|
|
|
"github.com/go-quicktest/qt"
|
|
|
|
)
|
|
|
|
|
//go:embed testdata/bench/main.go
var benchSourceMain []byte

var (
	rxBuiltRuntime = regexp.MustCompile(`(?m)^runtime$`)
	rxBuiltMain    = regexp.MustCompile(`(?m)^test/main$`)
)
// BenchmarkBuild is a benchmark for 'garble build' on a fairly simple
// main package with a handful of standard library dependencies.
//
// We use a real garble binary and exec it, to simulate what a real user would
// run. The real obfuscation and compilation will happen in sub-processes
// anyway, so skipping one exec layer doesn't help us in any way.
//
// The benchmark isn't parallel, because in practice users build once at a time,
// and each build already spawns concurrent processes and goroutines to do work.
//
// At the moment, each iteration takes 1-2s on a laptop, so we can't make the
// benchmark include any more features unless we make it significantly faster.
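//
// To run only this benchmark, something like the following should work
// (standard 'go test' flags; adjust the values as needed):
//
//	go test -run=- -bench=BenchmarkBuild -benchtime=5x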
func BenchmarkBuild(b *testing.B) {
	// As of Go 1.17, using -benchtime=Nx with N larger than 1 results in two
	// calls to BenchmarkBuild, with the first having b.N==1 to discover
	// sub-benchmarks. Unfortunately, we do a significant amount of work both
	// during setup and during that first iteration, which is pointless.
	// To avoid that, detect the scenario in a hacky way, and return early.
	// See https://github.com/golang/go/issues/32051.
	benchtime := flag.Lookup("test.benchtime").Value.String()
	if b.N == 1 && strings.HasSuffix(benchtime, "x") && benchtime != "1x" {
		return
	}
	tdir := b.TempDir()
	// We collect extra metrics.
	var memoryAllocs, cachedTime, systemTime int64

	outputBin := filepath.Join(tdir, "output")
	sourceDir := filepath.Join(tdir, "src")
	qt.Assert(b, qt.IsNil(os.Mkdir(sourceDir, 0o777)))
	writeSourceFile := func(name string, content []byte) {
		err := os.WriteFile(filepath.Join(sourceDir, name), content, 0o666)
		qt.Assert(b, qt.IsNil(err))
	}
writeSourceFile("go.mod", []byte("module test/main"))
|
|
|
|
writeSourceFile("main.go", benchSourceMain)
|
|
|
|
|
|
|
|
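	// With GARBLE_WRITE_ALLOCS=true set in env below, each garble sub-process
	// prints a "garble allocs: N" line; we sum those numbers per iteration to
	// report mallocs/op at the end.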
	rxGarbleAllocs := regexp.MustCompile(`(?m)^garble allocs: ([0-9]+)`)
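	// Only the explicitly timed sections below, the garble build commands
	// themselves, count towards the reported time; the timer stays stopped
	// during per-iteration setup such as recreating the cache directories.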
	b.ResetTimer()
	b.StopTimer()
	for i := range b.N {
		// We first do a fresh build using empty cache directories,
		// and then an incremental rebuild reusing those same caches.
		goCache := filepath.Join(tdir, "go-cache")
		qt.Assert(b, qt.IsNil(os.RemoveAll(goCache)))
		qt.Assert(b, qt.IsNil(os.Mkdir(goCache, 0o777)))
		garbleCache := filepath.Join(tdir, "garble-cache")
		qt.Assert(b, qt.IsNil(os.RemoveAll(garbleCache)))
		qt.Assert(b, qt.IsNil(os.Mkdir(garbleCache, 0o777)))
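		// RUN_GARBLE_MAIN=true makes the re-executed test binary behave as the
		// garble command itself rather than running the test suite (presumably
		// handled by TestMain), so os.Args[0] below acts as a real garble binary.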
		env := []string{
			"RUN_GARBLE_MAIN=true",
			"GOCACHE=" + goCache,
			"GARBLE_CACHE=" + garbleCache,
			"GARBLE_WRITE_ALLOCS=true",
		}
if prof := flag.Lookup("test.cpuprofile").Value.String(); prof != "" {
|
|
|
|
// Ensure the directory is empty and created, and pass it along, so that the garble
|
|
|
|
// sub-processes can also write CPU profiles.
|
|
|
|
// Collect and then merge the profiles as follows:
|
|
|
|
//
|
|
|
|
// go test -run=- -vet=off -bench=. -benchtime=5x -cpuprofile=cpu.pprof
|
|
|
|
// go tool pprof -proto cpu.pprof cpu.pprof-subproc/* >merged.pprof
|
|
|
|
dir, err := filepath.Abs(prof + "-subproc")
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
err = os.RemoveAll(dir)
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
err = os.MkdirAll(dir, 0o777)
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
env = append(env, "GARBLE_WRITE_CPUPROFILES="+dir)
|
|
|
|
}
|
|
|
|
if prof := flag.Lookup("test.memprofile").Value.String(); prof != "" {
|
|
|
|
// Same as before, but for allocation profiles.
|
|
|
|
dir, err := filepath.Abs(prof + "-subproc")
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
err = os.RemoveAll(dir)
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
err = os.MkdirAll(dir, 0o777)
|
|
|
|
qt.Assert(b, qt.IsNil(err))
|
|
|
|
env = append(env, "GARBLE_WRITE_MEMPROFILES="+dir)
|
|
|
|
}
|
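		// The -v flag makes the underlying 'go build' print each package as it
		// is built, which is what the rxBuilt* regexps above match against.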
		args := []string{"build", "-v", "-o=" + outputBin, sourceDir}

		for _, cached := range []bool{false, true} {
			// The cached rebuild will reuse all dependencies,
			// but rebuild the main package itself.
			if cached {
				writeSourceFile("rebuild.go", fmt.Appendf(nil, "package main\nvar v%d int", i))
			}

			cmd := exec.Command(os.Args[0], args...)
			cmd.Env = append(cmd.Environ(), env...)
			cmd.Dir = sourceDir

			cachedStart := time.Now()
			b.StartTimer()
			out, err := cmd.CombinedOutput()
			b.StopTimer()
			if cached {
				cachedTime += time.Since(cachedStart).Nanoseconds()
			}

			qt.Assert(b, qt.IsNil(err), qt.Commentf("output: %s", out))
			if !cached {
				// Ensure that we built all packages, as expected.
				qt.Assert(b, qt.IsTrue(rxBuiltRuntime.Match(out)))
			} else {
				// Ensure that we only rebuilt the main package, as expected.
				qt.Assert(b, qt.IsFalse(rxBuiltRuntime.Match(out)))
			}
			qt.Assert(b, qt.IsTrue(rxBuiltMain.Match(out)))

			matches := rxGarbleAllocs.FindAllSubmatch(out, -1)
			if !cached {
				// The non-cached version should have at least a handful of
				// sub-processes; catch if our logic breaks.
				qt.Assert(b, qt.IsTrue(len(matches) > 5))
			}
			for _, match := range matches {
				allocs, err := strconv.ParseInt(string(match[1]), 10, 64)
				qt.Assert(b, qt.IsNil(err))
				memoryAllocs += allocs
			}

			systemTime += int64(cmd.ProcessState.SystemTime())
		}
	}

	// We can't use "allocs/op" as it's reserved for ReportAllocs.
	b.ReportMetric(float64(memoryAllocs)/float64(b.N), "mallocs/op")
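	// cached-ns/op is the wall-clock time spent on the incremental (cached)
	// builds, and sys-ns/op is the system CPU time of the build commands and
	// their children, both averaged over the b.N iterations.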
	b.ReportMetric(float64(cachedTime)/float64(b.N), "cached-ns/op")
	b.ReportMetric(float64(systemTime)/float64(b.N), "sys-ns/op")
	info, err := os.Stat(outputBin)
	if err != nil {
		b.Fatal(err)
	}
	b.ReportMetric(float64(info.Size()), "bin-B")
}
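// BenchmarkAbiOriginalNames measures the cost of reversing obfuscated names
// via _originalNames, using a realistically sized name table and a mix of
// obfuscated and non-obfuscated inputs.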
func BenchmarkAbiOriginalNames(b *testing.B) {
	// Benchmark two thousand obfuscated names in _originalNamePairs
	// and a variety of input strings to reverse.
	// As an example, the cmd/go binary ends up with about 2200 entries
	// in _originalNamePairs as of November 2024, so it's a realistic figure.
	// Structs with tens of fields are also relatively normal.
	salt := []byte("some salt bytes")
	for n := range 2000 {
		name := fmt.Sprintf("name_%d", n)
		garbled := hashWithCustomSalt(salt, name)
		_originalNamePairs = append(_originalNamePairs, garbled, name)
	}
	_originalNamesInit()
	// Pick twenty obfuscated names at random to use as inputs below.
	// Use a deterministic random source so it's stable between benchmark runs.
	rnd := rand.New(rand.NewPCG(1, 2))
	var chosen []string
	for i := 0; i < len(_originalNamePairs); i += 2 {
		chosen = append(chosen, _originalNamePairs[i])
	}
	rnd.Shuffle(len(chosen), func(i, j int) {
		chosen[i], chosen[j] = chosen[j], chosen[i]
	})
	chosen = chosen[:20]
	inputs := []string{
		// non-obfuscated names and types
		"Error",
		"int",
		"*[]*interface {}",
		"*map[uint64]bool",
		// an obfuscated name
		chosen[0],
		// an obfuscated *pkg.Name
		fmt.Sprintf("*%s.%s", chosen[1], chosen[2]),
		// big struct with more than a dozen string field types
		fmt.Sprintf("struct { %s string }", strings.Join(chosen[3:], " string ")),
	}
	var inputBytes int
	for _, input := range inputs {
		inputBytes += len(input)
	}
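	// Report throughput in terms of input bytes reversed per second;
	// each iteration processes every input once.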
	b.SetBytes(int64(inputBytes))
	b.ReportAllocs()
	b.ResetTimer()
	// We use a parallel benchmark because internal/abi's Name method
	// is meant to be called by any goroutine at any time.
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for _, input := range inputs {
				_originalNames(input)
			}
		}
	})
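	// Reset the package-level state so that other tests and benchmarks
	// aren't affected by the entries added above.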
	_originalNamePairs = []string{}
	_originalNamesReplacer = nil
}