From ec5b6df439189874103ef572eaa34d8d46e1bc22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 24 Nov 2024 13:18:52 +0000 Subject: [PATCH] support collecting deep cpu and memory profiles from benchmarks This allows me to collect a full CPU profile, showing us that we clearly spend too much CPU time in garbage collection. When collecting a full memory profile, we can see where the allocations come from now: Showing nodes accounting for 5636770, 31.07% of 18141579 total Dropped 630 nodes (cum <= 90707) Showing top 10 nodes out of 278 flat flat% sum% cum cum% 1692229 9.33% 9.33% 1910679 10.53% encoding/gob.decStringSlice 1005596 5.54% 14.87% 1005596 5.54% golang.org/x/tools/go/ssa.buildReferrers 458753 2.53% 17.40% 458753 2.53% go/scanner.(*Scanner).scanIdentifier 458752 2.53% 19.93% 458752 2.53% reflect.(*structType).Field 425984 2.35% 22.28% 448136 2.47% go/parser.(*parser).parseIdent 390049 2.15% 24.43% 390049 2.15% golang.org/x/tools/go/ssa.(*BasicBlock).emit 327683 1.81% 26.23% 371373 2.05% golang.org/x/tools/go/ssa.NewConst 311296 1.72% 27.95% 1024551 5.65% mvdan.cc/garble.(*transformer).transformGoFile.func1 287891 1.59% 29.54% 287891 1.59% encoding/gob.decString 278537 1.54% 31.07% 657043 3.62% golang.org/x/tools/go/ssa.(*builder).compLit This method can work for any invocation of garble, but for now we only directly wire it up for `go test -bench`. It can still be used for regular invocations of `garble build`. --- bench_test.go | 25 +++++++++++++++++++++++++ main.go | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/bench_test.go b/bench_test.go index 7a9199e..5b8aad1 100644 --- a/bench_test.go +++ b/bench_test.go @@ -85,6 +85,31 @@ func BenchmarkBuild(b *testing.B) { "GARBLE_CACHE=" + garbleCache, "GARBLE_WRITE_ALLOCS=true", } + if prof := flag.Lookup("test.cpuprofile").Value.String(); prof != "" { + // Ensure the directory is empty and created, and pass it along, so that the garble + // sub-processes can also write CPU profiles. + // Collect and then merge the profiles as follows: + // + // go test -run=- -vet=off -bench=. -benchtime=5x -cpuprofile=cpu.pprof + // go tool pprof -proto cpu.pprof cpu.pprof-subproc/* >merged.pprof + dir, err := filepath.Abs(prof + "-subproc") + qt.Assert(b, qt.IsNil(err)) + err = os.RemoveAll(dir) + qt.Assert(b, qt.IsNil(err)) + err = os.MkdirAll(dir, 0o777) + qt.Assert(b, qt.IsNil(err)) + env = append(env, "GARBLE_WRITE_CPUPROFILES="+dir) + } + if prof := flag.Lookup("test.memprofile").Value.String(); prof != "" { + // Same as before, but for allocation profiles. + dir, err := filepath.Abs(prof + "-subproc") + qt.Assert(b, qt.IsNil(err)) + err = os.RemoveAll(dir) + qt.Assert(b, qt.IsNil(err)) + err = os.MkdirAll(dir, 0o777) + qt.Assert(b, qt.IsNil(err)) + env = append(env, "GARBLE_WRITE_MEMPROFILES="+dir) + } args := []string{"build", "-v", "-o=" + outputBin, sourceDir} for _, cached := range []bool{false, true} { diff --git a/main.go b/main.go index a31bd39..8202fb2 100644 --- a/main.go +++ b/main.go @@ -33,6 +33,7 @@ import ( "regexp" "runtime" "runtime/debug" + "runtime/pprof" "slices" "strconv" "strings" @@ -220,13 +221,40 @@ func debugSince(start time.Time) time.Duration { } func main1() int { + if dir := os.Getenv("GARBLE_WRITE_CPUPROFILES"); dir != "" { + f, err := os.CreateTemp(dir, "garble-cpu-*.pprof") + if err != nil { + panic(err) + } + if err := pprof.StartCPUProfile(f); err != nil { + panic(err) + } + defer func() { + pprof.StopCPUProfile() + if err := f.Close(); err != nil { + panic(err) + } + }() + } defer func() { - if os.Getenv("GARBLE_WRITE_ALLOCS") != "true" { - return + if dir := os.Getenv("GARBLE_WRITE_MEMPROFILES"); dir != "" { + f, err := os.CreateTemp(dir, "garble-mem-*.pprof") + if err != nil { + panic(err) + } + runtime.GC() // get up-to-date statistics + if err := pprof.WriteHeapProfile(f); err != nil { + panic(err) + } + if err := f.Close(); err != nil { + panic(err) + } + } + if os.Getenv("GARBLE_WRITE_ALLOCS") == "true" { + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + fmt.Fprintf(os.Stderr, "garble allocs: %d\n", memStats.Mallocs) } - var memStats runtime.MemStats - runtime.ReadMemStats(&memStats) - fmt.Fprintf(os.Stderr, "garble allocs: %d\n", memStats.Mallocs) }() if err := flagSet.Parse(os.Args[1:]); err != nil { return 2