replace go/parser with go/scanner in printFile

printFile is one of the functions to blame for most of the CPU cost and
allocations for garble itself, as reported by `perf record` for a clean build.

One contributor is how we print each file and then parse it again,
which we did for the sake of inserting line directives correctly.

With a bit of care, we can do this by tokenizing after printing,
as opposed to parsing into a full go/ast again.

This is moderately cheaper in CPU time but, more importantly, allocates far less.
That is to be expected given how go/ast is a tree of pointers,
whereas go/scanner simply gives us a stream of tokens.

	name      old time/op         new time/op         delta
	Build-16          10.4s ± 2%          10.3s ± 1%    ~     (p=0.393 n=10+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.51M ± 0%          5.51M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          398ms ±12%          391ms ±10%    ~     (p=0.529 n=10+10)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          34.4M ± 0%          31.8M ± 0%  -7.65%  (p=0.000 n=10+10)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.80s ± 6%          5.86s ± 4%    ~     (p=0.218 n=10+10)

The new code is shorter, but perhaps a bit trickier,
so I also added more comments to explain what's going on.

Note how the time/op change is practically noise,
but mallocs/op goes down significantly, which is always a good sign.
pull/556/head
Daniel Martí 2 years ago committed by lu4p
parent 21bd89ff73
commit d2622e8223

@ -7,12 +7,11 @@ import (
"bytes"
"fmt"
"go/ast"
"go/parser"
"go/printer"
"go/scanner"
"go/token"
"path/filepath"
"strings"
"golang.org/x/exp/slices"
)
// printBuf1 and printBuf2 are package-level scratch buffers used by printFile.
// printBuf1 receives the go/printer output for the file, and printBuf2
// accumulates the final source with line directives inserted.
// printFile resets each buffer before use and returns the buffer's bytes
// directly, so a returned slice is presumably only valid until the next
// printFile call — NOTE(review): confirm callers do not retain it.
var printBuf1, printBuf2 bytes.Buffer
@ -20,11 +19,11 @@ var printBuf1, printBuf2 bytes.Buffer
// printFile prints a Go file to a buffer, while also removing non-directive
// comments and adding extra compiler directives to obfuscate position
// information.
func printFile(file1 *ast.File) ([]byte, error) {
func printFile(file *ast.File) ([]byte, error) {
printConfig := printer.Config{Mode: printer.RawFormat}
printBuf1.Reset()
if err := printConfig.Fprint(&printBuf1, fset, file1); err != nil {
if err := printConfig.Fprint(&printBuf1, fset, file); err != nil {
return nil, err
}
src := printBuf1.Bytes()
@ -36,7 +35,8 @@ func printFile(file1 *ast.File) ([]byte, error) {
return src, nil
}
filename := filepath.Base(fset.Position(file1.Pos()).Filename)
fsetFile := fset.File(file.Pos())
filename := filepath.Base(fsetFile.Name())
if strings.HasPrefix(filename, "_cgo_") {
// cgo-generated files don't need changed line numbers.
// Plus, the compiler can complain rather easily.
@ -47,78 +47,25 @@ func printFile(file1 *ast.File) ([]byte, error) {
// Unfortunately, comments are free-floating in File.Comments,
// and those are the only source of truth that go/printer uses.
// So the positions of the comments in the given file are wrong.
// The only way we can get the final ones is to parse again.
//
// We use an empty filename here.
// Syntax errors should be rare, and when they do happen,
// we don't want to point to the original source file on disk.
// That would be confusing, as we've changed the source in memory.
file2, err := parser.ParseFile(fset, "", src, parser.SkipObjectResolution|parser.ParseComments)
if err != nil {
return nil, fmt.Errorf("re-parse error: %w", err)
}
// Remove any comments by making them whitespace.
// Keep directives, as they affect the build.
// This is superior to removing the comments before printing,
// as otherwise 'garble reverse' would show different line numbers.
for _, group := range file2.Comments {
for _, comment := range group.List {
if strings.HasPrefix(comment.Text, "//go:") {
continue
}
start := fset.Position(comment.Pos()).Offset
end := fset.Position(comment.End()).Offset
for i := start; i < end; i++ {
src[i] = ' '
}
}
}
// The only way we can get the final ones is to tokenize again.
// Using go/scanner is slightly awkward, but cheaper than parsing again.
// We want to use the original positions for the hashed positions.
var origCallExprs []*ast.CallExpr
ast.Inspect(file1, func(node ast.Node) bool {
if node, ok := node.(*ast.CallExpr); ok {
origCallExprs = append(origCallExprs, node)
}
return true
})
type commentToAdd struct {
offset int
text string
}
var toAdd []commentToAdd
i := 0
ast.Inspect(file2, func(node ast.Node) bool {
node, ok := node.(*ast.CallExpr)
if !ok {
return true
// Since later we'll iterate on tokens rather than walking an AST,
// we use a list of offsets indexed by identifiers in source order.
var origCallOffsets []int
nextOffset := -1
ast.Inspect(file, func(node ast.Node) bool {
switch node := node.(type) {
case *ast.CallExpr:
nextOffset = fsetFile.Position(node.Pos()).Offset
case *ast.Ident:
origCallOffsets = append(origCallOffsets, nextOffset)
nextOffset = -1
}
origNode := origCallExprs[i]
i++
newName := ""
if !flagTiny {
origPos := fmt.Sprintf("%s:%d", filename, fset.Position(origNode.Pos()).Offset)
newName = hashWithPackage(curPkg, origPos) + ".go"
// log.Printf("%q hashed with %x to %q", origPos, curPkg.GarbleActionID, newName)
}
pos := fset.Position(node.Pos())
// We use the "/*text*/" form, since we can use multiple of them
// on a single line, and they don't require extra newlines.
toAdd = append(toAdd, commentToAdd{
offset: pos.Offset,
text: fmt.Sprintf("/*line %s:1*/", newName),
})
return true
})
// We add comments in order.
slices.SortFunc(toAdd, func(a, b commentToAdd) bool {
return a.offset < b.offset
})
copied := 0
printBuf2.Reset()
@ -128,19 +75,58 @@ func printFile(file1 *ast.File) ([]byte, error) {
// toAdd is for /*-style comments, so add it to printBuf2 directly.
printBuf2.WriteString("//line :1\n")
for _, comment := range toAdd {
printBuf2.Write(src[copied:comment.offset])
copied = comment.offset
// We use an empty filename when tokenizing below.
// We use a nil go/scanner.ErrorHandler because src comes from go/printer.
// Syntax errors should be rare, and when they do happen,
// we don't want to point to the original source file on disk.
// That would be confusing, as we've changed the source in memory.
var s scanner.Scanner
fsetFile = fset.AddFile("", fset.Base(), len(src))
s.Init(fsetFile, src, nil, scanner.ScanComments)
identIndex := 0
for {
pos, tok, lit := s.Scan()
switch tok {
case token.EOF:
// Copy the rest and return.
printBuf2.Write(src[copied:])
return printBuf2.Bytes(), nil
case token.COMMENT:
// Omit comments from the final Go code.
// Keep directives, as they affect the build.
// This is superior to removing the comments before printing,
// because then the final source would have different line numbers.
if strings.HasPrefix(lit, "//go:") {
continue // directives are kept
}
offset := fsetFile.Position(pos).Offset
printBuf2.Write(src[copied:offset])
copied = offset + len(lit)
case token.IDENT:
origOffset := origCallOffsets[identIndex]
identIndex++
if origOffset == -1 {
continue // identifiers which don't start func calls are left untouched
}
newName := ""
if !flagTiny {
origPos := fmt.Sprintf("%s:%d", filename, origOffset)
newName = hashWithPackage(curPkg, origPos) + ".go"
// log.Printf("%q hashed with %x to %q", origPos, curPkg.GarbleActionID, newName)
}
// We assume that all comments are of the form "/*text*/".
// Make sure there is whitespace at either side of a comment.
// Otherwise, we could change the syntax of the program.
// Inserting "/*text*/" in "a/b" must be "a/ /*text*/ b",
// as "a//*text*/b" is tokenized as a "//" comment.
printBuf2.WriteByte(' ')
printBuf2.WriteString(comment.text)
printBuf2.WriteByte(' ')
offset := fsetFile.Position(pos).Offset
printBuf2.Write(src[copied:offset])
copied = offset
// We use the "/*text*/" form, since we can use multiple of them
// on a single line, and they don't require extra newlines.
// Make sure there is whitespace at either side of a comment.
// Otherwise, we could change the syntax of the program.
// Inserting "/*text*/" in "a/b" must be "a/ /*text*/ b",
// as "a//*text*/b" is tokenized as a "//" comment.
fmt.Fprintf(&printBuf2, " /*line %s:1*/ ", newName)
}
}
printBuf2.Write(src[copied:])
return printBuf2.Bytes(), nil
}

Loading…
Cancel
Save