mmcloughlin · mmcloughlin · Feb 5, 2020 · Feb 5, 2020 · Feb 5, 2020 · Feb 5, 2020
diff --git a/.golangci.yml b/.golangci.yml
@@ -6,6 +6,7 @@ linters:
     - gosec
     - prealloc
     - funlen
+    - lll
 
 issues:
   exclude-use-default: false
diff --git a/format.go b/format.go
@@ -2,14 +2,13 @@ package main
 
 import (
 	"bytes"
-	"errors"
 	"go/ast"
 	"go/format"
 	"go/parser"
 	"go/token"
+	"regexp"
 	"strings"
 	"unicode"
-	"unicode/utf8"
 
 	"golang.org/x/tools/go/ast/astutil"
 )
@@ -23,18 +22,8 @@ func Format(src []byte) ([]byte, error) {
 		return nil, err
 	}
 
-	// Apply transform.
-	transformed := CommentTransform(f, func(text string) string {
-		newtext, errf := formula(text)
-		if errf != nil {
-			err = errf
-			return text
-		}
-		return newtext
-	})
-	if err != nil {
-		return nil, err
-	}
+	// Process every comment as a formula.
+	transformed := commentreplace(f, formula)
 
 	// Format.
 	buf := bytes.NewBuffer(nil)
@@ -44,19 +33,19 @@ func Format(src []byte) ([]byte, error) {
 	return buf.Bytes(), nil
 }
 
-// CommentTransform applies transform to the text of every comment under the root AST.
-func CommentTransform(root ast.Node, transform func(string) string) ast.Node {
+// commentreplace applies the repl function to the text of every comment under the root AST.
+func commentreplace(root ast.Node, repl func(string) string) ast.Node {
 	return astutil.Apply(root, func(c *astutil.Cursor) bool {
 		switch n := c.Node().(type) {
 		case *ast.Comment:
 			c.Replace(&ast.Comment{
 				Slash: n.Slash,
-				Text:  transform(n.Text),
+				Text:  repl(n.Text),
 			})
 		case *ast.File:
 			for _, g := range n.Comments {
 				for _, comment := range g.List {
-					comment.Text = transform(comment.Text)
+					comment.Text = repl(comment.Text)
 				}
 			}
 		}
@@ -66,9 +55,16 @@ func CommentTransform(root ast.Node, transform func(string) string) ast.Node {
 
 // Fixed data structures required for formula processing.
 var (
-	replacer *strings.Replacer // replacer for symbols.
-	super    = map[rune]rune{} // replacement map for superscript characters.
-	sub      = map[rune]rune{} // replacement map for subscript characters.
+	// Symbol replacer.
+	replacer *strings.Replacer
+
+	// Regular expressions for super/subscripts.
+	supregexp *regexp.Regexp
+	subregexp *regexp.Regexp
+
+	// Rune replacement maps.
+	super = map[rune]rune{}
+	sub   = map[rune]rune{}
 )
 
 func init() {
@@ -79,151 +75,55 @@ func init() {
 	}
 	replacer = strings.NewReplacer(oldnew...)
 
-	// Build super/subscript replacement maps.
+	// Build super/subscript character classes and replacement maps.
+	var superclass, subclass []rune
 	for _, char := range chars {
 		if char.Super != None {
+			superclass = append(superclass, char.Char)
 			super[char.Char] = char.Super
 		}
 		if char.Sub != None {
+			subclass = append(subclass, char.Char)
 			sub[char.Char] = char.Sub
 		}
 	}
-}
-
-// formula processes a formula in s, writing the result to w.
-func formula(s string) (string, error) {
-	if len(s) == 0 {
-		return "", nil
-	}
-
-	// Replace symbols.
-	s = replacer.Replace(s)
-
-	// Replace super/subscripts.
-	buf := bytes.NewBuffer(nil)
-	last := None
-	for len(s) > 0 {
-		r, size := utf8.DecodeRuneInString(s)
-
-		// Look for a super/subscript character.
-		var repl map[rune]rune
-		switch r {
-		case '^':
-			repl = super
-		case '_':
-			repl = sub
-		default:
-			buf.WriteRune(r)
-			last = r
-			s = s[size:]
-			continue
-		}
-
-		// Perform replacement.
-		if unicode.IsPrint(last) && !unicode.IsSpace(last) {
-			var err error
-			s, err = supsub(buf, s, repl)
-			if err != nil {
-				return "", err
-			}
-		} else {
-			buf.WriteRune(r)
-			s = s[size:]
-		}
-
-		last = None
-	}
 
-	return buf.String(), nil
+	// Build regular expressions.
+	supregexp = regexp.MustCompile(`(\b[A-Za-z0-9]|\pS)\^(\d+|\{` + charclass(superclass) + `+\}|` + charclass(superclass) + `\s)`)
+	subregexp = regexp.MustCompile(`(\b[A-Za-z]|\pS)_(\d+\b|\{` + charclass(subclass) + `+\})`)
 }
 
-// supsub processes a super/subscript starting at s, writing the result to w.
-// The repl map provides the mapping from runes to the corresponding
-// super/subscripted versions. Note the first character of s should be the "^"
-// or "_" operator.
-func supsub(w *bytes.Buffer, s string, repl map[rune]rune) (string, error) {
-	arg, rest, err := parsearg(s[1:])
-	if err != nil {
-		return "", err
-	}
-
-	// If we could not parse an argument, or its not replaceable, just write the
-	// sub/script operator and return.
-	if len(arg) == 0 || !replaceable(arg, repl) {
-		w.WriteByte(s[0])
-		return s[1:], nil
-	}
-
-	// Perform the replacement.
-	for _, r := range arg {
-		w.WriteRune(repl[r])
-	}
-
-	return rest, nil
+// charclass builds a regular expression character class from a list of runes.
+func charclass(runes []rune) string {
+	return strings.ReplaceAll("["+string(runes)+"]", "-", `\-`)
 }
 
-// parsearg parses the argument to a super/subscript.
-func parsearg(s string) (string, string, error) {
-	if len(s) == 0 {
-		return "", "", nil
-	}
-
-	// Braced.
-	if s[0] == '{' {
-		arg, rest, err := parsebraces(s)
-		if err != nil {
-			return "", "", err
-		}
-		return arg[1 : len(arg)-1], rest, nil
-	}
-
-	// Look for a numeral.
-	i := 0
-	for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
-	}
-	if i > 0 {
-		return s[:i], s[i:], nil
-	}
-
-	// Default to the first rune.
-	_, i = utf8.DecodeRuneInString(s)
-	return s[:i], s[i:], nil
-}
-
-// parsebraces parses matching braces starting at the beginning of s.
-func parsebraces(s string) (string, string, error) {
-	if len(s) == 0 || s[0] != '{' {
-		return "", "", errors.New("expected {")
-	}
-
-	depth := 0
-	for i, r := range s {
-		// Adjust depth if we see open or close brace.
-		switch r {
-		case '{':
-			depth++
-		case '}':
-			depth--
-		}
+// formula processes a formula in s and returns the result.
+func formula(s string) string {
+	// Replace symbols.
+	s = replacer.Replace(s)
 
-		// Continue if we have not reached matched braces.
-		if depth > 0 {
-			continue
-		}
+	// Replace superscripts.
+	s = supregexp.ReplaceAllStringFunc(s, subsupreplacer(super))
 
-		// Return the matched braces.
-		return s[:i+1], s[i+1:], nil
-	}
+	// Replace subscripts.
+	s = subregexp.ReplaceAllStringFunc(s, subsupreplacer(sub))
 
-	return "", "", errors.New("unmatched braces")
+	return s
 }
 
-// replaceable returns whether every rune in s has a replacement in repl.
-func replaceable(s string, repl map[rune]rune) bool {
-	for _, r := range s {
-		if _, ok := repl[r]; !ok {
-			return false
+// subsupreplacer builds a replacement function that applies the repl rune map
+// to a matched super/subscript.
+func subsupreplacer(repl map[rune]rune) func(string) string {
+	return func(s string) string {
+		var runes []rune
+		for i, r := range s {
+			if i == 0 || unicode.IsSpace(r) {
+				runes = append(runes, r)
+			} else if repl[r] != None {
+				runes = append(runes, repl[r])
+			}
 		}
+		return string(runes)
 	}
-	return true
 }
diff --git a/format_test.go b/format_test.go
@@ -16,27 +16,31 @@ func TestFormula(t *testing.T) {
 
 		// Symbols.
 		{Name: "basic_symbol", Input: "x +- y", Expect: "x ± y"},
-		{Name: "basic_latex_symbol", Input: "x \\oplus y", Expect: "x ⊕ y"},
+		{Name: "basic_latex_symbol", Input: `x \oplus y`, Expect: "x ⊕ y"},
 		{Name: "multi_symbols", Input: "2 <= x <= 10", Expect: "2 ⩽ x ⩽ 10"},
 
 		// Super/subscripts.
 		{Name: "sup_brace_replaceable", Input: "x^{i+j}ab", Expect: "xⁱ⁺ʲab"},
 		{Name: "sup_numeral_replaceable", Input: "x^123a", Expect: "x¹²³a"},
-		{Name: "sup_char_replaceable", Input: "x^ijk", Expect: "xⁱjk"},
+		{Name: "sup_char_replaceable", Input: "x^ijk", Expect: "x^ijk"},
 
 		{Name: "sup_brace_nonreplaceable", Input: "x^{p+q}pq", Expect: "x^{p+q}pq"},
 		{Name: "sup_char_nonreplaceable", Input: "x^qrs", Expect: "x^qrs"},
 
 		{Name: "sub_brace_replaceable", Input: "x_{i+j}ab", Expect: "xᵢ₊ⱼab"},
-		{Name: "sub_numeral_replaceable", Input: "x_123a", Expect: "x₁₂₃a"},
-		{Name: "sub_char_replaceable", Input: "x_ijk", Expect: "xᵢjk"},
+		{Name: "sub_digit_brace_replaceable", Input: "2_{i+j}ab", Expect: "2_{i+j}ab"},
+		{Name: "sub_numeral_boundary_replaceable", Input: "x_123 a", Expect: "x₁₂₃ a"},
+		{Name: "sub_numeral_non_boundary", Input: "x_123a", Expect: "x_123a"},
+		{Name: "sub_char_replaceable", Input: "x_ijk", Expect: "x_ijk"},
 
 		{Name: "sub_brace_nonreplaceable", Input: "x_{w+x}wx", Expect: "x_{w+x}wx"},
 		{Name: "sub_char_nonreplaceable", Input: "x_wxy", Expect: "x_wxy"},
 
-		// Combination.
-		{Name: "sup_with_symbol", Input: "\\oplus^23", Expect: "⊕²³"},
-		{Name: "sub_with_symbol", Input: "\\oplus_23", Expect: "⊕₂₃"},
+		// Combination of symbols and super/subscripts.
+		{Name: "sup_with_symbol", Input: `\oplus^23`, Expect: "⊕²³"},
+		{Name: "sub_with_symbol", Input: `\oplus_23`, Expect: "⊕₂₃"},
+		{Name: "sup_brace_with_symbol", Input: `\oplus^{i+j}`, Expect: "⊕ⁱ⁺ʲ"},
+		{Name: "sub_brace_with_symbol", Input: `\oplus_{i+j}`, Expect: "⊕ᵢ₊ⱼ"},
 
 		// Malformed.
 		{Name: "sup_first_char", Input: "^a", Expect: "^a"},
@@ -48,25 +52,58 @@ func TestFormula(t *testing.T) {
 		{Name: "sup_space_before", Input: "pre ^a", Expect: "pre ^a"},
 		{Name: "sub_space_before", Input: "pre _a", Expect: "pre _a"},
 
-		{Name: "sup_consecutive", Input: "pre ^^^^^^^a post", Expect: "pre ^^^^^^^a post"},
-		{Name: "sub_consecutive", Input: "pre _______a post", Expect: "pre _______a post"},
-
 		// Regression.
-		{Name: "sup_with_minus", Input: "2^32-1", Expect: "2³²-1"},
+		{
+			Name:   "sup_with_minus",
+			Input:  "2^32-1",
+			Expect: "2³²-1",
+		},
+		{
+			Name:   "exp_with_minus",
+			Input:  "p256Invert calculates |out| = |in|^{-1}",
+			Expect: "p256Invert calculates |out| = |in|⁻¹",
+		},
 	}
 	for _, c := range cases {
 		c := c // scopelint
 		t.Run(c.Name, func(t *testing.T) {
-			got, err := formula(c.Input)
-			if err != nil {
-				t.Fatal(err)
-			}
-			if got != c.Expect {
-				t.Logf("input  = %q", c.Input)
-				t.Logf("got    = %q", got)
-				t.Logf("expect = %q", c.Expect)
-				t.FailNow()
-			}
+			AssertFormulaOutput(t, c.Input, c.Expect)
 		})
 	}
 }
+
+func TestFormulaNoChange(t *testing.T) {
+	// Regression tests for inputs that should have been left alone.
+	cases := []string{
+		// golang.org/x/crypto
+		"\"_acme-challenge\" name of the domain being validated.",                                                            // subscript "_a"
+		"echo -n cert | base64 | tr -d '=' | tr '/+' '_-'",                                                                   // subscript "_-"
+		"thumbprint is precomputed for testKeyEC in jws_test.go",                                                             // subscript "_t"
+		"The \"signature_algorithms\" extension, if present, limits the key exchange",                                        // subscript "_a"
+		"testGetCertificate_tokenCache tests the fallback of token certificate fetches",                                      // subscript "_t"
+		"https://en.wikipedia.org/wiki/Automated_Certificate_Management_Environment#CAs_&_PKIs_that_offer_ACME_certificates", // subscripts in URL
+		"g8TuAS9g5zhq8ELQ3kmjr-KV86GAMgI6VAcGlq3QrzpTCf_30Ab7-zawrfRaFON",                                                    // subscript "_30"
+		"JAumQ_I2fjj98_97mk3ihOY4AgVdCDj1z_GCoZkG5Rq7nbCGyosyKWyDX00Zs-n",                                                    // subscript "_97"
+		"xiToPMinus1Over3 is ξ^((p-1)/3) where ξ = i+3.",                                                                     // superscript "^("
+		"FrobeniusP2 computes (xτ²+yτ+z)^(p²) = xτ^(2p²) + yτ^(p²) + z",                                                      // superscript "^("
+		"x for a moment, then after applying the Frobenius, we have x̄ω^(2p)",                                                // superscript "^("
+		"x̄ξ^((p-1)/3)ω² and applying the inverse isomorphism eliminates the",                                                // superscript "^("
+		"be called when the vector facility is available. Implementation in asm_s390x.s.",                                    // subscript "_s"
+		"[1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf",                                           // subscript "_202"
+		"Cert generated by ssh-keygen OpenSSH_6.8p1 OS X 10.10.3",                                                            // subscript "_6"
+	}
+	for _, input := range cases {
+		AssertFormulaOutput(t, input, input)
+	}
+}
+
+func AssertFormulaOutput(t *testing.T, input, expect string) {
+	t.Helper()
+	got := formula(input)
+	if got != expect {
+		t.Logf("input  = %q", input)
+		t.Logf("got    = %q", got)
+		t.Logf("expect = %q", expect)
+		t.Fail()
+	}
+}
diff --git a/testdata/poly1305.in b/testdata/poly1305.in
@@ -133,7 +133,7 @@ func shiftRightBy2(a uint128) uint128 {
 // updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
 // 128 bits of message, it computes
 //
-//     h_+ = (h + m) * r  mod  2^130 - 5
+//     h_{+} = (h + m) * r  mod  2^130 - 5
 //
 // If the msg length is not a multiple of TagSize, it assumes the last
 // incomplete chunk is the final one.
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ linters: @@
         - gosec
         - prealloc
         - funlen
+        - lll
     issues:
       exclude-use-default: false