diff --git a/.golangci.yml b/.golangci.yml index 2b5eaff..ecf4e07 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -6,6 +6,7 @@ linters: - gosec - prealloc - funlen + - lll issues: exclude-use-default: false diff --git a/format.go b/format.go index c5ddd3e..99558df 100644 --- a/format.go +++ b/format.go @@ -2,14 +2,13 @@ package main import ( "bytes" - "errors" "go/ast" "go/format" "go/parser" "go/token" + "regexp" "strings" "unicode" - "unicode/utf8" "golang.org/x/tools/go/ast/astutil" ) @@ -23,18 +22,8 @@ func Format(src []byte) ([]byte, error) { return nil, err } - // Apply transform. - transformed := CommentTransform(f, func(text string) string { - newtext, errf := formula(text) - if errf != nil { - err = errf - return text - } - return newtext - }) - if err != nil { - return nil, err - } + // Process every comment as a formula. + transformed := commentreplace(f, formula) // Format. buf := bytes.NewBuffer(nil) @@ -44,19 +33,19 @@ func Format(src []byte) ([]byte, error) { return buf.Bytes(), nil } -// CommentTransform applies transform to the text of every comment under the root AST. -func CommentTransform(root ast.Node, transform func(string) string) ast.Node { +// commentreplace applies repl function to the text of every comment under the root AST. +func commentreplace(root ast.Node, repl func(string) string) ast.Node { return astutil.Apply(root, func(c *astutil.Cursor) bool { switch n := c.Node().(type) { case *ast.Comment: c.Replace(&ast.Comment{ Slash: n.Slash, - Text: transform(n.Text), + Text: repl(n.Text), }) case *ast.File: for _, g := range n.Comments { for _, comment := range g.List { - comment.Text = transform(comment.Text) + comment.Text = repl(comment.Text) } } } @@ -66,9 +55,16 @@ func CommentTransform(root ast.Node, transform func(string) string) ast.Node { // Fixed data structures required for formula processing. var ( - replacer *strings.Replacer // replacer for symbols. - super = map[rune]rune{} // replacement map for superscript characters. - sub = map[rune]rune{} // replacement map for subscript characters. + // Symbol replacer. + replacer *strings.Replacer + + // Regular expressions for super/subscripts. + supregexp *regexp.Regexp + subregexp *regexp.Regexp + + // Rune replacement maps. + super = map[rune]rune{} + sub = map[rune]rune{} ) func init() { @@ -79,151 +75,55 @@ func init() { } replacer = strings.NewReplacer(oldnew...) - // Build super/subscript replacement maps. + // Build super/subscript character classes and replacement maps. + var superclass, subclass []rune for _, char := range chars { if char.Super != None { + superclass = append(superclass, char.Char) super[char.Char] = char.Super } if char.Sub != None { + subclass = append(subclass, char.Char) sub[char.Char] = char.Sub } } -} - -// formula processes a formula in s, writing the result to w. -func formula(s string) (string, error) { - if len(s) == 0 { - return "", nil - } - - // Replace symbols. - s = replacer.Replace(s) - - // Replace super/subscripts. - buf := bytes.NewBuffer(nil) - last := None - for len(s) > 0 { - r, size := utf8.DecodeRuneInString(s) - - // Look for a super/subscript character. - var repl map[rune]rune - switch r { - case '^': - repl = super - case '_': - repl = sub - default: - buf.WriteRune(r) - last = r - s = s[size:] - continue - } - - // Perform replacement. - if unicode.IsPrint(last) && !unicode.IsSpace(last) { - var err error - s, err = supsub(buf, s, repl) - if err != nil { - return "", err - } - } else { - buf.WriteRune(r) - s = s[size:] - } - - last = None - } - return buf.String(), nil + // Build regular expressions. + supregexp = regexp.MustCompile(`(\b[A-Za-z0-9]|\pS)\^(\d+|\{` + charclass(superclass) + `+\}|` + charclass(superclass) + `\s)`) + subregexp = regexp.MustCompile(`(\b[A-Za-z]|\pS)_(\d+\b|\{` + charclass(subclass) + `+\})`) } -// supsub processes a super/subscript starting at s, writing the result to w. -// The repl map provides the mapping from runes to the corresponding -// super/subscripted versions. Note the first character of s should be the "^" -// or "_" operator. -func supsub(w *bytes.Buffer, s string, repl map[rune]rune) (string, error) { - arg, rest, err := parsearg(s[1:]) - if err != nil { - return "", err - } - - // If we could not parse an argument, or its not replaceable, just write the - // sub/script operator and return. - if len(arg) == 0 || !replaceable(arg, repl) { - w.WriteByte(s[0]) - return s[1:], nil - } - - // Perform the replacement. - for _, r := range arg { - w.WriteRune(repl[r]) - } - - return rest, nil +// charclass builds a regular expression character class from a list of runes. +func charclass(runes []rune) string { + return strings.ReplaceAll("["+string(runes)+"]", "-", `\-`) } -// parsearg parses the argument to a super/subscript. -func parsearg(s string) (string, string, error) { - if len(s) == 0 { - return "", "", nil - } - - // Braced. - if s[0] == '{' { - arg, rest, err := parsebraces(s) - if err != nil { - return "", "", err - } - return arg[1 : len(arg)-1], rest, nil - } - - // Look for a numeral. - i := 0 - for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { - } - if i > 0 { - return s[:i], s[i:], nil - } - - // Default to the first rune. - _, i = utf8.DecodeRuneInString(s) - return s[:i], s[i:], nil -} - -// parsebraces parses matching braces starting at the beginning of s. -func parsebraces(s string) (string, string, error) { - if len(s) == 0 || s[0] != '{' { - return "", "", errors.New("expected {") - } - - depth := 0 - for i, r := range s { - // Adjust depth if we see open or close brace. - switch r { - case '{': - depth++ - case '}': - depth-- - } +// formula processes a formula in s, writing the result to w. +func formula(s string) string { + // Replace symbols. + s = replacer.Replace(s) - // Continue if we have not reached matched braces. - if depth > 0 { - continue - } + // Replace superscripts. + s = supregexp.ReplaceAllStringFunc(s, subsupreplacer(super)) - // Return the matched braces. - return s[:i+1], s[i+1:], nil - } + // Replace subscripts. + s = subregexp.ReplaceAllStringFunc(s, subsupreplacer(sub)) - return "", "", errors.New("unmatched braces") + return s } -// replaceable returns whether every rune in s has a replacement in repl. -func replaceable(s string, repl map[rune]rune) bool { - for _, r := range s { - if _, ok := repl[r]; !ok { - return false +// subsupreplacer builds a replacement function that applies the repl rune map +// to a matched super/subscript. +func subsupreplacer(repl map[rune]rune) func(string) string { + return func(s string) string { + var runes []rune + for i, r := range s { + if i == 0 || unicode.IsSpace(r) { + runes = append(runes, r) + } else if repl[r] != None { + runes = append(runes, repl[r]) + } } + return string(runes) } - return true } diff --git a/format_test.go b/format_test.go index 2811659..f154c42 100644 --- a/format_test.go +++ b/format_test.go @@ -16,27 +16,31 @@ func TestFormula(t *testing.T) { // Symbols. {Name: "basic_symbol", Input: "x +- y", Expect: "x ± y"}, - {Name: "basic_latex_symbol", Input: "x \\oplus y", Expect: "x ⊕ y"}, + {Name: "basic_latex_symbol", Input: `x \oplus y`, Expect: "x ⊕ y"}, {Name: "multi_symbols", Input: "2 <= x <= 10", Expect: "2 ⩽ x ⩽ 10"}, // Super/subscripts. {Name: "sup_brace_replaceable", Input: "x^{i+j}ab", Expect: "xⁱ⁺ʲab"}, {Name: "sup_numeral_replaceable", Input: "x^123a", Expect: "x¹²³a"}, - {Name: "sup_char_replaceable", Input: "x^ijk", Expect: "xⁱjk"}, + {Name: "sup_char_replaceable", Input: "x^ijk", Expect: "x^ijk"}, {Name: "sup_brace_nonreplaceable", Input: "x^{p+q}pq", Expect: "x^{p+q}pq"}, {Name: "sup_char_nonreplaceable", Input: "x^qrs", Expect: "x^qrs"}, {Name: "sub_brace_replaceable", Input: "x_{i+j}ab", Expect: "xᵢ₊ⱼab"}, - {Name: "sub_numeral_replaceable", Input: "x_123a", Expect: "x₁₂₃a"}, - {Name: "sub_char_replaceable", Input: "x_ijk", Expect: "xᵢjk"}, + {Name: "sub_digit_brace_replaceable", Input: "2_{i+j}ab", Expect: "2_{i+j}ab"}, + {Name: "sub_numeral_boundary_replaceable", Input: "x_123 a", Expect: "x₁₂₃ a"}, + {Name: "sub_numeral_non_boundary", Input: "x_123a", Expect: "x_123a"}, + {Name: "sub_char_replaceable", Input: "x_ijk", Expect: "x_ijk"}, {Name: "sub_brace_nonreplaceable", Input: "x_{w+x}wx", Expect: "x_{w+x}wx"}, {Name: "sub_char_nonreplaceable", Input: "x_wxy", Expect: "x_wxy"}, - // Combination. - {Name: "sup_with_symbol", Input: "\\oplus^23", Expect: "⊕²³"}, - {Name: "sub_with_symbol", Input: "\\oplus_23", Expect: "⊕₂₃"}, + // Combination of symbols and super/subscripts. + {Name: "sup_with_symbol", Input: `\oplus^23`, Expect: "⊕²³"}, + {Name: "sub_with_symbol", Input: `\oplus_23`, Expect: "⊕₂₃"}, + {Name: "sup_brace_with_symbol", Input: `\oplus^{i+j}`, Expect: "⊕ⁱ⁺ʲ"}, + {Name: "sub_brace_with_symbol", Input: `\oplus_{i+j}`, Expect: "⊕ᵢ₊ⱼ"}, // Malformed. {Name: "sup_first_char", Input: "^a", Expect: "^a"}, @@ -48,25 +52,58 @@ func TestFormula(t *testing.T) { {Name: "sup_space_before", Input: "pre ^a", Expect: "pre ^a"}, {Name: "sub_space_before", Input: "pre _a", Expect: "pre _a"}, - {Name: "sup_consecutive", Input: "pre ^^^^^^^a post", Expect: "pre ^^^^^^^a post"}, - {Name: "sub_consecutive", Input: "pre _______a post", Expect: "pre _______a post"}, - // Regression. - {Name: "sup_with_minus", Input: "2^32-1", Expect: "2³²-1"}, + { + Name: "sup_with_minus", + Input: "2^32-1", + Expect: "2³²-1", + }, + { + Name: "exp_with_minus", + Input: "p256Invert calculates |out| = |in|^{-1}", + Expect: "p256Invert calculates |out| = |in|⁻¹", + }, } for _, c := range cases { c := c // scopelint t.Run(c.Name, func(t *testing.T) { - got, err := formula(c.Input) - if err != nil { - t.Fatal(err) - } - if got != c.Expect { - t.Logf("input = %q", c.Input) - t.Logf("got = %q", got) - t.Logf("expect = %q", c.Expect) - t.FailNow() - } + AssertFormulaOutput(t, c.Input, c.Expect) }) } } + +func TestFormulaNoChange(t *testing.T) { + // Regression tests for inputs that should have been left alone. + cases := []string{ + // golang.org/x/crypto + "\"_acme-challenge\" name of the domain being validated.", // subscript "_a" + "echo -n cert | base64 | tr -d '=' | tr '/+' '_-'", // subscript "_-" + "thumbprint is precomputed for testKeyEC in jws_test.go", // subscript "_t" + "The \"signature_algorithms\" extension, if present, limits the key exchange", // subscript "_a" + "testGetCertificate_tokenCache tests the fallback of token certificate fetches", // subscript "_t" + "https://en.wikipedia.org/wiki/Automated_Certificate_Management_Environment#CAs_&_PKIs_that_offer_ACME_certificates", // subscripts in URL + "g8TuAS9g5zhq8ELQ3kmjr-KV86GAMgI6VAcGlq3QrzpTCf_30Ab7-zawrfRaFON", // subscript "_30" + "JAumQ_I2fjj98_97mk3ihOY4AgVdCDj1z_GCoZkG5Rq7nbCGyosyKWyDX00Zs-n", // subscript "_97" + "xiToPMinus1Over3 is ξ^((p-1)/3) where ξ = i+3.", // superscript "^(" + "FrobeniusP2 computes (xτ²+yτ+z)^(p²) = xτ^(2p²) + yτ^(p²) + z", // superscript "^(" + "x for a moment, then after applying the Frobenius, we have x̄ω^(2p)", // superscript "^(" + "x̄ξ^((p-1)/3)ω² and applying the inverse isomorphism eliminates the", // superscript "^(" + "be called when the vector facility is available. Implementation in asm_s390x.s.", // subscript "_s" + "[1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf", // subscript "_202" + "Cert generated by ssh-keygen OpenSSH_6.8p1 OS X 10.10.3", // subscript "_6" + } + for _, input := range cases { + AssertFormulaOutput(t, input, input) + } +} + +func AssertFormulaOutput(t *testing.T, input, expect string) { + t.Helper() + got := formula(input) + if got != expect { + t.Logf("input = %q", input) + t.Logf("got = %q", got) + t.Logf("expect = %q", expect) + t.Fail() + } +} diff --git a/testdata/poly1305.in b/testdata/poly1305.in index 92f9c50..199bd1b 100644 --- a/testdata/poly1305.in +++ b/testdata/poly1305.in @@ -133,7 +133,7 @@ func shiftRightBy2(a uint128) uint128 { // updateGeneric absorbs msg into the state.h accumulator. For each chunk m of // 128 bits of message, it computes // -// h_+ = (h + m) * r mod 2^130 - 5 +// h_{+} = (h + m) * r mod 2^130 - 5 // // If the msg length is not a multiple of TagSize, it assumes the last // incomplete chunk is the final one.