Skip to content

Commit

Permalink
Support Unicode superscripts for HTML note markers
Browse files Browse the repository at this point in the history
Since HTML doesn't have semantic "footnote" elements, Pandoc has
historically used the <sup> tag to mark the numeric reference to
footnotes. In some fonts, depending on line-spacing, the common default
<sup> style of "font-size: smaller; vertical-align: super;" doesn't look
very good, spilling beyond the font's cap height and making browsers
add extra space at the top of the text line.

Many fonts include characters from the Unicode superscripts and
subscripts block (https://unicode.org/charts/nameslist/n_2070.html)
which are designed to function as footnote markers. Using these
characters to render note marks, instead of a <sup> tag, yields better
typographical results in these cases without additional CSS. The <sup>
tag is purely typographical so losing it from the output doesn't cost
anything semantically.

This diff adds a --note-style option to pandoc, taking the values
"sup-tag" (the default and hitherto only method) and
"unicode-superscript" (print marks using superscript chars, no
surrounding tag).

Due to the nature of Note output in the HTML writer, a Lua filter cannot
really customize how footnote marks are printed, justifying a writer
option here. An alternative to adding this feature to Pandoc would be
for authors to use CSS like 'a.footnote-ref sup { font-size: inherit;
vertical-align: inherit; font-feature-settings: "sups"; }' which would
work for fonts where the "sups" OpenType feature replaces digits with
their superscript forms. That solution only works for fonts encoding
that feature though; Times New Roman on my system has the superscript
characters but do not support the "sups" OpenType feature.

Future work could extend support for this writer option to plain output
and possibly other formats where note marks are emitted by Pandoc rather
than the renderer of the output document. (The present author has not
studied whether there are such writer formats.)
  • Loading branch information
silby committed Feb 8, 2024
1 parent 9ccd40f commit 9397c9e
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 6 deletions.
12 changes: 12 additions & 0 deletions src/Text/Pandoc/App/CommandLineOptions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,18 @@ options =
"true|false")
"" -- "Use <q> tags for quotes in HTML"

, Option "" ["note-style"]
(ReqArg
(\arg opt -> do
style <- case arg of
"sup-tag" -> return SupTag
"unicode-superscript" -> return UnicodeSuperscript
_ -> optError $ PandocOptionError $ T.pack
"Argument of --note-style must be sup-tag or unicode-superscript"
return opt {optNoteStyle = style })
"sup-tag|unicode-superscript")
"" -- "How to print note marks in HTML"

, Option "" ["email-obfuscation"]
(ReqArg
(\arg opt -> do
Expand Down
7 changes: 6 additions & 1 deletion src/Text/Pandoc/App/Opt.hs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault),
WrapOption (WrapAuto), HTMLMathMethod (PlainMath),
ReferenceLocation (EndOfDocument),
ObfuscationMethod (NoObfuscation),
CiteMethod (Citeproc))
CiteMethod (Citeproc), NoteStyle (SupTag))
import Text.Pandoc.Class (readFileStrict, fileExists, setVerbosity, report,
PandocMonad(lookupEnv), getUserDataDir)
import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError))
Expand Down Expand Up @@ -120,6 +120,7 @@ data Opt = Opt
, optSelfContained :: Bool -- ^ Make HTML accessible offline (deprecated)
, optEmbedResources :: Bool -- ^ Make HTML accessible offline
, optHtmlQTags :: Bool -- ^ Use <q> tags in HTML
, optNoteStyle :: NoteStyle -- ^ How to print note marks in HTML
, optHighlightStyle :: Maybe Text -- ^ Style to use for highlighted code
, optSyntaxDefinitions :: [FilePath] -- ^ xml syntax defs to load
, optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions
Expand Down Expand Up @@ -202,6 +203,7 @@ instance FromJSON Opt where
<*> o .:? "self-contained" .!= optSelfContained defaultOpts
<*> o .:? "embed-resources" .!= optEmbedResources defaultOpts
<*> o .:? "html-q-tags" .!= optHtmlQTags defaultOpts
<*> o .:? "note-style" .!= optNoteStyle defaultOpts
<*> o .:? "highlight-style"
<*> o .:? "syntax-definitions" .!= optSyntaxDefinitions defaultOpts
<*> o .:? "top-level-division" .!= optTopLevelDivision defaultOpts
Expand Down Expand Up @@ -528,6 +530,8 @@ doOpt (k,v) = do
parseJSON v >>= \x -> return (\o -> o{ optEmbedResources = x })
"html-q-tags" ->
parseJSON v >>= \x -> return (\o -> o{ optHtmlQTags = x })
"note-style" ->
parseJSON v >>= \x -> return (\o -> o{ optNoteStyle = x })
"highlight-style" ->
parseJSON v >>= \x -> return (\o -> o{ optHighlightStyle = x })
"syntax-definition" ->
Expand Down Expand Up @@ -739,6 +743,7 @@ defaultOpts = Opt
, optSelfContained = False
, optEmbedResources = False
, optHtmlQTags = False
, optNoteStyle = SupTag
, optHighlightStyle = Just "pygments"
, optSyntaxDefinitions = []
, optTopLevelDivision = TopLevelDefault
Expand Down
1 change: 1 addition & 0 deletions src/Text/Pandoc/App/OutputSettings.hs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ optToOutputSettings scriptingEngine opts = do
, writerReferenceDoc = optReferenceDoc opts
, writerSyntaxMap = syntaxMap
, writerPreferAscii = optAscii opts
, writerNoteStyle = optNoteStyle opts
}
return $ OutputSettings
{ outputFormat = format
Expand Down
20 changes: 20 additions & 0 deletions src/Text/Pandoc/Options.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions
, WriterOptions (..)
, TrackChanges (..)
, ReferenceLocation (..)
, NoteStyle (..)
, def
, isEnabled
, defaultMathJaxURL
Expand Down Expand Up @@ -286,6 +287,23 @@ instance ToJSON ReferenceLocation where
toJSON EndOfSection = "end-of-section"
toJSON EndOfDocument = "end-of-document"

-- | Style for printing note indicators in HTML output
data NoteStyle = SupTag -- Numbers in @<sup>@ tag
| UnicodeSuperscript -- Unicode superscript number characters
deriving (Show, Read, Eq, Data, Typeable, Generic)

instance FromJSON NoteStyle where
parseJSON v =
case v of
String "sup-tag" -> return SupTag
String "unicode-superscript" -> return UnicodeSuperscript
_ -> fail $ "Unknown note style " <> toStringLazy (encode v)

instance ToJSON NoteStyle where
toJSON SupTag = "sup-tag"
toJSON UnicodeSuperscript = "unicode-superscript"


-- | Options for writers
data WriterOptions = WriterOptions
{ writerTemplate :: Maybe (Template Text) -- ^ Template to use
Expand Down Expand Up @@ -325,6 +343,7 @@ data WriterOptions = WriterOptions
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
, writerSyntaxMap :: SyntaxMap
, writerPreferAscii :: Bool -- ^ Prefer ASCII representations of characters when possible
, writerNoteStyle :: NoteStyle -- ^ How to print note marks in HTML
} deriving (Show, Data, Typeable, Generic)

instance Default WriterOptions where
Expand Down Expand Up @@ -363,6 +382,7 @@ instance Default WriterOptions where
, writerReferenceLocation = EndOfDocument
, writerSyntaxMap = defaultSyntaxMap
, writerPreferAscii = False
, writerNoteStyle = SupTag
}

instance HasSyntaxExtensions WriterOptions where
Expand Down
22 changes: 17 additions & 5 deletions src/Text/Pandoc/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import Control.Monad.State.Strict
( StateT, MonadState(get), gets, modify, evalStateT )
import Control.Monad ( liftM, when, foldM, unless )
import Control.Monad.Trans ( MonadTrans(lift) )
import Data.Char (ord)
import Data.Char (ord, isDigit, digitToInt)
import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Containers.ListUtils (nubOrd)
Expand Down Expand Up @@ -133,6 +133,15 @@ strToHtml t
= h <> preEscapedString (T.unpack txt <> "\xFE0E")
go h txt = h <> toHtml txt

digitsToUnicodeSuperscript :: Text -> Text
digitsToUnicodeSuperscript =
let superscripts = "⁰¹²³⁴⁵⁶⁷⁸⁹"
go x
-- By construction, digitToInt and the list index cannot fail
| isDigit x = superscripts !! (digitToInt x)
| otherwise = x
in T.map go

-- See #5469: this prevents iOS from substituting emojis.
needsVariationSelector :: Char -> Bool
needsVariationSelector '' = True
Expand Down Expand Up @@ -1596,6 +1605,11 @@ inlineToHtml opts inline = do
let ref = tshow number
htmlContents <- blockListToNote opts ref contents
epubVersion <- gets stEPUBVersion
let style = writerNoteStyle opts
let (noteMark, noteTag) = case style of
_ | isJust epubVersion -> (ref, id)
SupTag -> (ref, H.sup)
UnicodeSuperscript -> (digitsToUnicodeSuperscript ref, id)
-- push contents onto front of notes
modify $ \st -> st {stNotes = htmlContents:notes}
slideVariant <- gets stSlideVariant
Expand All @@ -1605,10 +1619,8 @@ inlineToHtml opts inline = do
writerIdentifierPrefix opts <> "fn" <> ref)
! A.class_ "footnote-ref"
! prefixedId opts ("fnref" <> ref)
$ (if isJust epubVersion
then id
else H.sup)
$ toHtml ref
$ noteTag
$ toHtml noteMark
return $ case epubVersion of
Just EPUB3 -> link ! customAttribute "epub:type" "noteref" ! customAttribute "role" "doc-noteref"
_ | html5 -> link ! A5.role "doc-noteref"
Expand Down
21 changes: 21 additions & 0 deletions test/Tests/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,27 @@ tests =
, "</div>"
, "</div>"
]
, test (htmlWithOpts def{writerNoteStyle=UnicodeSuperscript})
"using Unicode superscript marks" $
noteTestDoc =?>
T.unlines
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\">¹</a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<blockquote>"
, "<p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\">²</a></p>"
, "<p>A second paragraph.</p>"
, "</blockquote>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
, "<div class=\"footnotes footnotes-end-of-document\">"
, "<hr />"
, "<ol>"
, "<li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li>"
, "<li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li>"
, "</ol>"
, "</div>"
]
]
]
where
Expand Down

0 comments on commit 9397c9e

Please sign in to comment.