Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(math): Support the MathML operator dictionary and many TeX-like aliases #2167

Merged
merged 15 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile-fonts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

if FONT_DOWNLOAD_TOOLS

# Target defined in Makefile.am, just adding a dependency here
.sources: fonttooling

.fonts: fonttooling
[ -h .fonts ] || mkdir -p $@

.sources: fonttooling
[ -h .sources ] || mkdir -p $@

fonttooling:
$(if $(BSDTAR),,$(error Please set BSDTAR with path or `./configure --enable-developer-mode`))
$(if $(CURL),,$(error Please set CURL with path or `./configure --enable-developer-mode`))
Expand Down
17 changes: 17 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ dist_license_DATA = LICENSE.md
EXTRA_DIST = spec tests documentation sile-dev-1.rockspec fontconfig.conf
EXTRA_DIST += build-aux/action-updater.js build-aux/cargo-updater.js build-aux/config.ld build-aux/decore-automake.sh build-aux/git-version-gen
EXTRA_DIST += Dockerfile build-aux/docker-bootstrap.sh build-aux/docker-fontconfig.conf hooks/build
EXTRA_DIST += build-aux/xml-entities-to-lua.xsl
EXTRA_DIST += default.nix flake.nix flake.lock shell.nix build-aux/pkg.nix
EXTRA_DIST += package.json # imported by both Nix and Docker
EXTRA_DIST += $(FIGURES)
Expand All @@ -94,6 +95,10 @@ else !SHARED
EXTRA_RUNTIME_DEPS =
endif

MATHML_ENTITIES = packages/math/mathml-entities.lua
EXTRA_DIST += $(MATHML_ENTITIES)
BUILT_SOURCES += $(MATHML_ENTITIES)

CLEANFILES = $(MANUAL)

DISTCLEANFILES = @AMINCLUDE@
Expand Down Expand Up @@ -312,6 +317,18 @@ patterndeps = $(_FORCED) $(_TEST_DEPS) $(_DOCS_DEPS) | $(bin_PROGRAMS) $(EXTRA_R
$(DOT) -Tpdf $< -o [email protected]
$(GS) -q -sDEVICE=pdfwrite -dCompatibilityLevel=1.5 -o $@ [email protected]

# Upstream W3C xml-entities database, pinned to a specific commit so that the
# generated operator dictionary is reproducible.
XML_ENTITIES = .sources/unicode.xml
XML_ENTITIES_COMMIT = 77acf14428202e4e1dba54ff1e5ed43fe5ab474f

# Download cache directory. Nothing is created when .sources is a symlink.
.sources:
	[ -h .sources ] || mkdir -p $@

# The cache directory is an order-only prerequisite: it must exist before curl
# writes into it, but its timestamp must never trigger a new download.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as unicode.xml.
$(XML_ENTITIES): | .sources
	$(CURL) --fail https://raw.githubusercontent.com/w3c/xml-entities/$(XML_ENTITIES_COMMIT)/unicode.xml -o $@

# Regenerate the Lua operator dictionary from the XML database, formatting the
# result with stylua when available (plain cat otherwise).
# NOTE(review): $(XML_ENTITIES) is read by the recipe but is not declared as a
# prerequisite; presumably this keeps source-tarball builds (which ship the
# generated file) from needing a download. Confirm, and run
# `make $(XML_ENTITIES)` first when regenerating from a clean checkout.
$(MATHML_ENTITIES): build-aux/xml-entities-to-lua.xsl
	$(XSLTPROC) $< $(XML_ENTITIES) | $(or $(STYLUA),cat) - > $@

.PHONY: force
force: ;

Expand Down
2 changes: 2 additions & 0 deletions build-aux/pkg.nix
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
icu,
fontconfig,
libiconv,
libxslt,
stylua,
taplo,
typos,
Expand Down Expand Up @@ -125,6 +126,7 @@ stdenv.mkDerivation (finalAttrs: {
icu
fontconfig
libiconv
libxslt
stylua
taplo
typos
Expand Down
199 changes: 199 additions & 0 deletions build-aux/xml-entities-to-lua.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" indent="no"/>

<!--
  format-value: render one value as a Lua literal.
  Integers and the words true/false are emitted bare; anything else
  (including non-integer numbers, which fail the floor() test) is wrapped
  in double quotes. No escaping is applied to quoted strings.
-->
<xsl:template name="format-value">
<xsl:param name="value" />
<xsl:choose>
<!-- integer -->
<xsl:when test="floor($value) = $value"><xsl:value-of select="$value" /></xsl:when>
<!-- boolean -->
<xsl:when test="$value = 'true' or $value = 'false'"><xsl:value-of select="$value" /></xsl:when>
<!-- string -->
<xsl:otherwise>"<xsl:value-of select="$value" />"</xsl:otherwise>
</xsl:choose>
</xsl:template>

<!--
  format-codepoint: turn a character id into a call to the Lua helper U().
  The first character of the id is dropped and the rest is used as a hex
  literal, e.g. an id of the form UXXXX becomes U(0xXXXX). When the id
  contains a hyphen (UXXXX-YYYY) the two halves become two arguments:
  U(0xXXXX, 0xYYYY).
-->
<xsl:template name="format-codepoint">
<xsl:param name="codepoint" />
<!-- Codepoint is UXXXX, remove the U -->
<xsl:variable name="hex" select="concat('U(0x', substring($codepoint, 2), ')')" />
<xsl:choose>
<xsl:when test="contains($hex, '-')">
<!-- Special case for 2-characters operators -->
<!-- CAVEAT: We do not expect operators with more than 2 characters -->
<xsl:value-of select="substring-before($hex, '-')" />
<xsl:value-of select="concat(', 0x', substring-after($hex, '-'))" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$hex" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>

<!--
  format-class: map a math class letter to a short atom name
  (ord, op, bin, rel, open, close, punct, accent, botaccent).
  Params:
    class       : the math class letter being mapped
    combclass   : combining class, only consulted for diacritics (class D)
    description : character description, only consulted for large
                  operators (class L) to single out integral signs
  Unknown or missing classes fall back to ord.
-->
<xsl:template name="format-class">
<xsl:param name="class" />
<xsl:param name="combclass" />
<xsl:param name="description" />
<xsl:choose>
<xsl:when test="$class = 'N'">ord</xsl:when><!-- Normal = mathord = atomType.ordinary -->
<xsl:when test="$class = 'A'">ord</xsl:when><!-- Alphabetic = mathalpha = atomType.ordinary -->
<xsl:when test="$class = 'B'">bin</xsl:when><!-- Binary = mathbin = atomType.binaryOperator -->
<xsl:when test="$class = 'C'">close</xsl:when><!-- Closing = mathclose = atomType.closeSymbol -->
<xsl:when test="$class = 'D'"><!-- Diacritic -->
<xsl:choose>
<xsl:when test="$combclass = '220'">botaccent</xsl:when>
<xsl:when test="$combclass = '230'">accent</xsl:when>
<xsl:otherwise>ord</xsl:otherwise><!-- assuming ordinary -->
</xsl:choose>
</xsl:when>
<xsl:when test="$class = 'F'">ord</xsl:when><!-- Fence = assuming ordinary -->
<xsl:when test="$class = 'G'">ord</xsl:when><!-- Glyph Part = assuming ordinary -->
<xsl:when test="$class = 'L'"><!-- Large -->
<xsl:choose>
<!-- SILE uses the atom for spacing currently (ignoring lspace and rspace) -->
<!-- HACK: integral signs are NOT considered as mathop for spacing purpose -->
<xsl:when test="contains($description,'INTEGRAL') or contains($description,'INTEGRATION')">ord</xsl:when>
<xsl:otherwise>op</xsl:otherwise><!-- mathop = atomType.bigOperator -->
</xsl:choose>
</xsl:when>
<xsl:when test="$class = 'O'">open</xsl:when><!-- Opening -->
<xsl:when test="$class = 'P'">punct</xsl:when><!-- Punctuation -->
<xsl:when test="$class = 'R'">rel</xsl:when><!-- Relation -->
<xsl:when test="$class = 'S'">ord</xsl:when><!-- Space = assuming ordinary -->
<xsl:when test="$class = 'U'">ord</xsl:when><!-- Unary = assuming ordinary -->
<xsl:when test="$class = 'V'">bin</xsl:when><!-- Vary = assume binary and let the logic decide later -->
<xsl:otherwise>ord</xsl:otherwise><!-- assuming ordinary if not specified -->
</xsl:choose>
</xsl:template>

<!--
  format-mathlatex: emit the TeX-like name as a quoted Lua string, or the
  Lua keyword nil when no name was supplied. The first character of the
  name is dropped (presumably the leading backslash; confirm against the
  input data).
-->
<xsl:template name="format-mathlatex">
<xsl:param name="mathlatex" />
<xsl:choose>
<xsl:when test="$mathlatex">"<xsl:value-of select="substring($mathlatex, 2)" />"</xsl:when>
<xsl:otherwise>nil</xsl:otherwise>
</xsl:choose>
</xsl:template>

<!--
  Root template: writes the whole generated Lua module.
  Everything between the opening tag and apply-templates is literal Lua
  text (file header, the U() helper, the addSymbol() registration
  function); one addSymbol(...) call per selected character follows, and
  the module finally returns the operatorDict and symbols tables.
  The literal text is whitespace-sensitive output: edit it as Lua code.
-->
<xsl:template match="unicode">--- GENERATED FILE, DO NOT EDIT MANUALLY
--
-- Operator dictionary for unicode characters
--
-- Extracted from https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
-- (https://github.com/w3c/xml-entities)
-- Copyright David Carlisle 1999-2024
-- Use and distribution of this code are permitted under the terms of the
-- W3C Software Notice and License.
-- http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html
-- This file is a collection of information about how to map Unicode entities to LaTeX,
-- and various SGML/XML entity sets (ISO and MathML/HTML).
-- A Unicode character may be mapped to several entities.
-- Originally designed by Sebastian Rahtz in conjunction with Barbara Beeton for the STIX project
--

local atoms = require("packages.math.atoms")

--- Transform a list of codepoints into a string
local function U (...)
local t = { ... }
local str = ""
for i = 1, #t do
str = str .. luautf8.char(t[i])
end
return str
end

local symbols = {}
local operatorDict = {}

--- Register a symbol
-- @tparam string str String representation of the symbol
-- @tparam string shortatom Short atom type
-- @tparam string mathlatex TeX-like name of the symbol (from unicode-math)
-- @tparam string _ Unicode name of the symbol (informative)
-- @tparam table ops List of operator forms and their properties
local function addSymbol (str, shortatom, mathlatex, _, ops)
if mathlatex then
SU.debug("math.symbols", "Registering symbol", str, "as", mathlatex)
symbols[mathlatex] = str
end
local op = {}
op.atom = atoms.types[shortatom]
if ops then
op.forms = {}
for _, v in pairs(ops) do
if v.form then
-- NOTE: At this point the mu unit is not yet defined, so keep it as a string.
v.lspace = v.lspace and ("%smu"):format(v.lspace) or "0mu"
v.rspace = v.rspace and ("%smu"):format(v.rspace) or "0mu"
op.forms[v.form] = v
else
SU.warn("No form for operator " .. str .. " (operator dictionary is probably incomplete)")
end
end
end
operatorDict[str] = op
end

<xsl:apply-templates select="charlist/character" />

return {
operatorDict = operatorDict,
symbols = symbols,
}
</xsl:template>

<!--
  Per-character template: emits one Lua call
    addSymbol(U(...), "atom", "name" or nil, "description", {forms} or nil)
  A character is skipped entirely when its atom is plain ord, it has no
  unicode-math name and it has no operator-dictionary child, since it
  would add no information to the generated tables.
-->
<xsl:template match="character">
<xsl:variable name="mathclass" select="unicodedata/@mathclass" />
<xsl:variable name="mathlatex" select="mathlatex[@set='unicode-math']/text()" />
<xsl:variable name="combclass" select="unicodedata/@combclass" />
<xsl:variable name="atom">
<xsl:call-template name="format-class">
<xsl:with-param name="class" select="$mathclass" />
<xsl:with-param name="combclass" select="$combclass" />
<xsl:with-param name="description" select="description" />
</xsl:call-template>
</xsl:variable>
<xsl:if test="$atom != 'ord' or $mathlatex or operator-dictionary">
<xsl:text>addSymbol(</xsl:text>
<!-- Codepoints -->
<xsl:call-template name="format-codepoint">
<xsl:with-param name="codepoint" select="@id" />
</xsl:call-template>
<!-- Atom type -->
<xsl:text>,"</xsl:text><xsl:value-of select="$atom" /><xsl:text>",</xsl:text>
<!-- Math latex name or nil -->
<xsl:call-template name="format-mathlatex">
<xsl:with-param name="mathlatex" select="$mathlatex" />
</xsl:call-template>
<!-- Description -->
<xsl:text>,"</xsl:text><xsl:value-of select="description" /><xsl:text>"</xsl:text>
<!-- Operator dictionary or nil -->
<xsl:choose>
<xsl:when test="operator-dictionary">
<xsl:text>,{</xsl:text>
<xsl:apply-templates select="operator-dictionary">
<!-- sort by @priority -->
<xsl:sort select="@priority" data-type="number" order="descending" />
</xsl:apply-templates>
<xsl:text>}</xsl:text>
</xsl:when>
<xsl:otherwise><xsl:text>,nil</xsl:text></xsl:otherwise>
</xsl:choose>
<xsl:text>)</xsl:text>
</xsl:if>
</xsl:template>

<!--
  Operator form template: emits one Lua table entry, followed by a comma,
  holding every attribute of the element as a name = value pair. Attributes
  are written in name order and each value goes through format-value.
  The literal braces, equals sign and commas below are output text.
-->
<xsl:template match="operator-dictionary">
{
<xsl:for-each select="@*">
<xsl:sort select="name()" />
<xsl:value-of select="name()" />
=
<xsl:call-template name="format-value">
<xsl:with-param name="value" select="." />
</xsl:call-template>,
</xsl:for-each>
},
</xsl:template>

</xsl:stylesheet>
3 changes: 2 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ AM_CONDITIONAL([ICU], [test "x$with_icu" = "xyes"])
# Required for downloading fonts for the manual and for tests
# Since the source tarball includes a prebuilt manual we only need this for Git source builds
AM_COND_IF([FONT_DOWNLOAD_TOOLS], [
QUE_PROGVAR([curl])
QUE_PROGVAR([bsdtar])
QUE_PROGVAR([curl])
])

AM_COND_IF([DEVELOPER_MODE], [
Expand All @@ -197,6 +197,7 @@ AM_COND_IF([DEVELOPER_TOOLS], [
QUE_PROGVAR([taplo])
QUE_PROGVAR([tr])
QUE_PROGVAR([typos])
QUE_PROGVAR([xsltproc])
])

AX_PROG_LUA([5.1], [], [], [], [prefer])
Expand Down
20 changes: 20 additions & 0 deletions packages/math/atoms.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Shorthands for atom types, used in the `atom` command option
-- and also in the unicode symbols table / operator dictionary.
-- The generated dictionary (see build-aux/xml-entities-to-lua.xsl) passes these
-- short names to its addSymbol helper, which looks them up in `types`.
-- Each name maps to a distinct integer from 0 to 11.
local atomType = {
-- The 8 atom types defined in the TeXbook's spacing table
ord = 0,
op = 1,
bin = 2,
rel = 3,
open = 4,
close = 5,
punct = 6,
inner = 7, -- Unused for now (used for fractions in The TeXbook)
-- Other atom types (considered as "ord" for spacing)
over = 8, -- Unused for now (used for overlines etc. in The TeXbook)
under = 9, -- Unused for now (used for underlines etc. in The TeXbook)
accent = 10,
botaccent = 11, -- Unused for now but botaccent is encoded in our dictionary
}

-- Module table: the atom type map is exposed under the `types` key.
return { types = atomType }
Loading
Loading