QuLogic · Jun 6, 2025
diff --git a/‎.github/workflows/tests.yml
Lines changed: 5 additions & 2 deletions b/‎.github/workflows/tests.yml
Lines changed: 5 additions & 2 deletions
diff --git a/‎doc/users/next_whats_new/type1_subset.rst
Lines changed: 9 additions & 0 deletions b/‎doc/users/next_whats_new/type1_subset.rst
Lines changed: 9 additions & 0 deletions
diff --git a/‎lib/matplotlib/_type1font.py
Lines changed: 332 additions & 13 deletions b/‎lib/matplotlib/_type1font.py
Lines changed: 332 additions & 13 deletions
diff --git a/‎lib/matplotlib/backends/backend_pdf.py
Lines changed: 66 additions & 41 deletions b/‎lib/matplotlib/backends/backend_pdf.py
Lines changed: 66 additions & 41 deletions
diff --git a/‎lib/matplotlib/dviread.py
Lines changed: 19 additions & 4 deletions b/‎lib/matplotlib/dviread.py
Lines changed: 19 additions & 4 deletions
diff --git a/‎lib/matplotlib/dviread.pyi
Lines changed: 2 additions & 0 deletions b/‎lib/matplotlib/dviread.pyi
Lines changed: 2 additions & 0 deletions
diff --git a/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-bitstream-charter.pdf
12.7 KB b/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-bitstream-charter.pdf
12.7 KB
diff --git a/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-dejavusans.pdf
32.6 KB b/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-dejavusans.pdf
32.6 KB
diff --git a/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-heuristica.pdf
54.9 KB b/‎lib/matplotlib/tests/baseline_images/test_backend_pdf/font-heuristica.pdf
54.9 KB
diff --git a/‎lib/matplotlib/tests/test_backend_pdf.py
Lines changed: 51 additions & 1 deletion b/‎lib/matplotlib/tests/test_backend_pdf.py
Lines changed: 51 additions & 1 deletion
diff --git a/‎lib/matplotlib/tests/test_usetex.py
Lines changed: 64 additions & 0 deletions b/‎lib/matplotlib/tests/test_usetex.py
Lines changed: 64 additions & 0 deletions
@@ -64,8 +64,10 @@ jobs:
             python-version: '3.12'
             # https://github.com/matplotlib/matplotlib/issues/29844
             pygobject-ver: '<3.52.0'
-          - os: ubuntu-22.04
+          - name-suffix: "(Extra TeX packages)"
+            os: ubuntu-22.04
             python-version: '3.13'
+            extra-packages: 'texlive-fonts-extra texlive-lang-cyrillic'
             # https://github.com/matplotlib/matplotlib/issues/29844
             pygobject-ver: '<3.52.0'
           - name-suffix: "Free-threaded"
@@ -142,7 +144,8 @@ jobs:
               texlive-latex-recommended \
               texlive-luatex \
               texlive-pictures \
-              texlive-xetex
+              texlive-xetex \
+              ${{ matrix.extra-packages }}
             if [[ "${{ matrix.name-suffix }}" != '(Minimum Versions)' ]]; then
               sudo apt-get install -yy --no-install-recommends ffmpeg poppler-utils
             fi
 
@@ -0,0 +1,9 @@
+PDF files created with usetex now embed subsets of Type 1 fonts
+---------------------------------------------------------------
+
+When using the PDF backend with the usetex feature,
+Matplotlib calls TeX to render the text and formulas in the figure.
+The fonts that get used are usually "Type 1" fonts.
+They used to be embedded in full
+but are now limited to the glyphs that are actually used in the figure.
+This reduces the size of the resulting PDF files.
@@ -722,8 +722,6 @@ def __init__(self, filename, metadata=None):
         self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1))
         self._fontNames = {}     # maps filenames to internal font names
         self._dviFontInfo = {}   # maps dvi font names to embedding information
-        # differently encoded Type-1 fonts may share the same descriptor
-        self._type1Descriptors = {}
         self._character_tracker = _backend_pdf_ps.CharacterTracker()
 
         self.alphaStates = {}   # maps alpha values to graphics state objects
@@ -767,8 +765,7 @@ def __init__(self, filename, metadata=None):
 
     fontNames = _api.deprecated("3.11")(property(lambda self: self._fontNames))
     dviFontInfo = _api.deprecated("3.11")(property(lambda self: self._dviFontInfo))
-    type1Descriptors = _api.deprecated("3.11")(
-        property(lambda self: self._type1Descriptors))
+    type1Descriptors = _api.deprecated("3.11")(property(lambda _: {}))
 
     def newPage(self, width, height):
         self.endStream()
@@ -808,7 +805,14 @@ def newTextnote(self, text, positionRect=[-100, -100, 0, 0]):
                    }
         self.pageAnnotations.append(theNote)
 
-    def _get_subsetted_psname(self, ps_name, charmap):
+    @staticmethod
+    def _get_subset_prefix(charset):
+        """
+        Get a prefix for a subsetted font name.
+
+        The prefix is six uppercase letters followed by a plus sign;
+        see PDF reference section 5.5.3 Font Subsets.
+        """
         def toStr(n, base):
             if n < base:
                 return string.ascii_uppercase[n]
@@ -818,11 +822,15 @@ def toStr(n, base):
                 )
 
         # encode to string using base 26
-        hashed = hash(frozenset(charmap.keys())) % ((sys.maxsize + 1) * 2)
+        hashed = hash(charset) % ((sys.maxsize + 1) * 2)
         prefix = toStr(hashed, 26)
 
         # get first 6 characters from prefix
-        return prefix[:6] + "+" + ps_name
+        return prefix[:6] + "+"
+
+    @staticmethod
+    def _get_subsetted_psname(ps_name, charmap):
+        """Return *ps_name* prefixed with the subset tag for the keys of *charmap*."""
+        subset_tag = PdfFile._get_subset_prefix(frozenset(charmap.keys()))
+        return subset_tag + ps_name
 
     def finalize(self):
         """Write out the various deferred objects and the pdf end matter."""
@@ -994,53 +1002,60 @@ def _embedTeXFont(self, fontinfo):
         _log.debug('Embedding TeX font %s - fontinfo=%s',
                    fontinfo.dvifont.texname, fontinfo.__dict__)
 
-        # Widths
-        widthsObject = self.reserveObject('font widths')
-        tfm = fontinfo.dvifont._tfm
-        # convert from TeX's 12.20 representation to 1/1000 text space units.
-        widths = [(1000 * metrics.tex_width) >> 20
-                  if (metrics := tfm.get_metrics(char)) else 0
-                  for char in range(max(tfm._glyph_metrics, default=-1) + 1)]
-        self.writeObject(widthsObject, widths)
-
-        # Font dictionary
+        # The font dictionary is the top-level object describing a font
         fontdictObject = self.reserveObject('font dictionary')
         fontdict = {
             'Type':      Name('Font'),
             'Subtype':   Name('Type1'),
-            'FirstChar': 0,
-            'LastChar':  len(widths) - 1,
-            'Widths':    widthsObject,
-            }
-
-        # Encoding (if needed)
-        if fontinfo.encodingfile is not None:
-            fontdict['Encoding'] = {
-                'Type': Name('Encoding'),
-                'Differences': [
-                    0, *map(Name, dviread._parse_enc(fontinfo.encodingfile))],
-            }
+        }
 
-        # We have a font file to embed - read it in and apply any effects
+        # Read the font file and apply any encoding changes and effects
         t1font = _type1font.Type1Font(fontinfo.fontfile)
+        if fontinfo.encodingfile is not None:
+            t1font = t1font.with_encoding(
+                {i: c for i, c in enumerate(dviread._parse_enc(fontinfo.encodingfile))}
+            )
         if fontinfo.effects:
             t1font = t1font.transform(fontinfo.effects)
-        fontdict['BaseFont'] = Name(t1font.prop['FontName'])
 
-        # Font descriptors may be shared between differently encoded
-        # Type-1 fonts, so only create a new descriptor if there is no
-        # existing descriptor for this font.
-        effects = (fontinfo.effects.get('slant', 0.0),
-                   fontinfo.effects.get('extend', 1.0))
-        fontdesc = self._type1Descriptors.get((fontinfo.fontfile, effects))
-        if fontdesc is None:
-            fontdesc = self.createType1Descriptor(t1font)
-            self._type1Descriptors[(fontinfo.fontfile, effects)] = fontdesc
-        fontdict['FontDescriptor'] = fontdesc
+        # Reduce the font to only the glyphs used in the document, get the encoding
+        # for that subset, and compute various properties based on the encoding.
+        chars = frozenset(self._character_tracker.used[fontinfo.dvifont.fname])
+        t1font = t1font.subset(chars, self._get_subset_prefix(chars))
+        fontdict['BaseFont'] = Name(t1font.prop['FontName'])
+        # createType1Descriptor writes the font data as a side effect
+        fontdict['FontDescriptor'] = self.createType1Descriptor(t1font)
+        encoding = t1font.prop['Encoding']
+        fontdict['Encoding'] = self._generate_encoding(encoding)
+        fc = fontdict['FirstChar'] = min(encoding.keys(), default=0)
+        lc = fontdict['LastChar'] = max(encoding.keys(), default=255)
+
+        # Convert glyph widths from TeX 12.20 fixed point to 1/1000 text space units
+        tfm = fontinfo.dvifont._tfm
+        widths = [(1000 * metrics.tex_width) >> 20
+                  if (metrics := tfm.get_metrics(char)) else 0
+                  for char in range(fc, lc + 1)]
+        fontdict['Widths'] = widthsObject = self.reserveObject('glyph widths')
+        self.writeObject(widthsObject, widths)
 
         self.writeObject(fontdictObject, fontdict)
         return fontdictObject
 
+
+    def _generate_encoding(self, encoding):
+        """
+        Build an ``Encoding`` dictionary from a mapping of codes to glyph names.
+
+        The ``Differences`` list holds the glyph names in increasing code order;
+        a code number is inserted only where a run of consecutive codes begins.
+        """
+        differences = []
+        # Sentinel: the first code starts a run unless it equals -1
+        # (codes are presumably non-negative -- confirm against callers).
+        expected = -1
+        for code, glyph_name in sorted(encoding.items()):
+            if code != expected:
+                differences.append(code)
+            differences.append(Name(glyph_name))
+            expected = code + 1
+        return {'Type': Name('Encoding'), 'Differences': differences}
+
+
     @_api.delete_parameter("3.11", "fontfile")
     def createType1Descriptor(self, t1font, fontfile=None):
         # Create and write the font descriptor and the font file
@@ -1077,6 +1092,14 @@ def createType1Descriptor(self, t1font, fontfile=None):
         if 0:
             flags |= 1 << 18
 
+        encoding = t1font.prop['Encoding']
+        charset = ''.join(
+            sorted(
+                f'/{c}' for c in encoding.values()
+                if c != '.notdef'
+            )
+        )
+
         descriptor = {
             'Type':        Name('FontDescriptor'),
             'FontName':    Name(t1font.prop['FontName']),
@@ -1090,6 +1113,7 @@ def createType1Descriptor(self, t1font, fontfile=None):
             'FontFile':    fontfileObject,
             'FontFamily':  t1font.prop['FamilyName'],
             'StemV':       50,  # TODO
+            'CharSet':     charset,
             # (see also revision 3874; but not all TeX distros have AFM files!)
             # 'FontWeight': a number where 400 = Regular, 700 = Bold
         }
@@ -2269,6 +2293,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, *, mtext=None):
                 seq += [['font', pdfname, dvifont.size]]
                 oldfont = dvifont
             seq += [['text', x1, y1, [bytes([glyph])], x1+width]]
+            self.file._character_tracker.track(dvifont, chr(glyph))
 
         # Find consecutive text strings with constant y coordinate and
         # combine into a sequence of strings and kerns, or just one
 
@@ -17,17 +17,17 @@
               ...
 """
 
-from collections import namedtuple
 import dataclasses
 import enum
-from functools import cache, lru_cache, partial, wraps
 import logging
 import os
-from pathlib import Path
 import re
 import struct
 import subprocess
 import sys
+from collections import namedtuple
+from functools import cache, lru_cache, partial, wraps
+from pathlib import Path
 
 import numpy as np
 
@@ -583,6 +583,9 @@ class DviFont:
     Attributes
     ----------
     texname : bytes
+    fname : str
+       Compatibility shim so that DviFont can be used with
+       ``_backend_pdf_ps.CharacterTracker``; not a real filename.
     size : float
        Size of the font in Adobe points, converted from the slightly
        smaller TeX points.
@@ -602,6 +605,18 @@ def __init__(self, scale, tfm, texname, vf):
         (1000 * self._tfm.width.get(char, 0)) >> 20
         for char in range(max(self._tfm.width, default=-1) + 1)]))
 
+    @property
+    def fname(self):
+        """The TeX font name decoded as latin-1; a stand-in for a real filename."""
+        return str(self.texname, 'latin-1')
+
+    def _get_fontmap(self, string):
+        """
+        Map every character of *string* to the font that supplies its glyph.
+
+        DviFont has no fallback mechanism, so each character maps to self.
+        """
+        return dict.fromkeys(string, self)
+
     def __eq__(self, other):
         return (type(self) is type(other)
                 and self.texname == other.texname and self.size == other.size)
@@ -1161,8 +1176,8 @@ def _fontfile(cls, suffix, texname):
 
 
 if __name__ == '__main__':
-    from argparse import ArgumentParser
     import itertools
+    from argparse import ArgumentParser
 
     import fontTools.agl
 
 
@@ -66,6 +66,8 @@ class DviFont:
     def __ne__(self, other: object) -> bool: ...
     @property
     def widths(self) -> list[int]: ...
+    # Pseudo-filename (the TeX font name decoded to str); not a path on disk.
+    @property
+    def fname(self) -> str: ...
 
 class Vf(Dvi):
     def __init__(self, filename: str | os.PathLike) -> None: ...
 
@@ -16,7 +16,7 @@
 from matplotlib.backends._backend_pdf_ps import get_glyphs_subset, font_as_file
 from matplotlib.backends.backend_pdf import PdfPages
 from matplotlib.patches import Rectangle
-from matplotlib.testing import _gen_multi_font_text
+from matplotlib.testing import _gen_multi_font_text, _has_tex_package
 from matplotlib.testing.decorators import check_figures_equal, image_comparison
 from matplotlib.testing._markers import needs_usetex
 
@@ -428,3 +428,53 @@ def test_truetype_conversion(recwarn):
             font=Path(__file__).parent / "data/mpltest.ttf", fontsize=80)
     ax.set_xticks([])
     ax.set_yticks([])
+
+
+@pytest.mark.skipif(not _has_tex_package("heuristica"),
+                    reason="LaTeX lacks heuristica package")
+@image_comparison(["font-heuristica.pdf"])
+def test_font_heuristica():
+    """Render usetex text in Heuristica and compare the PDF with the baseline."""
+    # Some Heuristica glyphs use the callothersubr operator
+    preamble = [
+        r'\usepackage{heuristica}',
+        r'\usepackage[T1]{fontenc}',
+        r'\usepackage[utf8]{inputenc}',
+    ]
+    mpl.rcParams['text.latex.preamble'] = '\n'.join(preamble)
+    _, axes = plt.subplots()
+    axes.text(0.1, 0.1, r"BHTem fi ffl 1234", usetex=True, fontsize=50)
+    axes.set_xticks([])
+    axes.set_yticks([])
+
+
+@pytest.mark.skipif(not _has_tex_package("DejaVuSans"),
+                    reason="LaTeX lacks DejaVuSans package")
+@image_comparison(["font-dejavusans.pdf"])
+def test_font_dejavusans():
+    """Render usetex text in DejaVu Sans and compare the PDF with the baseline."""
+    # DejaVuSans composes characters with diacritics via the seac operator
+    preamble = [
+        r'\usepackage{DejaVuSans}',
+        r'\usepackage[T1]{fontenc}',
+        r'\usepackage[utf8]{inputenc}',
+    ]
+    mpl.rcParams['text.latex.preamble'] = '\n'.join(preamble)
+
+    _, axes = plt.subplots()
+    axes.text(0.1, 0.1, r"\textsf{ñäö ABCDabcd}", usetex=True, fontsize=50)
+    axes.text(0.1, 0.3, r"\textsf{fi ffl 1234}", usetex=True, fontsize=50)
+    axes.set_xticks([])
+    axes.set_yticks([])
+
+
+@pytest.mark.skipif(not _has_tex_package("charter"),
+                    reason="LaTeX lacks charter package")
+@image_comparison(["font-bitstream-charter.pdf"])
+def test_font_bitstream_charter():
+    """Render usetex text in Bitstream Charter and compare with the baseline."""
+    preamble = [
+        r'\usepackage{charter}',
+        r'\usepackage[T1]{fontenc}',
+        r'\usepackage[utf8]{inputenc}',
+    ]
+    mpl.rcParams['text.latex.preamble'] = '\n'.join(preamble)
+    _, axes = plt.subplots()
+    axes.text(0.1, 0.1, r"åüš ABCDabcd", usetex=True, fontsize=50)
+    axes.text(0.1, 0.3, r"fi ffl 1234", usetex=True, fontsize=50)
+    axes.set_xticks([])
+    axes.set_yticks([])
@@ -1,3 +1,4 @@
+import re
 from tempfile import TemporaryFile
 
 import numpy as np
@@ -156,6 +157,69 @@ def test_missing_psfont(fmt, monkeypatch):
         fig.savefig(tmpfile, format=fmt)
 
 
+def test_pdf_type1_font_subsetting():
+    """Test that fonts in PDF output are properly subset."""
+    pikepdf = pytest.importorskip("pikepdf")
+
+    mpl.rcParams["text.usetex"] = True
+    mpl.rcParams["text.latex.preamble"] = r"\usepackage{amssymb}"
+    fig, ax = plt.subplots()
+    ax.text(0.2, 0.7, r"$\int_{-\infty}^{\aleph}\sqrt{\alpha\beta\gamma}\mathrm{d}x$")
+    ax.text(0.2, 0.5, r"$\mathfrak{x}\circledcirc\mathfrak{y}\in\mathbb{R}$")
+
+    # Maps each font's name (without the subset tag) to the byte length of
+    # its embedded font file.
+    lengths = {}
+    with TemporaryFile() as tmpfile:
+        fig.savefig(tmpfile, format="pdf")
+        tmpfile.seek(0)
+        # Open the PDF as a context manager so that it is closed before the
+        # temporary file it reads from.
+        with pikepdf.Pdf.open(tmpfile) as pdf:
+            page = pdf.pages[0]
+            for font_name, font in page.Resources.Font.items():
+                assert font.Subtype == "/Type1", (
+                    f"Font {font_name}={font} is not a Type 1 font"
+                )
+
+                # Subsetted font names have a 6-character tag followed by a '+'
+                base_font = str(font["/BaseFont"]).removeprefix("/")
+                assert re.match(r"^[A-Z]{6}\+", base_font), (
+                    f"Font {font_name}={base_font} lacks a subset indicator tag"
+                )
+                assert "/FontFile" in font.FontDescriptor, (
+                    f"Type 1 font {font_name}={base_font} is not embedded"
+                )
+                _, original_name = base_font.split("+", 1)
+                font_file = font["/FontDescriptor"]["/FontFile"]
+                lengths[original_name] = len(bytes(font_file))
+
+    print("Embedded font stream lengths:", lengths)
+    # We should have several fonts, each much smaller than the original.
+    # I get under 10kB on my system for each font, but allow 15kB in case
+    # of differences in the font files.
+    assert {
+        'CMEX10',
+        'CMMI12',
+        'CMR12',
+        'CMSY10',
+        'CMSY8',
+        'EUFM10',
+        'MSAM10',
+        'MSBM10',
+    }.issubset(lengths), "Missing expected fonts in the PDF"
+    # Use a distinct loop variable: rebinding the dict's own name while
+    # iterating over it would leave it bound to an int afterwards.
+    for font_name, font_length in lengths.items():
+        assert font_length < 15_000, (
+            f"Font {font_name}={font_length} is larger than expected"
+        )
+
+    # For comparison, lengths without subsetting on my system:
+    #  'CMEX10': 29686
+    #  'CMMI12': 36176
+    #  'CMR12': 32157
+    #  'CMSY10': 32004
+    #  'CMSY8': 32061
+    #  'EUFM10': 20546
+    #  'MSAM10': 31199
+    #  'MSBM10': 34129
+
+
 try:
     _old_gs_version = mpl._get_executable_info('gs').version < parse_version('9.55')
 except mpl.ExecutableNotFoundError: