👌 Improve parsing of nested amsmath (#119)

chrisjsewell · web-flow · commit 3f7fcc6794f4 · 2024-09-07T14:39:48.000+02:00
The previous logic was problematic for amsmath blocks nested in other blocks (such as blockquotes). The new parsing code now principally follows the logic in `markdown_it/rules_block/fence.py` (see also https://spec.commonmark.org/0.30/#fenced-code-blocks), except that: 1. it allows for a closing tag on the same line as the opening tag, and 2. it does not allow for an opening tag without a closing tag (i.e. no auto-closing).
diff --git a/mdit_py_plugins/amsmath/__init__.py b/mdit_py_plugins/amsmath/__init__.py
@@ -54,7 +54,7 @@
 # whose total width is the actual width of the contents;
 # thus they can be used as a component in a containing expression
 
-RE_OPEN = re.compile(r"\\begin\{(" + "|".join(ENVIRONMENTS) + r")([\*]?)\}")
+RE_OPEN = r"\\begin\{(" + "|".join(ENVIRONMENTS) + r")([\*]?)\}"
 
 
 def amsmath_plugin(
@@ -95,47 +95,60 @@ def render_amsmath_block(
     md.add_render_rule("amsmath", render_amsmath_block)
 
 
-def match_environment(string: str) -> None | tuple[str, str, int]:
-    match_open = RE_OPEN.match(string)
-    if not match_open:
-        return None
-    environment = match_open.group(1)
-    numbered = match_open.group(2)
-    match_close = re.search(
-        r"\\end\{" + environment + numbered.replace("*", r"\*") + "\\}", string
-    )
-    if not match_close:
-        return None
-    return (environment, numbered, match_close.end())
-
-
 def amsmath_block(
     state: StateBlock, startLine: int, endLine: int, silent: bool
 ) -> bool:
+    # note the code principally follows the logic in markdown_it/rules_block/fence.py,
+    # except that:
+    # (a) it allows for closing tag on same line as opening tag
+    # (b) it does not allow for opening tag without closing tag (i.e. no auto-closing)
+
     if is_code_block(state, startLine):
         return False
 
-    begin = state.bMarks[startLine] + state.tShift[startLine]
+    # does the first line contain the beginning of an amsmath environment
+    first_start = state.bMarks[startLine] + state.tShift[startLine]
+    first_end = state.eMarks[startLine]
+    first_text = state.src[first_start:first_end]
 
-    outcome = match_environment(state.src[begin:])
-    if not outcome:
+    if not (match_open := re.match(RE_OPEN, first_text)):
         return False
-    environment, numbered, endpos = outcome
-    endpos += begin
-
-    line = startLine
-    while line < endLine:
-        if endpos >= state.bMarks[line] and endpos <= state.eMarks[line]:
-            # line for end of block math found ...
-            state.line = line + 1
+
+    # construct the closing tag
+    environment = match_open.group(1)
+    numbered = match_open.group(2)
+    closing = rf"\end{{{match_open.group(1)}{match_open.group(2)}}}"
+
+    # start looking for the closing tag, including the current line
+    nextLine = startLine - 1
+
+    while True:
+        nextLine += 1
+        if nextLine >= endLine:
+            # reached the end of the block without finding the closing tag
+            return False
+
+        next_start = state.bMarks[nextLine] + state.tShift[nextLine]
+        next_end = state.eMarks[nextLine]
+        if next_start < first_end and state.sCount[nextLine] < state.blkIndent:
+            # non-empty line with negative indent should stop the list:
+            # - \begin{align}
+            #  test
+            return False
+
+        if state.src[next_start:next_end].rstrip().endswith(closing):
+            # found the closing tag
             break
-        line += 1
+
+    state.line = nextLine + 1
 
     if not silent:
         token = state.push("amsmath", "math", 0)
         token.block = True
-        token.content = state.src[begin:endpos]
+        token.content = state.getLines(
+            startLine, state.line, state.sCount[startLine], False
+        )
         token.meta = {"environment": environment, "numbered": numbered}
-        token.map = [startLine, line]
+        token.map = [startLine, nextLine]
 
     return True
diff --git a/tests/fixtures/amsmath.md b/tests/fixtures/amsmath.md
@@ -11,6 +11,15 @@ a = 1
 </div>
 .
 
+equation environment on one line:
+.
+\begin{equation}a = 1\end{equation}
+.
+<div class="math amsmath">
+\begin{equation}a = 1\end{equation}
+</div>
+.
+
 equation* environment:
 .
 \begin{equation*}
@@ -181,13 +190,43 @@ equation environment, in list:
 <li>
 <div class="math amsmath">
 \begin{equation}
-  a = 1
-  \end{equation}
+a = 1
+\end{equation}
 </div>
 </li>
 </ul>
 .
 
+equation environment, in block quote:
+.
+> \begin{matrix}
+> -0.707 &  0.408 &  0.577 \\
+> -0.707 & -0.408 & -0.577 \\
+> -0.    & -0.816 &  0.577
+> \end{matrix}
+
+> \begin{equation}
+a = 1
+\end{equation}
+.
+<blockquote>
+<div class="math amsmath">
+\begin{matrix}
+-0.707 &amp;  0.408 &amp;  0.577 \\
+-0.707 &amp; -0.408 &amp; -0.577 \\
+-0.    &amp; -0.816 &amp;  0.577
+\end{matrix}
+</div>
+</blockquote>
+<blockquote>
+<div class="math amsmath">
+\begin{equation}
+a = 1
+\end{equation}
+</div>
+</blockquote>
+.
+
 `alignat` environment and HTML escaping
 .
 \begin{alignat}{3}
@@ -242,7 +281,7 @@ Indented by 4 spaces, DISABLE-CODEBLOCKS
 .
 <div class="math amsmath">
 \begin{equation}
-    a = 1
-    \end{equation}
+a = 1
+\end{equation}
 </div>
 .