Fix spec tests

chrisjsewell · chrisjsewell · commit 22d6189c5a2b · 2020-03-28T12:37:06.000Z
diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py
@@ -3,6 +3,8 @@
 from typing import Callable
 from urllib.parse import urlparse, urlunparse, quote, unquote  # noqa: F401
 
+from .utils import ESCAPABLE
+
 # TODO below we port the use of the JS packages:
 # var mdurl        = require('mdurl')
 # var punycode     = require('punycode')
@@ -30,9 +32,10 @@
 #  ################# Copied from Commonmark.py #################
 
 ENTITY = "&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});"
-ESCAPABLE = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
 reBackslashOrAmp = re.compile(r"[\\&]")
-reEntityOrEscapedChar = re.compile("\\\\" + ESCAPABLE + "|" + ENTITY, re.IGNORECASE)
+reEntityOrEscapedChar = re.compile(
+    "\\\\" + "[" + ESCAPABLE + "]|" + ENTITY, re.IGNORECASE
+)
 
 
 def unescape_char(s):
@@ -112,13 +115,13 @@ def unescape_unquote(x):
     return unquote(unescape_string(x))
 
 
-def normalizeLinkText(title):
+def normalizeLinkText(link):
     """Normalize autolink content::
 
         <destination>
          ~~~~~~~~~~~
     """
-    (scheme, netloc, path, params, query, fragment) = urlparse(title)
+    (scheme, netloc, path, params, query, fragment) = urlparse(link)
     if scheme in RECODE_HOSTNAME_FOR:
         url = urlunparse(
             (
@@ -131,7 +134,7 @@ def normalizeLinkText(title):
             )
         )
     else:
-        url = unescape_unquote(title)
+        url = unescape_unquote(link)
     return url
 
     # TODO the selective encoding below should probably be done here,
diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py
@@ -150,6 +150,15 @@ def unescapeAll(string: str):
     return html.unescape(string)
 
 
+ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
+ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")
+
+
+def stripEscape(string):
+    """Strip escape \\ characters"""
+    return ESCAPE_CHAR.sub(r"\1", string)
+
+
 # //////////////////////////////////////////////////////////////////////////////
 
 # TODO This section changed quite a lot, should re-check
diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py
@@ -1,7 +1,6 @@
 """Parse link title
 """
-
-from ..common.utils import unescapeAll, charCodeAt
+from ..common.utils import unescapeAll, charCodeAt, stripEscape
 
 
 class _Result:
@@ -13,6 +12,9 @@ def __init__(self):
         self.lines = 0
         self.str = ""
 
+    def __str__(self):
+        return self.str
+
 
 def parseLinkTitle(string, pos, maximum):
     lines = 0
@@ -37,9 +39,11 @@ def parseLinkTitle(string, pos, maximum):
     while pos < maximum:
         code = charCodeAt(string, pos)
         if code == marker:
+            title = string[start + 1 : pos]
+            title = unescapeAll(stripEscape(title))
             result.pos = pos + 1
             result.lines = lines
-            result.str = unescapeAll(string[start + 1 : pos])
+            result.str = title
             result.ok = True
             return result
         elif code == 0x0A:
diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
@@ -1,7 +1,7 @@
 # fences (``` lang, ~~~ lang)
 import logging
 
-from ..common.utils import charCodeAt
+from ..common.utils import charCodeAt, stripEscape
 from .state_block import StateBlock
 
 LOGGER = logging.getLogger(__name__)
@@ -97,7 +97,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
     state.line = nextLine + (1 if haveEndMarker else 0)
 
     token = state.push("fence", "code", 0)
-    token.info = params
+    token.info = stripEscape(params)
     token.content = state.getLines(startLine + 1, nextLine, length, True)
     token.markup = markup
     token.map = [startLine, state.line]
diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py
@@ -43,9 +43,13 @@ def backtick(state: StateInline, silent: bool):
                 token = state.push("code_inline", "code", 0)
                 token.markup = marker
                 token.content = state.src[pos:matchStart].replace("\n", " ")
-                if token.content.startswith(" ") and token.content.endswith(" "):
+                # drop one padding space per side, unless content is only spaces
+                if (
+                    token.content.startswith(" ")
+                    and token.content.endswith(" ")
+                    and token.content.strip(" ")
+                ):
                     token.content = token.content[1:-1]
-
             state.pos = matchEnd
             return True
 
diff --git a/tests/test_cmark_spec/test_spec.py b/tests/test_cmark_spec/test_spec.py
@@ -19,24 +19,15 @@ def test_file(file_regression):
 
 @pytest.mark.parametrize("entry", json.loads(TESTS_INPUT.read_text()))
 def test_spec(entry):
-    if entry["example"] in [108, 334]:
-        # TODO fix failing empty code span tests ``` ``` -> <code> </code> not <code></code>
-        pytest.skip("empty code span spacing")
-    if entry["example"] in [
-        171,  # [foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n
-        308,  # [foo](/bar\\* \"ti\\*tle\")\n
-        309,  # [foo]\n\n[foo]: /bar\\* \"ti\\*tle\"\n
-        310,  # ``` foo\\+bar\nfoo\n```\n
-        502,  # [link](/url \"title \\\"&quot;\")\n
-        599,  # <http://example.com/\\[\\>\n
-    ]:
-        # TODO fix url backslash escaping
-        pytest.skip("url backslash escaping")
+    if entry["example"] == 599:
+        # TODO fix Backslash-escapes do not work inside autolinks
+        pytest.skip("autolinks backslash escape")
     md = MarkdownIt("commonmark")
     output = md.render(entry["markdown"])
     expected = entry["html"]
 
     if entry["example"] == 593:
+        # this doesn't have any bearing on the output
         output = output.replace("mailto", "MAILTO")
     if entry["example"] in [187, 209, 210]:
         # this doesn't have any bearing on the output