Skip to content

Commit 22d6189

Browse files
committed
Fix spec tests
1 parent: 70e5e6f; commit: 22d6189

File tree

6 files changed

Lines changed: 36 additions & 25 deletions

6 files changed

Lines changed: 36 additions & 25 deletions

markdown_it/common/normalize_url.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
from typing import Callable
44
from urllib.parse import urlparse, urlunparse, quote, unquote # noqa: F401
55

6+
from .utils import ESCAPABLE
7+
68
# TODO below we port the use of the JS packages:
79
# var mdurl = require('mdurl')
810
# var punycode = require('punycode')
@@ -30,9 +32,10 @@
3032
# ################# Copied from Commonmark.py #################
3133

3234
ENTITY = "&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});"
33-
ESCAPABLE = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
3435
reBackslashOrAmp = re.compile(r"[\\&]")
35-
reEntityOrEscapedChar = re.compile("\\\\" + ESCAPABLE + "|" + ENTITY, re.IGNORECASE)
36+
reEntityOrEscapedChar = re.compile(
37+
"\\\\" + "[" + ESCAPABLE + "]|" + ENTITY, re.IGNORECASE
38+
)
3639

3740

3841
def unescape_char(s):
@@ -112,13 +115,14 @@ def unescape_unquote(x):
112115
return unquote(unescape_string(x))
113116

114117

115-
def normalizeLinkText(title):
118+
def normalizeLinkText(link):
116119
"""Normalize autolink content::
117120
118121
<destination>
119122
~~~~~~~~~~~
120123
"""
121-
(scheme, netloc, path, params, query, fragment) = urlparse(title)
124+
(scheme, netloc, path, params, query, fragment) = urlparse(link)
125+
print((scheme, netloc, path, params, query, fragment))
122126
if scheme in RECODE_HOSTNAME_FOR:
123127
url = urlunparse(
124128
(
@@ -131,7 +135,7 @@ def normalizeLinkText(title):
131135
)
132136
)
133137
else:
134-
url = unescape_unquote(title)
138+
url = unescape_unquote(link)
135139
return url
136140

137141
# TODO the selective encoding below should probably be done here,

markdown_it/common/utils.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,15 @@ def unescapeAll(string: str):
150150
return html.unescape(string)
151151

152152

153+
ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
154+
ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")
155+
156+
157+
def stripEscape(string):
158+
"""Strip escape \\ characters"""
159+
return ESCAPE_CHAR.sub(r"\1", string)
160+
161+
153162
# //////////////////////////////////////////////////////////////////////////////
154163

155164
# TODO This section changed quite a lot, should re-check

markdown_it/helpers/parse_link_title.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
"""Parse link title
22
"""
3-
4-
from ..common.utils import unescapeAll, charCodeAt
3+
from ..common.utils import unescapeAll, charCodeAt, stripEscape
54

65

76
class _Result:
@@ -13,6 +12,9 @@ def __init__(self):
1312
self.lines = 0
1413
self.str = ""
1514

15+
def __str__(self):
16+
return self.str
17+
1618

1719
def parseLinkTitle(string, pos, maximum):
1820
lines = 0
@@ -37,9 +39,11 @@ def parseLinkTitle(string, pos, maximum):
3739
while pos < maximum:
3840
code = charCodeAt(string, pos)
3941
if code == marker:
42+
title = string[start + 1 : pos]
43+
title = unescapeAll(stripEscape(title))
4044
result.pos = pos + 1
4145
result.lines = lines
42-
result.str = unescapeAll(string[start + 1 : pos])
46+
result.str = title
4347
result.ok = True
4448
return result
4549
elif code == 0x0A:

markdown_it/rules_block/fence.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# fences (``` lang, ~~~ lang)
22
import logging
33

4-
from ..common.utils import charCodeAt
4+
from ..common.utils import charCodeAt, stripEscape
55
from .state_block import StateBlock
66

77
LOGGER = logging.getLogger(__name__)
@@ -97,7 +97,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
9797
state.line = nextLine + (1 if haveEndMarker else 0)
9898

9999
token = state.push("fence", "code", 0)
100-
token.info = params
100+
token.info = stripEscape(params)
101101
token.content = state.getLines(startLine + 1, nextLine, length, True)
102102
token.markup = markup
103103
token.map = [startLine, state.line]

markdown_it/rules_inline/backticks.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,12 @@ def backtick(state: StateInline, silent: bool):
4343
token = state.push("code_inline", "code", 0)
4444
token.markup = marker
4545
token.content = state.src[pos:matchStart].replace("\n", " ")
46-
if token.content.startswith(" ") and token.content.endswith(" "):
46+
if (
47+
token.content.startswith(" ")
48+
and token.content.endswith(" ")
49+
and len(token.content.strip()) > 0
50+
):
4751
token.content = token.content[1:-1]
48-
4952
state.pos = matchEnd
5053
return True
5154

tests/test_cmark_spec/test_spec.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,24 +19,15 @@ def test_file(file_regression):
1919

2020
@pytest.mark.parametrize("entry", json.loads(TESTS_INPUT.read_text()))
2121
def test_spec(entry):
22-
if entry["example"] in [108, 334]:
23-
# TODO fix failing empty code span tests ``` ``` -> <code> </code> not <code></code>
24-
pytest.skip("empty code span spacing")
25-
if entry["example"] in [
26-
171, # [foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n
27-
308, # [foo](/bar\\* \"ti\\*tle\")\n
28-
309, # [foo]\n\n[foo]: /bar\\* \"ti\\*tle\"\n
29-
310, # ``` foo\\+bar\nfoo\n```\n
30-
502, # [link](/url \"title \\\"&quot;\")\n
31-
599, # <http://example.com/\\[\\>\n
32-
]:
33-
# TODO fix url backslash escaping
34-
pytest.skip("url backslash escaping")
22+
if entry["example"] == 599:
23+
# TODO fix Backslash-escapes do not work inside autolinks
24+
pytest.skip("autolinks backslash escape")
3525
md = MarkdownIt("commonmark")
3626
output = md.render(entry["markdown"])
3727
expected = entry["html"]
3828

3929
if entry["example"] == 593:
30+
# this doesn't have any bearing on the output
4031
output = output.replace("mailto", "MAILTO")
4132
if entry["example"] in [187, 209, 210]:
4233
# this doesn't have any bearing on the output

0 commit comments

Comments
 (0)