Skip to content

Commit fa93233

Browse files
authored
Merge pull request #11627 from cdrini/refactor/rm-some-dead-code
Remove dead code handling missing dependencies
2 parents f89d394 + ccd5f3d commit fa93233

File tree

1 file changed

+12
-28
lines changed

1 file changed

+12
-28
lines changed

openlibrary/core/helpers.py

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,18 @@
44
import re
55
from collections.abc import Callable, Iterable
66
from datetime import date, datetime
7-
from typing import Any
7+
from typing import Any, cast
88
from urllib.parse import urlsplit
99

1010
import babel
1111
import babel.core
1212
import babel.dates
1313
import babel.numbers
14+
import genshi
15+
import genshi.filters
1416
import web
1517
from babel.core import Locale
16-
17-
try:
18-
import genshi
19-
import genshi.filters
20-
except ImportError:
21-
genshi = None
22-
23-
try:
24-
from bs4 import BeautifulSoup
25-
except ImportError:
26-
BeautifulSoup = None
18+
from bs4 import BeautifulSoup
2719

2820
from infogami import config
2921
from infogami.infobase.client import Nothing
@@ -60,18 +52,14 @@
6052
__docformat__ = "restructuredtext en"
6153

6254

63-
def sanitize(html: str, encoding: str = 'utf8') -> str:
55+
def sanitize(html: str, encoding: str = 'utf8', beautify: bool = True) -> str:
6456
"""Removes unsafe tags and attributes from html and adds
6557
``rel="nofollow"`` attribute to all external links.
6658
Use encoding=None when passing Unicode strings.
6759
encoding="utf8" matches default format for earlier versions of Genshi
6860
https://genshi.readthedocs.io/en/latest/upgrade/#upgrading-from-genshi-0-6-x-to-the-development-version
6961
"""
7062

71-
# Can't sanitize unless genshi module is available
72-
if genshi is None:
73-
return html
74-
7563
def get_nofollow(name, event):
7664
attrs = event[1][1]
7765

@@ -82,29 +70,25 @@ def get_nofollow(name, event):
8270
return 'nofollow'
8371

8472
try:
85-
html = genshi.HTML(html, encoding=encoding)
73+
html_stream = genshi.HTML(html, encoding=encoding)
8674

8775
# except (genshi.ParseError, UnicodeDecodeError, UnicodeError) as e:
8876
# don't catch Unicode errors so we can tell if we're getting bytes
8977
except genshi.ParseError:
90-
if BeautifulSoup:
91-
# Bad html. Tidy it up using BeautifulSoup
78+
# Bad html. Tidy it up using BeautifulSoup
79+
if beautify:
9280
html = str(BeautifulSoup(html, "lxml"))
93-
try:
94-
html = genshi.HTML(html)
95-
except Exception:
96-
# Failed to sanitize.
97-
# We can't do any better than returning the original HTML, without sanitizing.
98-
return html
81+
# Avoid infinite recursion by disabling beautify on the next call
82+
return sanitize(html, encoding=encoding, beautify=False)
9983
else:
10084
raise
10185

10286
stream = (
103-
html
87+
html_stream
10488
| genshi.filters.HTMLSanitizer()
10589
| genshi.filters.Transformer("//a").attr("rel", get_nofollow)
10690
)
107-
return stream.render()
91+
return cast(str, stream.render())
10892

10993

11094
class NothingEncoder(json.JSONEncoder):

0 commit comments

Comments
 (0)