2 files changed: +12 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -412,7 +412,11 @@ def _detect_html_xhtml(
412
412
else :
413
413
return "application/xml"
414
414
415
- if re .match (r"<!doctype\s+html|<html|<head|<body" , content_str ):
415
+ if re .match (
416
+ r"(<script.*?>.*?</script>\s*)?(<!doctype\s+html|<html|<head|<body)" ,
417
+ content_str ,
418
+ re .DOTALL ,
419
+ ):
416
420
return "text/html"
417
421
418
422
p = re .compile (
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,13 @@ def test_guess_format(tmp_path):
132
132
doc_path = Path ("./tests/data/html/wiki_duck.html" )
133
133
assert dci ._guess_format (doc_path ) == InputFormat .HTML
134
134
135
+ html_str = ( # HTML starting with a script
136
+ "<script>\n console.log('foo');\n </script>"
137
+ '<!doctype html>\n <html lang="en-us class="no-js"></html>'
138
+ )
139
+ stream = DocumentStream (name = "lorem_ipsum" , stream = BytesIO (f"{ html_str } " .encode ()))
140
+ assert dci ._guess_format (stream ) == InputFormat .HTML
141
+
135
142
# Valid MD
136
143
buf = BytesIO (Path ("./tests/data/md/wiki.md" ).open ("rb" ).read ())
137
144
stream = DocumentStream (name = "wiki.md" , stream = buf )
You can’t perform that action at this time.
0 commit comments