Skip to content

Commit 054ea51

Browse files
TimWollabukka
authored and committed
Fix GHSA-p3x9-6h7p-cgfc: libxml streams wrong content-type on redirect
libxml streams use wrong content-type header when requesting a redirected resource.
1 parent fac131f commit 054ea51

File tree

4 files changed

+223
-32
lines changed

4 files changed

+223
-32
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Basic)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Intentionally using non-standard casing for content-type to verify that the header name is matched case-insensitively.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\nconteNt-tyPe: text/html; charset=utf-8\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Missing content-type)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Missing content-type in actual response.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Reason with colon)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Status line whose reason phrase contains a colon; the actual response has no content-type header.
33+
yield "data://text/plain,HTTP/1.1 200 OK: This is fine\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"

ext/libxml/libxml.c

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -525,41 +525,52 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
525525
if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
526526
zval *header;
527527

528-
ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
528+
/* Scan backwards: The header array might contain the headers for multiple responses, if
529+
* a redirect was followed.
530+
*/
531+
ZEND_HASH_REVERSE_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
529532
const char buf[] = "Content-Type:";
530-
if (Z_TYPE_P(header) == IS_STRING &&
531-
!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
532-
char needle[] = "charset=";
533-
char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
534-
char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), strlen(needle));
535-
536-
if (encoding) {
537-
char *end;
538-
539-
encoding += sizeof("charset=")-1;
540-
if (*encoding == '"') {
541-
encoding++;
542-
}
543-
end = strchr(encoding, ';');
544-
if (end == NULL) {
545-
end = encoding + strlen(encoding);
546-
}
547-
end--; /* end == encoding-1 isn't a buffer underrun */
548-
while (*end == ' ' || *end == '\t') {
549-
end--;
550-
}
551-
if (*end == '"') {
552-
end--;
553-
}
554-
if (encoding >= end) continue;
555-
*(end+1) = '\0';
556-
enc = xmlParseCharEncoding(encoding);
557-
if (enc <= XML_CHAR_ENCODING_NONE) {
558-
enc = XML_CHAR_ENCODING_NONE;
533+
if (Z_TYPE_P(header) == IS_STRING) {
534+
/* If no colon is found in the header, or a space occurs before the first colon, we assume it's the HTTP status line and bail out. */
535+
char *colon = memchr(Z_STRVAL_P(header), ':', Z_STRLEN_P(header));
536+
char *space = memchr(Z_STRVAL_P(header), ' ', Z_STRLEN_P(header));
537+
if (colon == NULL || space < colon) {
538+
break;
539+
}
540+
541+
if (!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
542+
char needle[] = "charset=";
543+
char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
544+
char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
545+
546+
if (encoding) {
547+
char *end;
548+
549+
encoding += sizeof("charset=")-1;
550+
if (*encoding == '"') {
551+
encoding++;
552+
}
553+
end = strchr(encoding, ';');
554+
if (end == NULL) {
555+
end = encoding + strlen(encoding);
556+
}
557+
end--; /* end == encoding-1 isn't a buffer underrun */
558+
while (*end == ' ' || *end == '\t') {
559+
end--;
560+
}
561+
if (*end == '"') {
562+
end--;
563+
}
564+
if (encoding >= end) continue;
565+
*(end+1) = '\0';
566+
enc = xmlParseCharEncoding(encoding);
567+
if (enc <= XML_CHAR_ENCODING_NONE) {
568+
enc = XML_CHAR_ENCODING_NONE;
569+
}
559570
}
571+
efree(haystack);
572+
break; /* found content-type */
560573
}
561-
efree(haystack);
562-
break; /* found content-type */
563574
}
564575
} ZEND_HASH_FOREACH_END();
565576
}

0 commit comments

Comments
 (0)