Skip to content

Commit 5613dc6

Browse files
committed
utf8_hop_forward: Don't go over edge of buffer
even in the presence of malformed UTF-8. This preserves the previous behavior: if you start at one byte past the edge of the buffer, the function returns that position.
1 parent cfe4427 commit 5613dc6

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

inline.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2659,7 +2659,7 @@ start of the next character.
26592659
26602660
C<off> must be non-negative.
26612661
2662-
C<s> must be before or equal to C<end>.
2662+
C<s> must be before or equal to C<end>. If after, the function panics.
26632663
26642664
When moving forward it will not move beyond C<end>.
26652665
@@ -2677,19 +2677,28 @@ Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
26772677
* the bitops (especially ~) can create illegal UTF-8.
26782678
* In other words: in Perl UTF-8 is not just for Unicode. */
26792679

2680-
assert(s <= end);
26812680
assert(off >= 0);
26822681

2682+
if (UNLIKELY(s >= end)) {
2683+
if (s == end) {
2684+
return (U8 *) end;
2685+
}
2686+
2687+
Perl_croak_nocontext("panic: Start of forward hop (0x%p) is %zd bytes"
2688+
" beyond legal end position (0x%p)",
2689+
s, 1 + s - end, end);
2690+
}
2691+
26832692
if (off && UNLIKELY(UTF8_IS_CONTINUATION(*s))) {
26842693
/* Get to next non-continuation byte */
26852694
do {
26862695
s++;
26872696
}
2688-
while (UTF8_IS_CONTINUATION(*s));
2697+
while (s < end && UTF8_IS_CONTINUATION(*s));
26892698
off--;
26902699
}
26912700

2692-
while (off--) {
2701+
while (off-- && s < end) {
26932702
STRLEN skip = UTF8SKIP(s);
26942703
if ((STRLEN)(end - s) <= skip) {
26952704
GCC_DIAG_IGNORE(-Wcast-qual)

0 commit comments

Comments
 (0)