@@ -283,7 +283,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
283
283
case REOP_loop :
284
284
case REOP_lookahead :
285
285
case REOP_negative_lookahead :
286
- case REOP_bne_char_pos :
287
286
val = get_u32 (buf + pos + 1 );
288
287
val += (pos + 5 );
289
288
printf (" %u" , val );
@@ -921,21 +920,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
921
920
}
922
921
923
922
/* Return:
924
- 1 if the opcodes in bc_buf[] always advance the character pointer.
925
- 0 if the character pointer may not be advanced.
926
- -1 if the code may depend on side effects of its previous execution (backreference)
923
+ - true if the opcodes may not advance the char pointer
924
+ - false if the opcodes always advance the char pointer
927
925
*/
928
- static int re_check_advance (const uint8_t * bc_buf , int bc_buf_len )
926
+ static BOOL re_need_check_advance (const uint8_t * bc_buf , int bc_buf_len )
929
927
{
930
- int pos , opcode , ret , len , i ;
931
- uint32_t val , last ;
932
- BOOL has_back_reference ;
933
- uint8_t capture_bitmap [CAPTURE_COUNT_MAX ];
928
+ int pos , opcode , len ;
929
+ uint32_t val ;
930
+ BOOL ret ;
934
931
935
- ret = -2 ; /* not known yet */
932
+ ret = TRUE;
936
933
pos = 0 ;
937
- has_back_reference = FALSE;
938
- memset (capture_bitmap , 0 , sizeof (capture_bitmap ));
939
934
940
935
while (pos < bc_buf_len ) {
941
936
opcode = bc_buf [pos ];
@@ -955,8 +950,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
955
950
case REOP_dot :
956
951
case REOP_any :
957
952
simple_char :
958
- if (ret == -2 )
959
- ret = 1 ;
953
+ ret = FALSE;
960
954
break ;
961
955
case REOP_line_start :
962
956
case REOP_line_end :
@@ -970,41 +964,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
970
964
break ;
971
965
case REOP_save_start :
972
966
case REOP_save_end :
973
- val = bc_buf [pos + 1 ];
974
- capture_bitmap [val ] |= 1 ;
975
- break ;
976
967
case REOP_save_reset :
977
- {
978
- val = bc_buf [pos + 1 ];
979
- last = bc_buf [pos + 2 ];
980
- while (val < last )
981
- capture_bitmap [val ++ ] |= 1 ;
982
- }
983
- break ;
984
968
case REOP_back_reference :
985
969
case REOP_backward_back_reference :
986
- val = bc_buf [pos + 1 ];
987
- capture_bitmap [val ] |= 2 ;
988
- has_back_reference = TRUE;
989
970
break ;
990
971
default :
991
972
/* safe behavior: we cannot predict the outcome */
992
- if (ret == -2 )
993
- ret = 0 ;
994
- break ;
973
+ return TRUE;
995
974
}
996
975
pos += len ;
997
976
}
998
- if (has_back_reference ) {
999
- /* check if there is back reference which references a capture
1000
- made in the some code */
1001
- for (i = 0 ; i < CAPTURE_COUNT_MAX ; i ++ ) {
1002
- if (capture_bitmap [i ] == 3 )
1003
- return -1 ;
1004
- }
1005
- }
1006
- if (ret == -2 )
1007
- ret = 0 ;
1008
977
return ret ;
1009
978
}
1010
979
@@ -1583,8 +1552,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1583
1552
running the atom after the first quant_min times,
1584
1553
then there is no match. We remove this test when we
1585
1554
are sure the atom always advances the position. */
1586
- add_zero_advance_check = ( re_check_advance (s -> byte_code .buf + last_atom_start ,
1587
- s -> byte_code .size - last_atom_start ) == 0 );
1555
+ add_zero_advance_check = re_need_check_advance (s -> byte_code .buf + last_atom_start ,
1556
+ s -> byte_code .size - last_atom_start );
1588
1557
1589
1558
{
1590
1559
int len , pos ;
@@ -1601,38 +1570,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1601
1570
}
1602
1571
if (quant_max == 0 ) {
1603
1572
s -> byte_code .size = last_atom_start ;
1604
- } else if (quant_max == 1 ) {
1605
- if (dbuf_insert (& s -> byte_code , last_atom_start , 5 ))
1606
- goto out_of_memory ;
1607
- s -> byte_code .buf [last_atom_start ] = REOP_split_goto_first +
1608
- greedy ;
1609
- put_u32 (s -> byte_code .buf + last_atom_start + 1 , len );
1610
- } else if (quant_max == INT32_MAX ) {
1573
+ } else if (quant_max == 1 || quant_max == INT32_MAX ) {
1574
+ BOOL has_goto = (quant_max == INT32_MAX );
1611
1575
if (dbuf_insert (& s -> byte_code , last_atom_start , 5 + add_zero_advance_check ))
1612
1576
goto out_of_memory ;
1613
1577
s -> byte_code .buf [last_atom_start ] = REOP_split_goto_first +
1614
1578
greedy ;
1615
1579
put_u32 (s -> byte_code .buf + last_atom_start + 1 ,
1616
- len + 5 + add_zero_advance_check );
1580
+ len + 5 * has_goto + add_zero_advance_check * 2 );
1617
1581
if (add_zero_advance_check ) {
1618
- /* avoid infinite loop by stoping the
1619
- recursion if no advance was made in the
1620
- atom (only works if the atom has no
1621
- side effect) */
1622
1582
s -> byte_code .buf [last_atom_start + 1 + 4 ] = REOP_push_char_pos ;
1623
- re_emit_goto (s , REOP_bne_char_pos , last_atom_start );
1624
- } else {
1625
- re_emit_goto (s , REOP_goto , last_atom_start );
1583
+ re_emit_op (s , REOP_check_advance );
1626
1584
}
1585
+ if (has_goto )
1586
+ re_emit_goto (s , REOP_goto , last_atom_start );
1627
1587
} else {
1628
- if (dbuf_insert (& s -> byte_code , last_atom_start , 10 ))
1588
+ if (dbuf_insert (& s -> byte_code , last_atom_start , 10 + add_zero_advance_check ))
1629
1589
goto out_of_memory ;
1630
1590
pos = last_atom_start ;
1631
1591
s -> byte_code .buf [pos ++ ] = REOP_push_i32 ;
1632
1592
put_u32 (s -> byte_code .buf + pos , quant_max );
1633
1593
pos += 4 ;
1634
1594
s -> byte_code .buf [pos ++ ] = REOP_split_goto_first + greedy ;
1635
- put_u32 (s -> byte_code .buf + pos , len + 5 );
1595
+ put_u32 (s -> byte_code .buf + pos , len + 5 + add_zero_advance_check * 2 );
1596
+ pos += 4 ;
1597
+ if (add_zero_advance_check ) {
1598
+ s -> byte_code .buf [pos ++ ] = REOP_push_char_pos ;
1599
+ re_emit_op (s , REOP_check_advance );
1600
+ }
1636
1601
re_emit_goto (s , REOP_loop , last_atom_start + 5 );
1637
1602
re_emit_op (s , REOP_drop );
1638
1603
}
@@ -1656,22 +1621,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1656
1621
if (quant_max == INT32_MAX ) {
1657
1622
pos = s -> byte_code .size ;
1658
1623
re_emit_op_u32 (s , REOP_split_goto_first + greedy ,
1659
- len + 5 + add_zero_advance_check );
1624
+ len + 5 + add_zero_advance_check * 2 );
1660
1625
if (add_zero_advance_check )
1661
1626
re_emit_op (s , REOP_push_char_pos );
1662
1627
/* copy the atom */
1663
1628
dbuf_put_self (& s -> byte_code , last_atom_start , len );
1664
1629
if (add_zero_advance_check )
1665
- re_emit_goto (s , REOP_bne_char_pos , pos );
1666
- else
1667
- re_emit_goto (s , REOP_goto , pos );
1630
+ re_emit_op (s , REOP_check_advance );
1631
+ re_emit_goto (s , REOP_goto , pos );
1668
1632
} else if (quant_max > quant_min ) {
1669
1633
re_emit_op_u32 (s , REOP_push_i32 , quant_max - quant_min );
1670
1634
pos = s -> byte_code .size ;
1671
- re_emit_op_u32 (s , REOP_split_goto_first + greedy , len + 5 );
1635
+ re_emit_op_u32 (s , REOP_split_goto_first + greedy ,
1636
+ len + 5 + add_zero_advance_check * 2 );
1637
+ if (add_zero_advance_check )
1638
+ re_emit_op (s , REOP_push_char_pos );
1672
1639
/* copy the atom */
1673
1640
dbuf_put_self (& s -> byte_code , last_atom_start , len );
1674
-
1641
+ if (add_zero_advance_check )
1642
+ re_emit_op (s , REOP_check_advance );
1675
1643
re_emit_goto (s , REOP_loop , pos );
1676
1644
re_emit_op (s , REOP_drop );
1677
1645
}
@@ -1785,7 +1753,7 @@ static int lre_compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
1785
1753
}
1786
1754
break ;
1787
1755
case REOP_drop :
1788
- case REOP_bne_char_pos :
1756
+ case REOP_check_advance :
1789
1757
assert (stack_size > 0 );
1790
1758
stack_size -- ;
1791
1759
break ;
@@ -2281,11 +2249,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2281
2249
case REOP_push_char_pos :
2282
2250
stack [stack_len ++ ] = (uintptr_t )cptr ;
2283
2251
break ;
2284
- case REOP_bne_char_pos :
2285
- val = get_u32 (pc );
2286
- pc += 4 ;
2287
- if (stack [-- stack_len ] != (uintptr_t )cptr )
2288
- pc += (int )val ;
2252
+ case REOP_check_advance :
2253
+ if (stack [-- stack_len ] == (uintptr_t )cptr )
2254
+ goto no_match ;
2289
2255
break ;
2290
2256
case REOP_word_boundary :
2291
2257
case REOP_not_word_boundary :
0 commit comments