#include "dquote_static.c"
#include "charclass_invlists.h"
#include "inline_invlist.c"
+#include "utf8_strings.h"
#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
* LETTER SHARP S. We decrease the min length by 1 for each
* occurrence of 'ss' found */
-#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
-# define U390_first_byte 0xb4
- const U8 U390_tail[] = "\x68\xaf\x49\xaf\x42";
-# define U3B0_first_byte 0xb5
- const U8 U3B0_tail[] = "\x46\xaf\x49\xaf\x42";
-#else
-# define U390_first_byte 0xce
- const U8 U390_tail[] = "\xb9\xcc\x88\xcc\x81";
-# define U3B0_first_byte 0xcf
- const U8 U3B0_tail[] = "\x85\xcc\x88\xcc\x81";
-#endif
- const U8 len = sizeof(U390_tail); /* (-1 for NUL; +1 for 1st byte;
+#define U390_FIRST_BYTE GREEK_SMALL_LETTER_IOTA_UTF8_FIRST_BYTE
+#define U3B0_FIRST_BYTE GREEK_SMALL_LETTER_UPSILON_UTF8_FIRST_BYTE
+ const U8 U390_tail[] = GREEK_SMALL_LETTER_IOTA_UTF8_TAIL
+ COMBINING_DIAERESIS_UTF8
+ COMBINING_ACUTE_ACCENT_UTF8;
+ const U8 U3B0_tail[] = GREEK_SMALL_LETTER_UPSILON_UTF8_TAIL
+ COMBINING_DIAERESIS_UTF8
+ COMBINING_ACUTE_ACCENT_UTF8;
+ const U8 len = sizeof(U390_tail); /* (-1 for NUL; +1 for 1st byte;
yields a net of 0) */
/* Examine the string for one of the problematic sequences */
for (s = s0;
}
break;
- case U390_first_byte:
+ case U390_FIRST_BYTE:
if (s_end - s >= len
/* The 1's are because we are skipping comparing the
}
break;
- case U3B0_first_byte:
+ case U3B0_FIRST_BYTE:
if (! (s_end - s >= len
&& memEQ(s + 1, U3B0_tail, len - 1)))
{
else {
while (SvAMAGIC(msv)
&& (sv = AMG_CALLunary(msv, string_amg))
- && sv != msv)
- {
+ && sv != msv
+ && !( SvROK(msv)
+ && SvROK(sv)
+ && SvRV(msv) == SvRV(sv))
+ ) {
msv = sv;
SvGETMAGIC(msv);
}
char *s = NULL;
I32 i = 0;
I32 s1, t1;
+ I32 n = paren;
PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
- if (!rx->subbeg) {
- sv_setsv(sv,&PL_sv_undef);
- return;
- }
- else
- if (paren == RX_BUFF_IDX_PREMATCH && rx->offs[0].start != -1) {
- /* $` */
+ if ( ( n == RX_BUFF_IDX_CARET_PREMATCH
+ || n == RX_BUFF_IDX_CARET_FULLMATCH
+ || n == RX_BUFF_IDX_CARET_POSTMATCH
+ )
+ && !(rx->extflags & RXf_PMf_KEEPCOPY)
+ )
+ goto ret_undef;
+
+ if (!rx->subbeg)
+ goto ret_undef;
+
+ if (n == RX_BUFF_IDX_CARET_FULLMATCH)
+ /* no need to distinguish between them any more */
+ n = RX_BUFF_IDX_FULLMATCH;
+
+ if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
+ && rx->offs[0].start != -1)
+ {
+ /* $`, ${^PREMATCH} */
i = rx->offs[0].start;
s = rx->subbeg;
}
else
- if (paren == RX_BUFF_IDX_POSTMATCH && rx->offs[0].end != -1) {
- /* $' */
- s = rx->subbeg + rx->offs[0].end;
- i = rx->sublen - rx->offs[0].end;
+ if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
+ && rx->offs[0].end != -1)
+ {
+ /* $', ${^POSTMATCH} */
+ s = rx->subbeg - rx->suboffset + rx->offs[0].end;
+ i = rx->sublen + rx->suboffset - rx->offs[0].end;
}
else
- if ( 0 <= paren && paren <= (I32)rx->nparens &&
- (s1 = rx->offs[paren].start) != -1 &&
- (t1 = rx->offs[paren].end) != -1)
+ if ( 0 <= n && n <= (I32)rx->nparens &&
+ (s1 = rx->offs[n].start) != -1 &&
+ (t1 = rx->offs[n].end) != -1)
{
- /* $& $1 ... */
+ /* $&, ${^MATCH}, $1 ... */
i = t1 - s1;
- s = rx->subbeg + s1;
+ s = rx->subbeg + s1 - rx->suboffset;
} else {
- sv_setsv(sv,&PL_sv_undef);
- return;
+ goto ret_undef;
}
+
+ assert(s >= rx->subbeg);
assert(rx->sublen >= (s - rx->subbeg) + i );
if (i >= 0) {
const int oldtainted = PL_tainted;
SvTAINTED_off(sv);
}
} else {
+ ret_undef:
sv_setsv(sv,&PL_sv_undef);
return;
}
PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
/* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
- switch (paren) {
- /* $` / ${^PREMATCH} */
- case RX_BUFF_IDX_PREMATCH:
+ switch (paren) {
+ case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
+ if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+ goto warn_undef;
+ /*FALLTHROUGH*/
+
+ case RX_BUFF_IDX_PREMATCH: /* $` */
if (rx->offs[0].start != -1) {
i = rx->offs[0].start;
if (i > 0) {
}
}
return 0;
- /* $' / ${^POSTMATCH} */
- case RX_BUFF_IDX_POSTMATCH:
+
+ case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
+ if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+ goto warn_undef;
+ /*FALLTHROUGH*/
+ case RX_BUFF_IDX_POSTMATCH: /* $' */
if (rx->offs[0].end != -1) {
i = rx->sublen - rx->offs[0].end;
if (i > 0) {
}
}
return 0;
+
+ case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */
+ if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+ goto warn_undef;
+ /*FALLTHROUGH*/
+
/* $& / ${^MATCH}, $1, $2, ... */
default:
if (paren <= (I32)rx->nparens &&
i = t1 - s1;
goto getlen;
} else {
+ warn_undef:
if (ckWARN(WARN_UNINITIALIZED))
report_uninit((const SV *)sv);
return 0;
}
getlen:
if (i > 0 && RXp_MATCH_UTF8(rx)) {
- const char * const s = rx->subbeg + s1;
+ const char * const s = rx->subbeg - rx->suboffset + s1;
const U8 *ep;
STRLEN el;
* list.)
* Taking the complement of (inverting) an inversion list is quite simple: if
* the first element is 0, remove it; otherwise add a 0 element at the
* beginning.
- * This implementation reserves an element at the beginning of each inversion list
- * to contain 0 when the list contains 0, and contains 1 otherwise. The actual
- * beginning of the list is either that element if 0, or the next one if 1.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to contain 0 when the list contains 0, and contains 1 otherwise. The
+ * actual beginning of the list is either that element if 0, or the next one if
+ * 1.
*
* More about inversion lists can be found in "Unicode Demystified"
* Chapter 13 by Richard Gillam, published by Addison-Wesley.
case 'P':
{
char *e;
+
+ /* This routine will handle any undefined properties */
+ U8 swash_init_flags = _CORE_SWASH_INIT_RETURN_IF_UNDEF;
+
if (RExC_parse >= RExC_end)
vFAIL2("Empty \\%c{}", (U8)value);
if (*RExC_parse == '{') {
swash = _core_swash_init("utf8", name, &PL_sv_undef,
1, /* binary */
0, /* not tr/// */
- TRUE, /* this routine will handle
- undefined properties */
- NULL, FALSE /* No inversion list */
+ NULL, /* No inversion list */
+ &swash_init_flags
);
- if ( ! swash
- || ! SvROK(swash)
- || ! SvTYPE(SvRV(swash)) == SVt_PVHV
- || ! (invlist = _get_swash_invlist(swash)))
- {
+ if (! swash || ! (invlist = _get_swash_invlist(swash))) {
if (swash) {
SvREFCNT_dec(swash);
swash = NULL;
* the swash is from a user-defined property, then this
* whole character class should be regarded as such */
has_user_defined_property =
- _is_swash_user_defined(swash);
+ (swash_init_flags
+ & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY);
/* Invert if asking for the complement */
if (value == 'P') {
U8 dummy[UTF8_MAXBYTES+1];
STRLEN dummy_len;
- /* This particular string is above \xff in both UTF-8
- * and UTFEBCDIC */
- to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+ /* This string is just a short named one above \xff */
+ to_utf8_fold((U8*) HYPHEN_UTF8, dummy, &dummy_len);
assert(PL_utf8_tofold); /* Verify that worked */
}
PL_utf8_foldclosures =
PL_reg_oldsaved = NULL;
PL_reg_oldsavedlen = 0;
+ PL_reg_oldsavedoffset = 0;
+ PL_reg_oldsavedcoffset = 0;
PL_reg_maxiter = 0;
PL_reg_leftiter = 0;
PL_reg_poscache = NULL;