static int
S_deprecate_commaless_var_list(pTHX) {
PL_expect = XTERM;
- deprecate("comma-less variable list");
+ deprecate_fatal_in("5.28", "Use of comma-less variable list is deprecated");
return REPORT(','); /* grandfather non-comma-format format */
}
}
}
-PERL_STATIC_INLINE SV*
+STATIC SV*
S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
if (!SvCUR(res)) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
- "Unknown charname '' is deprecated");
+ deprecate_fatal_in("5.28", "Unknown charname '' is deprecated");
return res;
}
In transliterations:
characters are VERY literal, except for - not at the start or end
- of the string, which indicates a range. If the range is in bytes,
+ of the string, which indicates a range. However some backslash sequences
+ are recognized: \r, \n, and the like
+ \007 \o{}, \x{}, \N{}
+ If all elements in the transliteration are below 256,
scan_const expands the range to the full set of intermediate
characters. If the range is in utf8, the hyphen is replaced with
a certain range mark which will be handled by pmtrans() in op.c.
In double-quoted strings:
backslashes:
- double-quoted style: \r and \n
- constants: \x31, etc.
+ all those recognized in transliterations
deprecated backrefs: \1 (in substitution replacements)
case and quoting: \U \Q \E
stops on @ and $
} (end if backslash)
handle regular character
} (end while character to read)
-
+
*/
STATIC char *
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
bool has_utf8 = FALSE; /* Output constant is UTF8 */
+ bool has_above_latin1 = FALSE; /* does something require special
+ handling in tr/// ? */
bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
example when it is entirely composed
of hex constants */
+ STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
+ number of characters found so far
+ that will expand (into 2 bytes)
+ should we have to convert to
+ UTF-8) */
SV *res; /* result from charnames */
STRLEN offset_to_max; /* The offset in the output to where the range
high-end character is temporarily placed */
* range, so for most cases we just drop down and handle the value
* as any other. There are two exceptions.
*
- * 1. A minus sign indicates that we are actually going to have
- * a range. In this case, skip the '-', set a flag, then drop
+ * 1. A hyphen indicates that we are actually going to have a
+ * range. In this case, skip the '-', set a flag, then drop
* down to handle what should be the end range value.
* 2. After we've handled that value, the next time through, that
* flag is set and we fix up the range.
*
* Ranges entirely within Latin1 are expanded out entirely, in
- * order to avoid the significant overhead of making a swash.
- * Ranges that extend above Latin1 have to have a swash, so there
- * is no advantage to abbreviating them here, so they are stored
- * here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
- * a hyphen without any possible ambiguity. On EBCDIC machines, if
- * the range is expressed as Unicode, the Latin1 portion is
- * expanded out even if the entire range extends above Latin1.
- * This is because each code point in it has to be processed here
- * individually to get its native translation */
+ * order to make the transliteration a simple table look-up.
+ * Ranges that extend above Latin1 have to be done differently, so
+ * there is no advantage to expanding them here, so they are
+ * stored here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte
+ * signifies a hyphen without any possible ambiguity. On EBCDIC
+ * machines, if the range is expressed as Unicode, the Latin1
+ * portion is expanded out even if the range extends above
+ * Latin1. This is because each code point in it has to be
+ * processed here individually to get its native translation */
if (! dorange) {
- /* Here, we don't think we're in a range. If we've processed
- * at least one character, then see if this next one is a '-',
- * indicating the previous one was the start of a range. But
- * don't bother if we're too close to the end for the minus to
- * mean that. */
+ /* Here, we don't think we're in a range. If the new character
+ * is not a hyphen; or if it is a hyphen, but it's too close to
+ * either edge to indicate a range, then it's a regular
+ * character. */
if (*s != '-' || s >= send - 1 || s == start) {
/* A regular character. Process like any other, but first
non_portable_endpoint = 0;
backslash_N = 0;
#endif
+ /* The tests here for being above Latin1 and similar ones
+ * in the following 'else' suffice to find all such
+ * occurences in the constant, except those added by a
+ * backslash escape sequence, like \x{100}. And all those
+ * set 'has_above_latin1' as appropriate */
+ if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ has_above_latin1 = TRUE;
+ }
+
/* Drops down to generic code to process current byte */
}
- else {
+ else { /* Is a '-' in the context where it means a range */
if (didrange) { /* Something like y/A-C-Z// */
- Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+ Perl_croak(aTHX_ "Ambiguous range in transliteration"
+ " operator");
}
dorange = TRUE;
- s++; /* Skip past the minus */
+ s++; /* Skip past the hyphen */
/* d now points to where the end-range character will be
* placed. Save it so won't have to go finding it later,
* pointer). We'll finish processing the range the next
* time through the loop */
offset_to_max = d - SvPVX_const(sv);
+
+ if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ has_above_latin1 = TRUE;
+ }
+
+ /* Drops down to generic code to process current byte */
}
} /* End of not a range */
else {
* 'd' points to just beyond the range end in the 'sv' string,
* where we would next place something
* 'offset_to_max' is the offset in 'sv' at which the character
- * before 'd' begins.
+ * (the range's maximum end point) before 'd' begins.
*/
- const char * max_ptr = SvPVX_const(sv) + offset_to_max;
+ char * max_ptr = SvPVX(sv) + offset_to_max;
const char * min_ptr;
IV range_min;
IV range_max; /* last character in range */
IV real_range_max = 0;
#endif
- /* Get the range-ends code point values. */
+ /* Get the code point values of the range ends. */
if (has_utf8) {
/* We know the utf8 is valid, because we just constructed
* it ourselves in previous loop iterations */
range_max = * (U8*) max_ptr;
}
+ /* If the range is just a single code point, like tr/a-a/.../,
+ * that code point is already in the output, twice. We can
+ * just back up over the second instance and avoid all the rest
+ * of the work. But if it is a variant character, it's been
+ * counted twice, so decrement */
+ if (UNLIKELY(range_max == range_min)) {
+ d = max_ptr;
+ if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+ utf8_variant_count--;
+ }
+ goto range_done;
+ }
+
#ifdef EBCDIC
/* On EBCDIC platforms, we may have to deal with portable
* ranges. These happen if at least one range endpoint is a
* Unicode value (\N{...}), or if the range is a subset of
* [A-Z] or [a-z], and both ends are literal characters,
* like 'A', and not like \x{C1} */
- if ((convert_unicode
- = cBOOL(backslash_N) /* \N{} forces Unicode, hence
- portable range */
- || ( ! non_portable_endpoint
- && (( isLOWER_A(range_min) && isLOWER_A(range_max))
- || (isUPPER_A(range_min) && isUPPER_A(range_max))))
- )) {
+ convert_unicode =
+ cBOOL(backslash_N) /* \N{} forces Unicode,
+ hence portable range */
+ || ( ! non_portable_endpoint
+ && (( isLOWER_A(range_min) && isLOWER_A(range_max))
+ || (isUPPER_A(range_min) && isUPPER_A(range_max))));
+ if (convert_unicode) {
/* Special handling is needed for these portable ranges.
- * They are defined to all be in Unicode terms, which
- * include all Unicode code points between the end points.
+ * They are defined to be in Unicode terms, which includes
+ * all the Unicode code points between the end points.
* Convert to Unicode to get the Unicode range. Later we
* will convert each code point in the range back to
* native. */
}
#ifdef EBCDIC
else if (convert_unicode) {
- /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
- " in transliteration operator",
- range_min, range_max);
+ "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
+ UVXf "}\" in transliteration operator",
+ range_min, range_max);
}
#endif
else {
- /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
Perl_croak(aTHX_
- "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
- " in transliteration operator",
- range_min, range_max);
+ "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
+ " in transliteration operator",
+ range_min, range_max);
}
}
if (has_utf8) {
- /* We try to avoid creating a swash. If the upper end of
- * this range is below 256, this range won't force a swash;
- * otherwise it does force a swash, and as long as we have
- * to have one, we might as well not expand things out.
- * But if it's EBCDIC, we may have to look at each
- * character below 256 if we have to convert to/from
- * Unicode values */
- if (range_max > 255
+ /* If everything in the transliteration is below 256, we
+ * can avoid special handling later. A translation table
+ * for each of those bytes is created by op.c. So we
+ * expand out all ranges to their constituent code points.
+ * But if we've encountered something above 255, the
+ * expanding won't help, so skip doing that. But if it's
+ * EBCDIC, we may have to look at each character below 256
+ * if we have to convert to/from Unicode values */
+ if ( has_above_latin1
#ifdef EBCDIC
&& (range_min > 255 || ! convert_unicode)
#endif
/* Move the high character one byte to the right; then
* insert between it and the range begin, an illegal
* byte which serves to indicate this is a range (using
- * a '-' could be ambiguous). */
+ * a '-' would be ambiguous). */
char *e = d++;
while (e-- > max_ptr) {
*(e + 1) = *e;
* equivalent */
if (has_utf8) {
for (i = range_min; i <= range_max; i++) {
- append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
- (U8 **) &d);
+ append_utf8_from_native_byte(
+ LATIN1_TO_NATIVE((U8) i),
+ (U8 **) &d);
}
}
else {
}
#ifdef EBCDIC
- /* If the original range extended above 255, add in that portion. */
+ /* If the original range extended above 255, add in that
+ * portion. */
if (real_range_max) {
*d++ = (char) UTF8_TWO_BYTE_HI(0x100);
*d++ = (char) UTF8_TWO_BYTE_LO(0x100);
- if (real_range_max > 0x101)
- *d++ = (char) ILLEGAL_UTF8_BYTE;
- if (real_range_max > 0x100)
+ if (real_range_max > 0x100) {
+ if (real_range_max > 0x101) {
+ *d++ = (char) ILLEGAL_UTF8_BYTE;
+ }
d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+ }
}
#endif
if (!esc)
in_charclass = TRUE;
}
-
- else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
char *s1 = s-1;
int esc = 0;
while (s1 >= start && *s1-- == '\\')
if (!esc)
in_charclass = FALSE;
}
-
- /* skip for regexp comments /(?#comment)/, except for the last
- * char, which will be done separately.
- * Stop on (?{..}) and friends */
-
+ /* skip for regexp comments /(?#comment)/, except for the last
+ * char, which will be done separately. Stop on (?{..}) and
+ * friends */
else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
break;
}
}
-
- /* likewise skip #-initiated comments in //x patterns */
+ /* likewise skip #-initiated comments in //x patterns */
else if (*s == '#'
&& PL_lex_inpat
&& !in_charclass
&& ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
{
- while (s+1 < send && *s != '\n')
+ while (s < send && *s != '\n')
*d++ = *s++;
}
-
- /* no further processing of single-quoted regex */
+ /* no further processing of single-quoted regex */
else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
goto default_action;
- /* check for embedded arrays
- (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
- */
+ /* check for embedded arrays
+ * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
+ */
else if (*s == '@' && s[1]) {
if (UTF
? isIDFIRST_utf8_safe(s+1, send)
if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
break; /* in regexp, neither @+ nor @- are interpolated */
}
-
- /* check for embedded scalars. only stop if we're sure it's a
- variable.
- */
+ /* check for embedded scalars. only stop if we're sure it's a
+ * variable. */
else if (*s == '$') {
if (!PL_lex_inpat) /* not a regexp, so $ must be var */
break;
/* End of else if chain - OP_TRANS rejoin rest */
+ if (UNLIKELY(s >= send)) {
+ assert(s == send);
+ break;
+ }
+
/* backslashes */
if (*s == '\\' && s+1 < send) {
char* e; /* Can be used for ending '}', etc. */
UTF);
if (! valid) {
yyerror(error);
- continue;
+ uv = 0; /* drop through to ensure range ends are set */
}
goto NUM_ESCAPE_INSERT;
}
UTF);
if (! valid) {
yyerror(error);
- continue;
+ uv = 0; /* drop through to ensure range ends are set */
}
}
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character. */
-
+
/* Here uv is the ordinal of the next character being added */
if (UVCHR_IS_INVARIANT(uv)) {
*d++ = (char) uv;
}
else {
if (!has_utf8 && uv > 255) {
- /* Might need to recode whatever we have accumulated so
- * far if it contains any chars variant in utf8 or
- * utf-ebcdic. */
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(
- sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
- /* Above-latin1 in string
- * implies no encoding */
- |SV_UTF8_NO_ENCODING,
- UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
- has_utf8 = TRUE;
+ /* Here, 'uv' won't fit unless we convert to UTF-8.
+ * If we've only seen invariants so far, all we have to
+ * do is turn on the flag */
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ }
+ else {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
+
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+
+ /* Since we're having to grow here,
+ * make sure we have enough room for
+ * this escape and a NUL, so the
+ * code immediately below won't have
+ * to actually grow again */
+ UVCHR_SKIP(uv)
+ + (STRLEN)(send - s) + 1);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
+
+ has_above_latin1 = TRUE;
+ has_utf8 = TRUE;
}
- if (has_utf8) {
+ if (! has_utf8) {
+ *d++ = (char)uv;
+ utf8_variant_count++;
+ }
+ else {
/* Usually, there will already be enough room in 'sv'
* since such escapes are likely longer than any UTF-8
* sequence they can end up as. This isn't the case on
* EBCDIC where \x{40000000} contains 12 bytes, and the
* UTF-8 for it contains 14. And, we have to allow for
* a trailing NUL. It probably can't happen on ASCII
- * platforms, but be safe */
- const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+ * platforms, but be safe. See Note on sizing above. */
+ const STRLEN needed = d - SvPVX(sv)
+ + UVCHR_SKIP(uv)
+ + (send - s)
+ 1;
if (UNLIKELY(needed > SvLEN(sv))) {
SvCUR_set(sv, d - SvPVX_const(sv));
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
- }
- else {
- *d++ = (char)uv;
}
}
#ifdef EBCDIC
if (! has_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
+ /* See Note on sizing above. */
+ const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
+
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(
- sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ extra);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
+
has_utf8 = TRUE;
+ has_above_latin1 = TRUE;
}
/* Add the (Unicode) code point to the output. */
(int) (e + 1 - start), start));
goto end_backslash_N;
}
+
+ if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
+ has_above_latin1 = TRUE;
+ }
+
}
else if (! SvUTF8(res)) {
/* Make sure \N{} return is UTF-8. This is because
* \N{} implies Unicode semantics, and scalars have
* to be in utf8 to guarantee those semantics; but
* not needed in tr/// */
- sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+ sv_utf8_upgrade_flags(res, 0);
str = SvPV_const(res, len);
}
/* Upgrade destination to be utf8 if this new
* component is */
if (! has_utf8 && SvUTF8(res)) {
+ /* See Note on sizing above. */
+ const STRLEN extra = len + (send - s) + 1;
+
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(sv,
SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- len + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+ extra);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
has_utf8 = TRUE;
} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
* to/from UTF-8.
*
* If the input has the same representation in UTF-8 as not, it will be
- * a single byte, and we don't care about UTF8ness; or if neither
- * source nor output is UTF-8, just copy the byte */
- if (NATIVE_BYTE_IS_INVARIANT((U8)(*s)) || (! this_utf8 && ! has_utf8))
- {
+ * a single byte, and we don't care about UTF8ness; just copy the byte */
+ if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
*d++ = *s++;
}
+ else if (! this_utf8 && ! has_utf8) {
+ /* If neither source nor output is UTF-8, is also a single byte,
+ * just copy it; but this byte counts should we later have to
+ * convert to UTF-8 */
+ *d++ = *s++;
+ utf8_variant_count++;
+ }
else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
const STRLEN len = UTF8SKIP(s);
const UV nextuv = (this_utf8)
? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
: (UV) ((U8) *s);
- const STRLEN need = UVCHR_SKIP(nextuv);
+ STRLEN need = UVCHR_SKIP(nextuv);
+
if (!has_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- need + (STRLEN)(send - s) + 1);
- d = SvPVX(sv) + SvCUR(sv);
+
+ /* See Note on sizing above. */
+ need += (STRLEN)(send - s) + 1;
+
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ need);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
has_utf8 = TRUE;
} else if (need > len) {
/* encoded value larger than old, may need extra space (NOTE:
if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
sv_free(sv);
if (PL_in_my == KEY_our) {
- deprecate(":unique");
+ deprecate_disappears_in("5.28",
+ "Attribute \"unique\" is deprecated");
}
else
Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
}
else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
sv_free(sv);
- deprecate(":locked");
+ deprecate_disappears_in("5.28",
+ "Attribute \"locked\" is deprecated");
}
else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
sv_free(sv);
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump) {
- Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
- "dump() better written as CORE::dump()");
+ Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
+ "dump() better written as CORE::dump(). "
+ "dump() will no longer be available "
+ "in Perl 5.30");
}
gv = NULL;
gvp = 0;
case KEY_exists:
UNI(OP_EXISTS);
-
+
case KEY_exit:
UNI(OP_EXIT);
case KEY_last:
LOOPX(OP_LAST);
-
+
case KEY_lc:
UNI(OP_LC);
case KEY_pos:
UNIDOR(OP_POS);
-
+
case KEY_pack:
LOP(OP_PACK,XTERM);
case KEY_chomp:
UNI(OP_CHOMP);
-
+
case KEY_scalar:
UNI(OP_SCALAR);
|| ! SvOK(*cvp))
{
char *msg;
-
+
/* Here haven't found what we're looking for. If it is charnames,
* perhaps it needs to be loaded. Try doing that before giving up */
if (*key == 'c') {
"Use of /c modifier is meaningless without /g" );
}
- if (UNLIKELY((x_mod_count) > 1)) {
- yyerror("Only one /x regex modifier is allowed");
- }
-
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
return s;
}
}
- if (UNLIKELY((x_mod_count) > 1)) {
- yyerror("Only one /x regex modifier is allowed");
- }
-
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
}
else
term = '"';
if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
- deprecate("bare << to mean <<\"\"");
+ deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated");
peek = s;
while (
isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
($*@) sub prototypes sub foo ($)
(stuff) sub attr parameters sub foo : attr(stuff)
<> readline or globs <FOO>, <>, <$fh>, or <*.c>
-
+
In most of these cases (all but <>, patterns and transliterate)
yylex() calls scan_str(). m// makes yylex() call scan_pat() which
calls scan_str(). s/// makes yylex() call scan_subst() which calls
*to = *s;
}
}
-
+
/* if the terminator isn't the same as the start character (e.g.,
matched brackets), we have to allow more in the quoting, and
be prepared for nested brackets.
else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
to[-1] = '\n';
#endif
-
+
/* if we're out of file, or a read fails, bail and reset the current
line marker so we can report where the unterminated string began
*/
bool floatit; /* boolean: int or float? */
const char *lastub = NULL; /* position of last underbar */
static const char* const number_too_long = "Number too long";
+ bool warned_about_underscore = 0;
+#define WARN_ABOUT_UNDERSCORE() \
+ do { \
+ if (!warned_about_underscore) { \
+ warned_about_underscore = 1; \
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
+ "Misplaced _ in number"); \
+ } \
+ } while(0)
/* Hexadecimal floating point.
*
* In many places (where we have quads and NV is IEEE 754 double)
}
if (*s == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
}
/* _ are ignored -- but warned about if consecutive */
case '_':
if (lastub && s == lastub + 1)
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
break;
out:
/* final misplaced underbar check */
- if (s[-1] == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
- }
+ if (s[-1] == '_')
+ WARN_ABOUT_UNDERSCORE();
if (UNLIKELY(HEXFP_PEEK(s))) {
/* Do sloppy (on the underbars) but quick detection
*/
if (*s == '_') {
if (lastub && s == lastub + 1)
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
}
else {
}
/* final misplaced underbar check */
- if (lastub && s == lastub + 1) {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
- }
+ if (lastub && s == lastub + 1)
+ WARN_ABOUT_UNDERSCORE();
/* read a decimal portion if there is one. avoid
3..5 being interpreted as the number 3. followed
*d++ = *s++;
if (*s == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s;
}
Perl_croak(aTHX_ "%s", number_too_long);
if (*s == '_') {
if (lastub && s == lastub + 1)
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s;
}
else
*d++ = *s;
}
/* fractional part ending in underbar? */
- if (s[-1] == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
- }
+ if (s[-1] == '_')
+ WARN_ABOUT_UNDERSCORE();
if (*s == '.' && isDIGIT(s[1])) {
/* oops, it's really a v-string, but without the "v" */
s = start;
/* stray preinitial _ */
if (*s == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
}
/* stray initial _ */
if (*s == '_') {
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
}
else {
if (((lastub && s == lastub + 1)
|| (!isDIGIT(s[1]) && s[1] != '_')))
- Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ WARN_ABOUT_UNDERSCORE();
lastub = s++;
}
}