perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* dquote_static.c
	2	*
	3	* This file contains static functions that are related to
	4	* parsing double-quotish expressions, but are used in more than
	5	* one file.
	6	*
	7	* It is currently #included by regcomp.c and toke.c.
	8	*/
	9
	10	#define PERL_IN_DQUOTE_STATIC_C
	11	#include "embed.h"
	12
	13	/*
	14	- regcurly - a little FSA that accepts {\d+,?\d*}
	15	Pulled from regcomp.c.
	16	*/
	17	PERL_STATIC_INLINE I32
	18	S_regcurly(const char *s)
	19	{
	20	PERL_ARGS_ASSERT_REGCURLY;
	21
	22	if (*s++ != '{')
	23	return FALSE;
	24	if (!isDIGIT(*s))
	25	return FALSE;
	26	while (isDIGIT(*s))
	27	s++;
	28	if (*s == ',') {
	29	s++;
	30	while (isDIGIT(*s))
	31	s++;
	32	}
	33
	34	return *s == '}';
	35	}
	36
	37	/* XXX Add documentation after final interface and behavior is decided */
	38	/* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
	39	U8 source = *current;
	40	*/
	41
	42	STATIC char
	43	S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
	44	{
	45
	46	U8 result;
	47
	48	if (! isPRINT_A(source)) {
	49	Perl_croak(aTHX_ "%s",
	50	"Character following \"\\c\" must be printable ASCII");
	51	}
	52	else if (source == '{') {
	53	assert(isPRINT_A(toCTRL('{')));
	54
	55	/* diag_listed_as: Use "%s" instead of "%s" */
	56	Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{'));
	57	}
	58
	59	result = toCTRL(source);
	60	if (output_warning && isPRINT_A(result)) {
	61	U8 clearer[3];
	62	U8 i = 0;
	63	if (! isWORDCHAR(result)) {
	64	clearer[i++] = '\\';
	65	}
	66	clearer[i++] = result;
	67	clearer[i++] = '\0';
	68
	69	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
	70	"\"\\c%c\" is more clearly written simply as \"%s\"",
	71	source,
	72	clearer);
	73	}
	74
	75	return result;
	76	}
	77
	78	STATIC bool
	79	S_grok_bslash_o(pTHX_ char *s, UV uv, const char** error_msg,
	80	const bool output_warning, const bool strict,
	81	const bool silence_non_portable,
	82	const bool UTF)
	83	{
	84
	85	/* Documentation to be supplied when interface nailed down finally
	86	* This returns FALSE if there is an error which the caller need not recover
	87	* from; , otherwise TRUE. In either case the caller should look at *len
	88	* On input:
	89	* s is the address of a pointer to a NULL terminated string that begins
	90	* with 'o', and the previous character was a backslash. At exit, *s
	91	* will be advanced to the byte just after those absorbed by this
	92	* function. Hence the caller can continue parsing from there. In
	93	* the case of an error, this routine has generally positioned *s to
	94	* point just to the right of the first bad spot, so that a message
	95	* that has a "<--" to mark the spot will be correctly positioned.
	96	* uv points to a UV that will hold the output value, valid only if the
	97	* return from the function is TRUE
	98	* error_msg is a pointer that will be set to an internal buffer giving an
	99	* error message upon failure (the return is FALSE). Untouched if
	100	* function succeeds
	101	* output_warning says whether to output any warning messages, or suppress
	102	* them
	103	* strict is true if this should fail instead of warn if there are
	104	* non-octal digits within the braces
	105	* silence_non_portable is true if to suppress warnings about the code
	106	* point returned being too large to fit on all platforms.
	107	* UTF is true iff the string *s is encoded in UTF-8.
	108	*/
	109	char* e;
	110	STRLEN numbers_len;
	111	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	112	\| PERL_SCAN_DISALLOW_PREFIX
	113	/* XXX Until the message is improved in grok_oct, handle errors
	114	* ourselves */
	115	\| PERL_SCAN_SILENT_ILLDIGIT;
	116
	117	PERL_ARGS_ASSERT_GROK_BSLASH_O;
	118
	119
	120	assert(**s == 'o');
	121	(*s)++;
	122
	123	if (**s != '{') {
	124	*error_msg = "Missing braces on \\o{}";
	125	return FALSE;
	126	}
	127
	128	e = strchr(*s, '}');
	129	if (!e) {
	130	(s)++; / Move past the '{' */
	131	while (isOCTAL(*s)) { / Position beyond the legal digits */
	132	(*s)++;
	133	}
	134	*error_msg = "Missing right brace on \\o{";
	135	return FALSE;
	136	}
	137
	138	(s)++; / Point to expected first digit (could be first byte of utf8
	139	sequence if not a digit) */
	140	numbers_len = e - *s;
	141	if (numbers_len == 0) {
	142	(s)++; / Move past the } */
	143	*error_msg = "Number with no digits";
	144	return FALSE;
	145	}
	146
	147	if (silence_non_portable) {
	148	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	149	}
	150
	151	uv = grok_oct(s, &numbers_len, &flags, NULL);
	152	/* Note that if has non-octal, will ignore everything starting with that up
	153	* to the '}' */
	154
	155	if (numbers_len != (STRLEN) (e - *s)) {
	156	if (strict) {
	157	*s += numbers_len;
	158	s += (UTF) ? UTF8SKIP(s) : (STRLEN) 1;
	159	*error_msg = "Non-octal character";
	160	return FALSE;
	161	}
	162	else if (output_warning) {
	163	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
	164	/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
	165	"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
	166	(s + numbers_len),
	167	(int) numbers_len,
	168	*s);
	169	}
	170	}
	171
	172	/* Return past the '}' */
	173	*s = e + 1;
	174
	175	return TRUE;
	176	}
	177
	178	PERL_STATIC_INLINE bool
	179	S_grok_bslash_x(pTHX_ char *s, UV uv, const char** error_msg,
	180	const bool output_warning, const bool strict,
	181	const bool silence_non_portable,
	182	const bool UTF)
	183	{
	184
	185	/* Documentation to be supplied when interface nailed down finally
	186	* This returns FALSE if there is an error which the caller need not recover
	187	* from; , otherwise TRUE. In either case the caller should look at *len
	188	* On input:
	189	* s is the address of a pointer to a NULL terminated string that begins
	190	* with 'x', and the previous character was a backslash. At exit, *s
	191	* will be advanced to the byte just after those absorbed by this
	192	* function. Hence the caller can continue parsing from there. In
	193	* the case of an error, this routine has generally positioned *s to
	194	* point just to the right of the first bad spot, so that a message
	195	* that has a "<--" to mark the spot will be correctly positioned.
	196	* uv points to a UV that will hold the output value, valid only if the
	197	* return from the function is TRUE
	198	* error_msg is a pointer that will be set to an internal buffer giving an
	199	* error message upon failure (the return is FALSE). Untouched if
	200	* function succeeds
	201	* output_warning says whether to output any warning messages, or suppress
	202	* them
	203	* strict is true if anything out of the ordinary should cause this to
	204	* fail instead of warn or be silent. For example, it requires
	205	* exactly 2 digits following the \x (when there are no braces).
	206	* 3 digits could be a mistake, so is forbidden in this mode.
	207	* silence_non_portable is true if to suppress warnings about the code
	208	* point returned being too large to fit on all platforms.
	209	* UTF is true iff the string *s is encoded in UTF-8.
	210	*/
	211	char* e;
	212	STRLEN numbers_len;
	213	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	214
	215	PERL_ARGS_ASSERT_GROK_BSLASH_X;
	216
	217	PERL_UNUSED_ARG(output_warning);
	218
	219	assert(**s == 'x');
	220	(*s)++;
	221
	222	if (strict) {
	223	flags \|= PERL_SCAN_SILENT_ILLDIGIT;
	224	}
	225
	226	if (**s != '{') {
	227	STRLEN len = (strict) ? 3 : 2;
	228
	229	uv = grok_hex(s, &len, &flags, NULL);
	230	*s += len;
	231	if (strict && len != 2) {
	232	if (len < 2) {
	233	s += (UTF) ? UTF8SKIP(s) : 1;
	234	*error_msg = "Non-hex character";
	235	}
	236	else {
	237	*error_msg = "Use \\x{...} for more than two hex characters";
	238	}
	239	return FALSE;
	240	}
	241	return TRUE;
	242	}
	243
	244	e = strchr(*s, '}');
	245	if (!e) {
	246	(s)++; / Move past the '{' */
	247	while (isXDIGIT(*s)) { / Position beyond the legal digits */
	248	(*s)++;
	249	}
	250	/* XXX The corresponding message above for \o is just '\\o{'; other
	251	* messages for other constructs include the '}', so are inconsistent.
	252	*/
	253	*error_msg = "Missing right brace on \\x{}";
	254	return FALSE;
	255	}
	256
	257	(s)++; / Point to expected first digit (could be first byte of utf8
	258	sequence if not a digit) */
	259	numbers_len = e - *s;
	260	if (numbers_len == 0) {
	261	if (strict) {
	262	(s)++; / Move past the } */
	263	*error_msg = "Number with no digits";
	264	return FALSE;
	265	}
	266	return TRUE;
	267	}
	268
	269	flags \|= PERL_SCAN_ALLOW_UNDERSCORES;
	270	if (silence_non_portable) {
	271	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	272	}
	273
	274	uv = grok_hex(s, &numbers_len, &flags, NULL);
	275	/* Note that if has non-hex, will ignore everything starting with that up
	276	* to the '}' */
	277
	278	if (strict && numbers_len != (STRLEN) (e - *s)) {
	279	*s += numbers_len;
	280	s += (UTF) ? UTF8SKIP(s) : 1;
	281	*error_msg = "Non-hex character";
	282	return FALSE;
	283	}
	284
	285	/* Return past the '}' */
	286	*s = e + 1;
	287
	288	return TRUE;
	289	}
	290
	291	STATIC char*
	292	S_form_short_octal_warning(pTHX_
	293	const char * const s, /* Points to first non-octal */
	294	const STRLEN len /* Length of octals string, so
	295	(s-len) points to first
	296	octal */
	297	) {
	298	/* Return a character string consisting of a warning message for when a
	299	* string constant in octal is weird, like "\078". */
	300
	301	const char * sans_leading_zeros = s - len;
	302
	303	PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
	304
	305	assert(s == '8' \|\| s == '9');
	306
	307	/* Remove the leading zeros, retaining one zero so won't be zero length */
	308	while (*sans_leading_zeros == '0') sans_leading_zeros++;
	309	if (sans_leading_zeros == s) {
	310	sans_leading_zeros--;
	311	}
	312
	313	return Perl_form(aTHX_
	314	"'%.s' resolved to '\\o{%.s}%c'",
	315	(int) (len + 2), s - len - 1,
	316	(int) (s - sans_leading_zeros), sans_leading_zeros,
	317	*s);
	318	}
	319
	320	/*
	321	* Local variables:
	322	* c-indentation-style: bsd
	323	* c-basic-offset: 4
	324	* indent-tabs-mode: nil
	325	* End:
	326	*
	327	* ex: set ts=8 sts=4 sw=4 et:
	328	*/