perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* dquote_static.c
	2	*
	3	* This file contains static functions that are related to
	4	* parsing double-quotish expressions, but are used in more than
	5	* one file.
	6	*
	7	* It is currently #included by regcomp.c and toke.c.
	8	*/
	9
	10	#define PERL_IN_DQUOTE_STATIC_C
	11	#include "embed.h"
	12
	13	/*
	14	- regcurly - a little FSA that accepts {\d+,?\d*}
	15	Pulled from regcomp.c.
	16	*/
	17	PERL_STATIC_INLINE I32
	18	S_regcurly(pTHX_ const char *s)
	19	{
	20	PERL_UNUSED_CONTEXT;
	21	PERL_ARGS_ASSERT_REGCURLY;
	22
	23	if (*s++ != '{')
	24	return FALSE;
	25	if (!isDIGIT(*s))
	26	return FALSE;
	27	while (isDIGIT(*s))
	28	s++;
	29	if (*s == ',') {
	30	s++;
	31	while (isDIGIT(*s))
	32	s++;
	33	}
	34
	35	return *s == '}';
	36	}
	37
	38	/* XXX Add documentation after final interface and behavior is decided */
	39	/* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
	40	U8 source = *current;
	41	*/
	42
	43	STATIC char
	44	S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
	45	{
	46
	47	U8 result;
	48
	49	if (! isPRINT_A(source)) {
	50	Perl_croak(aTHX_ "%s",
	51	"Character following \"\\c\" must be printable ASCII");
	52	}
	53	else if (source == '{') {
	54	assert(isPRINT_A(toCTRL('{')));
	55
	56	/* diag_listed_as: Use "%s" instead of "%s" */
	57	Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{'));
	58	}
	59
	60	result = toCTRL(source);
	61	if (output_warning && isPRINT_A(result)) {
	62	U8 clearer[3];
	63	U8 i = 0;
	64	if (! isWORDCHAR(result)) {
	65	clearer[i++] = '\\';
	66	}
	67	clearer[i++] = result;
	68	clearer[i++] = '\0';
	69
	70	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
	71	"\"\\c%c\" is more clearly written simply as \"%s\"",
	72	source,
	73	clearer);
	74	}
	75
	76	return result;
	77	}
	78
	79	STATIC bool
	80	S_grok_bslash_o(pTHX_ char *s, UV uv, const char** error_msg,
	81	const bool output_warning, const bool strict,
	82	const bool silence_non_portable,
	83	const bool UTF)
	84	{
	85
	86	/* Documentation to be supplied when interface nailed down finally
	87	* This returns FALSE if there is an error which the caller need not recover
	88	* from; , otherwise TRUE. In either case the caller should look at *len
	89	* On input:
	90	* s is the address of a pointer to a NULL terminated string that begins
	91	* with 'o', and the previous character was a backslash. At exit, *s
	92	* will be advanced to the byte just after those absorbed by this
	93	* function. Hence the caller can continue parsing from there. In
	94	* the case of an error, this routine has generally positioned *s to
	95	* point just to the right of the first bad spot, so that a message
	96	* that has a "<--" to mark the spot will be correctly positioned.
	97	* uv points to a UV that will hold the output value, valid only if the
	98	* return from the function is TRUE
	99	* error_msg is a pointer that will be set to an internal buffer giving an
	100	* error message upon failure (the return is FALSE). Untouched if
	101	* function succeeds
	102	* output_warning says whether to output any warning messages, or suppress
	103	* them
	104	* strict is true if this should fail instead of warn if there are
	105	* non-octal digits within the braces
	106	* silence_non_portable is true if to suppress warnings about the code
	107	* point returned being too large to fit on all platforms.
	108	* UTF is true iff the string *s is encoded in UTF-8.
	109	*/
	110	char* e;
	111	STRLEN numbers_len;
	112	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	113	\| PERL_SCAN_DISALLOW_PREFIX
	114	/* XXX Until the message is improved in grok_oct, handle errors
	115	* ourselves */
	116	\| PERL_SCAN_SILENT_ILLDIGIT;
	117
	118	PERL_ARGS_ASSERT_GROK_BSLASH_O;
	119
	120
	121	assert(**s == 'o');
	122	(*s)++;
	123
	124	if (**s != '{') {
	125	*error_msg = "Missing braces on \\o{}";
	126	return FALSE;
	127	}
	128
	129	e = strchr(*s, '}');
	130	if (!e) {
	131	(s)++; / Move past the '{' */
	132	while (isOCTAL(*s)) { / Position beyond the legal digits */
	133	(*s)++;
	134	}
	135	*error_msg = "Missing right brace on \\o{";
	136	return FALSE;
	137	}
	138
	139	(s)++; / Point to expected first digit (could be first byte of utf8
	140	sequence if not a digit) */
	141	numbers_len = e - *s;
	142	if (numbers_len == 0) {
	143	(s)++; / Move past the } */
	144	*error_msg = "Number with no digits";
	145	return FALSE;
	146	}
	147
	148	if (silence_non_portable) {
	149	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	150	}
	151
	152	uv = grok_oct(s, &numbers_len, &flags, NULL);
	153	/* Note that if has non-octal, will ignore everything starting with that up
	154	* to the '}' */
	155
	156	if (numbers_len != (STRLEN) (e - *s)) {
	157	if (strict) {
	158	*s += numbers_len;
	159	s += (UTF) ? UTF8SKIP(s) : (STRLEN) 1;
	160	*error_msg = "Non-octal character";
	161	return FALSE;
	162	}
	163	else if (output_warning) {
	164	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
	165	/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
	166	"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
	167	(s + numbers_len),
	168	(int) numbers_len,
	169	*s);
	170	}
	171	}
	172
	173	/* Return past the '}' */
	174	*s = e + 1;
	175
	176	return TRUE;
	177	}
	178
	179	PERL_STATIC_INLINE bool
	180	S_grok_bslash_x(pTHX_ char *s, UV uv, const char** error_msg,
	181	const bool output_warning, const bool strict,
	182	const bool silence_non_portable,
	183	const bool UTF)
	184	{
	185
	186	/* Documentation to be supplied when interface nailed down finally
	187	* This returns FALSE if there is an error which the caller need not recover
	188	* from; , otherwise TRUE. In either case the caller should look at *len
	189	* On input:
	190	* s is the address of a pointer to a NULL terminated string that begins
	191	* with 'x', and the previous character was a backslash. At exit, *s
	192	* will be advanced to the byte just after those absorbed by this
	193	* function. Hence the caller can continue parsing from there. In
	194	* the case of an error, this routine has generally positioned *s to
	195	* point just to the right of the first bad spot, so that a message
	196	* that has a "<--" to mark the spot will be correctly positioned.
	197	* uv points to a UV that will hold the output value, valid only if the
	198	* return from the function is TRUE
	199	* error_msg is a pointer that will be set to an internal buffer giving an
	200	* error message upon failure (the return is FALSE). Untouched if
	201	* function succeeds
	202	* output_warning says whether to output any warning messages, or suppress
	203	* them
	204	* strict is true if anything out of the ordinary should cause this to
	205	* fail instead of warn or be silent. For example, it requires
	206	* exactly 2 digits following the \x (when there are no braces).
	207	* 3 digits could be a mistake, so is forbidden in this mode.
	208	* silence_non_portable is true if to suppress warnings about the code
	209	* point returned being too large to fit on all platforms.
	210	* UTF is true iff the string *s is encoded in UTF-8.
	211	*/
	212	char* e;
	213	STRLEN numbers_len;
	214	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	215
	216	PERL_ARGS_ASSERT_GROK_BSLASH_X;
	217
	218	PERL_UNUSED_ARG(output_warning);
	219
	220	assert(**s == 'x');
	221	(*s)++;
	222
	223	if (strict) {
	224	flags \|= PERL_SCAN_SILENT_ILLDIGIT;
	225	}
	226
	227	if (**s != '{') {
	228	STRLEN len = (strict) ? 3 : 2;
	229
	230	uv = grok_hex(s, &len, &flags, NULL);
	231	*s += len;
	232	if (strict && len != 2) {
	233	if (len < 2) {
	234	s += (UTF) ? UTF8SKIP(s) : 1;
	235	*error_msg = "Non-hex character";
	236	}
	237	else {
	238	*error_msg = "Use \\x{...} for more than two hex characters";
	239	}
	240	return FALSE;
	241	}
	242	return TRUE;
	243	}
	244
	245	e = strchr(*s, '}');
	246	if (!e) {
	247	(s)++; / Move past the '{' */
	248	while (isXDIGIT(*s)) { / Position beyond the legal digits */
	249	(*s)++;
	250	}
	251	/* XXX The corresponding message above for \o is just '\\o{'; other
	252	* messages for other constructs include the '}', so are inconsistent.
	253	*/
	254	*error_msg = "Missing right brace on \\x{}";
	255	return FALSE;
	256	}
	257
	258	(s)++; / Point to expected first digit (could be first byte of utf8
	259	sequence if not a digit) */
	260	numbers_len = e - *s;
	261	if (numbers_len == 0) {
	262	if (strict) {
	263	(s)++; / Move past the } */
	264	*error_msg = "Number with no digits";
	265	return FALSE;
	266	}
	267	return TRUE;
	268	}
	269
	270	flags \|= PERL_SCAN_ALLOW_UNDERSCORES;
	271	if (silence_non_portable) {
	272	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	273	}
	274
	275	uv = grok_hex(s, &numbers_len, &flags, NULL);
	276	/* Note that if has non-hex, will ignore everything starting with that up
	277	* to the '}' */
	278
	279	if (strict && numbers_len != (STRLEN) (e - *s)) {
	280	*s += numbers_len;
	281	s += (UTF) ? UTF8SKIP(s) : 1;
	282	*error_msg = "Non-hex character";
	283	return FALSE;
	284	}
	285
	286	/* Return past the '}' */
	287	*s = e + 1;
	288
	289	return TRUE;
	290	}
	291
	292	STATIC char*
	293	S_form_short_octal_warning(pTHX_
	294	const char * const s, /* Points to first non-octal */
	295	const STRLEN len /* Length of octals string, so
	296	(s-len) points to first
	297	octal */
	298	) {
	299	/* Return a character string consisting of a warning message for when a
	300	* string constant in octal is weird, like "\078". */
	301
	302	const char * sans_leading_zeros = s - len;
	303
	304	PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
	305
	306	assert(s == '8' \|\| s == '9');
	307
	308	/* Remove the leading zeros, retaining one zero so won't be zero length */
	309	while (*sans_leading_zeros == '0') sans_leading_zeros++;
	310	if (sans_leading_zeros == s) {
	311	sans_leading_zeros--;
	312	}
	313
	314	return Perl_form(aTHX_
	315	"'%.s' resolved to '\\o{%.s}%c'",
	316	(int) (len + 2), s - len - 1,
	317	(int) (s - sans_leading_zeros), sans_leading_zeros,
	318	*s);
	319	}
	320
	321	/*
	322	* Local variables:
	323	* c-indentation-style: bsd
	324	* c-basic-offset: 4
	325	* indent-tabs-mode: nil
	326	* End:
	327	*
	328	* ex: set ts=8 sts=4 sw=4 et:
	329	*/