perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* dquote_static.c
	2	*
	3	* This file contains static functions that are related to
	4	* parsing double-quotish expressions, but are used in more than
	5	* one file.
	6	*
	7	* It is currently #included by regcomp.c and toke.c.
	8	*/
	9
	10	#define PERL_IN_DQUOTE_STATIC_C
	11	#include "embed.h"
	12
	13	/*
	14	- regcurly - a little FSA that accepts {\d+,?\d*}
	15	Pulled from regcomp.c.
	16	*/
	17	PERL_STATIC_INLINE I32
	18	S_regcurly(pTHX_ const char *s)
	19	{
	20	PERL_ARGS_ASSERT_REGCURLY;
	21
	22	if (*s++ != '{')
	23	return FALSE;
	24	if (!isDIGIT(*s))
	25	return FALSE;
	26	while (isDIGIT(*s))
	27	s++;
	28	if (*s == ',') {
	29	s++;
	30	while (isDIGIT(*s))
	31	s++;
	32	}
	33	if (*s != '}')
	34	return FALSE;
	35	return TRUE;
	36	}
	37
	38	/* XXX Add documentation after final interface and behavior is decided */
	39	/* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
	40	U8 source = *current;
	41	*/
	42
	43	STATIC char
	44	S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
	45	{
	46
	47	U8 result;
	48
	49	if (utf8) {
	50	/* Trying to deprecate non-ASCII usages. This construct has never
	51	* worked for a utf8 variant. So, even though are accepting non-ASCII
	52	* Latin1 in 5.14, no need to make them work under utf8 */
	53	if (! isASCII(source)) {
	54	Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
	55	}
	56	}
	57
	58	result = toCTRL(source);
	59	if (! isASCII(source)) {
	60	Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
	61	"Character following \"\\c\" must be ASCII");
	62	}
	63	else if (! isCNTRL(result) && output_warning) {
	64	if (source == '{') {
	65	Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
	66	"\"\\c{\" is deprecated and is more clearly written as \";\"");
	67	}
	68	else {
	69	U8 clearer[3];
	70	U8 i = 0;
	71	if (! isWORDCHAR(result)) {
	72	clearer[i++] = '\\';
	73	}
	74	clearer[i++] = result;
	75	clearer[i++] = '\0';
	76
	77	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
	78	"\"\\c%c\" is more clearly written simply as \"%s\"",
	79	source,
	80	clearer);
	81	}
	82	}
	83
	84	return result;
	85	}
	86
	87	STATIC bool
	88	S_grok_bslash_o(pTHX_ char *s, UV uv, const char** error_msg,
	89	const bool output_warning)
	90	{
	91
	92	/* Documentation to be supplied when interface nailed down finally
	93	* This returns FALSE if there is an error which the caller need not recover
	94	* from; , otherwise TRUE. In either case the caller should look at *len
	95	* On input:
	96	* s is the address of a pointer to a NULL terminated string that begins
	97	* with 'o', and the previous character was a backslash. At exit, *s
	98	* will be advanced to the byte just after those absorbed by this
	99	* function. Hence the caller can continue parsing from there. In
	100	* the case of an error, this routine has generally positioned *s to
	101	* point just to the right of the first bad spot, so that a message
	102	* that has a "<--" to mark the spot will be correctly positioned.
	103	* uv points to a UV that will hold the output value, valid only if the
	104	* return from the function is TRUE
	105	* error_msg is a pointer that will be set to an internal buffer giving an
	106	* error message upon failure (the return is FALSE). Untouched if
	107	* function succeeds
	108	* output_warning says whether to output any warning messages, or suppress
	109	* them
	110	*/
	111	char* e;
	112	STRLEN numbers_len;
	113	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	114	\| PERL_SCAN_DISALLOW_PREFIX
	115	/* XXX Until the message is improved in grok_oct, handle errors
	116	* ourselves */
	117	\| PERL_SCAN_SILENT_ILLDIGIT;
	118
	119	PERL_ARGS_ASSERT_GROK_BSLASH_O;
	120
	121
	122	assert(**s == 'o');
	123	(*s)++;
	124
	125	if (**s != '{') {
	126	*error_msg = "Missing braces on \\o{}";
	127	return FALSE;
	128	}
	129
	130	e = strchr(*s, '}');
	131	if (!e) {
	132	(s)++; / Move past the '{' */
	133	while (isOCTAL(*s)) { / Position beyond the legal digits */
	134	(*s)++;
	135	}
	136	*error_msg = "Missing right brace on \\o{";
	137	return FALSE;
	138	}
	139
	140	(s)++; / Point to expected first digit (could be first byte of utf8
	141	sequence if not a digit) */
	142	numbers_len = e - *s;
	143	if (numbers_len == 0) {
	144	(s)++; / Move past the } */
	145	*error_msg = "Number with no digits";
	146	return FALSE;
	147	}
	148
	149	uv = grok_oct(s, &numbers_len, &flags, NULL);
	150	/* Note that if has non-octal, will ignore everything starting with that up
	151	* to the '}' */
	152
	153	if (output_warning && numbers_len != (STRLEN) (e - *s)) {
	154	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
	155	/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
	156	"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
	157	(s + numbers_len),
	158	(int) numbers_len,
	159	*s);
	160	}
	161
	162	/* Return past the '}' */
	163	*s = e + 1;
	164
	165	return TRUE;
	166	}
	167
	168	PERL_STATIC_INLINE bool
	169	S_grok_bslash_x(pTHX_ char *s, UV uv, const char** error_msg,
	170	const bool output_warning)
	171	{
	172
	173	/* Documentation to be supplied when interface nailed down finally
	174	* This returns FALSE if there is an error which the caller need not recover
	175	* from; , otherwise TRUE. In either case the caller should look at *len
	176	* On input:
	177	* s is the address of a pointer to a NULL terminated string that begins
	178	* with 'x', and the previous character was a backslash. At exit, *s
	179	* will be advanced to the byte just after those absorbed by this
	180	* function. Hence the caller can continue parsing from there. In
	181	* the case of an error, this routine has generally positioned *s to
	182	* point just to the right of the first bad spot, so that a message
	183	* that has a "<--" to mark the spot will be correctly positioned.
	184	* uv points to a UV that will hold the output value, valid only if the
	185	* return from the function is TRUE
	186	* error_msg is a pointer that will be set to an internal buffer giving an
	187	* error message upon failure (the return is FALSE). Untouched if
	188	* function succeeds
	189	* output_warning says whether to output any warning messages, or suppress
	190	* them
	191	*/
	192	char* e;
	193	STRLEN numbers_len;
	194	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	195	\| PERL_SCAN_DISALLOW_PREFIX;
	196
	197	PERL_ARGS_ASSERT_GROK_BSLASH_X;
	198
	199	PERL_UNUSED_ARG(output_warning);
	200
	201	assert(**s == 'x');
	202	(*s)++;
	203
	204	if (**s != '{') {
	205	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	206	STRLEN len = 2;
	207	uv = grok_hex(s, &len, &flags, NULL);
	208	*s += len;
	209	return TRUE;
	210	}
	211
	212	e = strchr(*s, '}');
	213	if (!e) {
	214	(s)++; / Move past the '{' */
	215	while (isXDIGIT(*s)) { / Position beyond the legal digits */
	216	(*s)++;
	217	}
	218	/* XXX The corresponding message above for \o is just '\\o{'; other
	219	* messages for other constructs include the '}', so are inconsistent.
	220	*/
	221	*error_msg = "Missing right brace on \\x{}";
	222	return FALSE;
	223	}
	224
	225	(s)++; / Point to expected first digit (could be first byte of utf8
	226	sequence if not a digit) */
	227	numbers_len = e - *s;
	228	uv = grok_hex(s, &numbers_len, &flags, NULL);
	229	/* Note that if has non-hex, will ignore everything starting with that up
	230	* to the '}' */
	231
	232	/* Return past the '}' */
	233	*s = e + 1;
	234
	235	return TRUE;
	236	}
	237
	238	/*
	239	* Local variables:
	240	* c-indentation-style: bsd
	241	* c-basic-offset: 4
	242	* indent-tabs-mode: nil
	243	* End:
	244	*
	245	* ex: set ts=8 sts=4 sw=4 et:
	246	*/