perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* dquote.c
	2	*
	3	* This file contains functions that are related to
	4	* parsing double-quotish expressions.
	5	*
	6	*/
	7
	8	#include "EXTERN.h"
	9	#define PERL_IN_DQUOTE_C
	10	#include "perl.h"
	11	#include "dquote_inline.h"
	12
	13	/* XXX Add documentation after final interface and behavior is decided */
	14	/* May want to show context for error, so would pass S_grok_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
	15	U8 source = *current;
	16	*/
	17
	18	char
	19	Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
	20	{
	21
	22	U8 result;
	23
	24	if (! isPRINT_A(source)) {
	25	Perl_croak(aTHX_ "%s",
	26	"Character following \"\\c\" must be printable ASCII");
	27	}
	28	else if (source == '{') {
	29	const char control = toCTRL('{');
	30	if (isPRINT_A(control)) {
	31	/* diag_listed_as: Use "%s" instead of "%s" */
	32	Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", control);
	33	}
	34	else {
	35	Perl_croak(aTHX_ "Sequence \"\\c{\" invalid");
	36	}
	37	}
	38
	39	result = toCTRL(source);
	40	if (output_warning && isPRINT_A(result)) {
	41	U8 clearer[3];
	42	U8 i = 0;
	43	if (! isWORDCHAR(result)) {
	44	clearer[i++] = '\\';
	45	}
	46	clearer[i++] = result;
	47	clearer[i++] = '\0';
	48
	49	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
	50	"\"\\c%c\" is more clearly written simply as \"%s\"",
	51	source,
	52	clearer);
	53	}
	54
	55	return result;
	56	}
	57
	58	bool
	59	Perl_grok_bslash_o(pTHX_ char *s, const char const send, UV *uv,
	60	const char** error_msg,
	61	const bool output_warning, const bool strict,
	62	const bool silence_non_portable,
	63	const bool UTF)
	64	{
	65
	66	/* Documentation to be supplied when interface nailed down finally
	67	* This returns FALSE if there is an error which the caller need not recover
	68	* from; otherwise TRUE. In either case the caller should look at *len [???].
	69	* It guarantees that the returned codepoint, *uv, when expressed as
	70	* utf8 bytes, would fit within the skipped "\o{...}" bytes.
	71	* On input:
	72	* s is the address of a pointer to a string. **s is 'o', and the
	73	* previous character was a backslash. At exit, *s will be advanced
	74	* to the byte just after those absorbed by this function. Hence the
	75	* caller can continue parsing from there. In the case of an error,
	76	* this routine has generally positioned *s to point just to the right
	77	* of the first bad spot, so that a message that has a "<--" to mark
	78	* the spot will be correctly positioned.
	79	* send - 1 gives a limit in *s that this function is not permitted to
	80	* look beyond. That is, the function may look at bytes only in the
	81	* range *s..send-1
	82	* uv points to a UV that will hold the output value, valid only if the
	83	* return from the function is TRUE
	84	* error_msg is a pointer that will be set to an internal buffer giving an
	85	* error message upon failure (the return is FALSE). Untouched if
	86	* function succeeds
	87	* output_warning says whether to output any warning messages, or suppress
	88	* them
	89	* strict is true if this should fail instead of warn if there are
	90	* non-octal digits within the braces
	91	* silence_non_portable is true if to suppress warnings about the code
	92	* point returned being too large to fit on all platforms.
	93	* UTF is true iff the string *s is encoded in UTF-8.
	94	*/
	95	char* e;
	96	STRLEN numbers_len;
	97	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	98	\| PERL_SCAN_DISALLOW_PREFIX
	99	/* XXX Until the message is improved in grok_oct, handle errors
	100	* ourselves */
	101	\| PERL_SCAN_SILENT_ILLDIGIT;
	102
	103	PERL_ARGS_ASSERT_GROK_BSLASH_O;
	104
	105	assert((s - 1) == '\\');
	106	assert(* *s == 'o');
	107	(*s)++;
	108
	109	if (**s != '{') {
	110	*error_msg = "Missing braces on \\o{}";
	111	return FALSE;
	112	}
	113
	114	e = (char ) memchr(s, '}', send - *s);
	115	if (!e) {
	116	(s)++; / Move past the '{' */
	117	while (isOCTAL(*s)) { / Position beyond the legal digits */
	118	(*s)++;
	119	}
	120	*error_msg = "Missing right brace on \\o{";
	121	return FALSE;
	122	}
	123
	124	(s)++; / Point to expected first digit (could be first byte of utf8
	125	sequence if not a digit) */
	126	numbers_len = e - *s;
	127	if (numbers_len == 0) {
	128	(s)++; / Move past the } */
	129	*error_msg = "Number with no digits";
	130	return FALSE;
	131	}
	132
	133	if (silence_non_portable) {
	134	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	135	}
	136
	137	uv = grok_oct(s, &numbers_len, &flags, NULL);
	138	/* Note that if has non-octal, will ignore everything starting with that up
	139	* to the '}' */
	140
	141	if (numbers_len != (STRLEN) (e - *s)) {
	142	if (strict) {
	143	*s += numbers_len;
	144	s += (UTF) ? UTF8SKIP(s) : (STRLEN) 1;
	145	*error_msg = "Non-octal character";
	146	return FALSE;
	147	}
	148	else if (output_warning) {
	149	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
	150	/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
	151	"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
	152	(s + numbers_len),
	153	(int) numbers_len,
	154	*s);
	155	}
	156	}
	157
	158	/* Return past the '}' */
	159	*s = e + 1;
	160
	161	return TRUE;
	162	}
	163
	164	bool
	165	Perl_grok_bslash_x(pTHX_ char *s, const char const send, UV *uv,
	166	const char** error_msg,
	167	const bool output_warning, const bool strict,
	168	const bool silence_non_portable,
	169	const bool UTF)
	170	{
	171
	172	/* Documentation to be supplied when interface nailed down finally
	173	* This returns FALSE if there is an error which the caller need not recover
	174	* from; otherwise TRUE.
	175	* It guarantees that the returned codepoint, *uv, when expressed as
	176	* utf8 bytes, would fit within the skipped "\x{...}" bytes.
	177	*
	178	* On input:
	179	* s is the address of a pointer to a string. **s is 'x', and the
	180	* previous character was a backslash. At exit, *s will be advanced
	181	* to the byte just after those absorbed by this function. Hence the
	182	* caller can continue parsing from there. In the case of an error,
	183	* this routine has generally positioned *s to point just to the right
	184	* of the first bad spot, so that a message that has a "<--" to mark
	185	* the spot will be correctly positioned.
	186	* send - 1 gives a limit in *s that this function is not permitted to
	187	* look beyond. That is, the function may look at bytes only in the
	188	* range *s..send-1
	189	* uv points to a UV that will hold the output value, valid only if the
	190	* return from the function is TRUE
	191	* error_msg is a pointer that will be set to an internal buffer giving an
	192	* error message upon failure (the return is FALSE). Untouched if
	193	* function succeeds
	194	* output_warning says whether to output any warning messages, or suppress
	195	* them
	196	* strict is true if anything out of the ordinary should cause this to
	197	* fail instead of warn or be silent. For example, it requires
	198	* exactly 2 digits following the \x (when there are no braces).
	199	* 3 digits could be a mistake, so is forbidden in this mode.
	200	* silence_non_portable is true if to suppress warnings about the code
	201	* point returned being too large to fit on all platforms.
	202	* UTF is true iff the string *s is encoded in UTF-8.
	203	*/
	204	char* e;
	205	STRLEN numbers_len;
	206	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	207
	208
	209	PERL_ARGS_ASSERT_GROK_BSLASH_X;
	210
	211	assert((s - 1) == '\\');
	212	assert(* *s == 'x');
	213	(*s)++;
	214
	215	if (strict \|\| ! output_warning) {
	216	flags \|= PERL_SCAN_SILENT_ILLDIGIT;
	217	}
	218
	219	if (**s != '{') {
	220	STRLEN len = (strict) ? 3 : 2;
	221
	222	uv = grok_hex(s, &len, &flags, NULL);
	223	*s += len;
	224	if (strict && len != 2) {
	225	if (len < 2) {
	226	s += (UTF) ? UTF8SKIP(s) : 1;
	227	*error_msg = "Non-hex character";
	228	}
	229	else {
	230	*error_msg = "Use \\x{...} for more than two hex characters";
	231	}
	232	return FALSE;
	233	}
	234	return TRUE;
	235	}
	236
	237	e = (char ) memchr(s, '}', send - *s);
	238	if (!e) {
	239	(s)++; / Move past the '{' */
	240	while (isXDIGIT(*s)) { / Position beyond the legal digits */
	241	(*s)++;
	242	}
	243	/* XXX The corresponding message above for \o is just '\\o{'; other
	244	* messages for other constructs include the '}', so are inconsistent.
	245	*/
	246	*error_msg = "Missing right brace on \\x{}";
	247	return FALSE;
	248	}
	249
	250	(s)++; / Point to expected first digit (could be first byte of utf8
	251	sequence if not a digit) */
	252	numbers_len = e - *s;
	253	if (numbers_len == 0) {
	254	if (strict) {
	255	(s)++; / Move past the } */
	256	*error_msg = "Number with no digits";
	257	return FALSE;
	258	}
	259	*s = e + 1;
	260	*uv = 0;
	261	return TRUE;
	262	}
	263
	264	flags \|= PERL_SCAN_ALLOW_UNDERSCORES;
	265	if (silence_non_portable) {
	266	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
	267	}
	268
	269	uv = grok_hex(s, &numbers_len, &flags, NULL);
	270	/* Note that if has non-hex, will ignore everything starting with that up
	271	* to the '}' */
	272
	273	if (strict && numbers_len != (STRLEN) (e - *s)) {
	274	*s += numbers_len;
	275	s += (UTF) ? UTF8SKIP(s) : 1;
	276	*error_msg = "Non-hex character";
	277	return FALSE;
	278	}
	279
	280	/* Return past the '}' */
	281	*s = e + 1;
	282
	283	return TRUE;
	284	}
	285
	286	/*
	287	* ex: set ts=8 sts=4 sw=4 et:
	288	*/