This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c:scan_heredoc: Remove incorrect part of comment
[perl5.git] / dquote_static.c
CommitLineData
04e98a4d
AD
1/* dquote_static.c
2 *
3efe3cb8 3 * This file contains static functions that are related to
04e98a4d
AD
4 * parsing double-quotish expressions, but are used in more than
5 * one file.
6 *
7 * It is currently #included by regcomp.c and toke.c.
8*/
9
881ffab6
KW
10#define PERL_IN_DQUOTE_STATIC_C
11#include "proto.h"
12#include "embed.h"
13
04e98a4d
AD
14/*
15 - regcurly - a little FSA that accepts {\d+,?\d*}
16 Pulled from regcomp.c.
17 */
04e98a4d 18PERL_STATIC_INLINE I32
93df2d9c 19S_regcurly(pTHX_ register const char *s)
04e98a4d 20{
881ffab6 21 PERL_ARGS_ASSERT_REGCURLY;
04e98a4d
AD
22
23 if (*s++ != '{')
24 return FALSE;
25 if (!isDIGIT(*s))
26 return FALSE;
27 while (isDIGIT(*s))
28 s++;
2ec31dd9 29 if (*s == ',') {
04e98a4d 30 s++;
2ec31dd9
KW
31 while (isDIGIT(*s))
32 s++;
33 }
04e98a4d
AD
34 if (*s != '}')
35 return FALSE;
36 return TRUE;
37}
db30362b 38
68b355dd
KW
39/* XXX Add documentation after final interface and behavior is decided */
40/* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
41 U8 source = *current;
68b355dd
KW
42*/
43
44STATIC char
17a3df4c 45S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
68b355dd
KW
46{
47
48 U8 result;
49
17a3df4c
KW
50 if (utf8) {
51 /* Trying to deprecate non-ASCII usages. This construct has never
52 * worked for a utf8 variant. So, even though are accepting non-ASCII
53 * Latin1 in 5.14, no need to make them work under utf8 */
54 if (! isASCII(source)) {
55 Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
56 }
68b355dd
KW
57 }
58
59 result = toCTRL(source);
17a3df4c
KW
60 if (! isASCII(source)) {
61 Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
62 "Character following \"\\c\" must be ASCII");
63 }
64 else if (! isCNTRL(result) && output_warning) {
68b355dd 65 if (source == '{') {
17a3df4c
KW
66 Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
67 "\"\\c{\" is deprecated and is more clearly written as \";\"");
68b355dd 68 }
17a3df4c 69 else {
68b355dd
KW
70 U8 clearer[3];
71 U8 i = 0;
72 if (! isALNUM(result)) {
73 clearer[i++] = '\\';
74 }
75 clearer[i++] = result;
76 clearer[i++] = '\0';
77
17a3df4c
KW
78 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
79 "\"\\c%c\" is more clearly written simply as \"%s\"",
68b355dd
KW
80 source,
81 clearer);
82 }
83 }
84
85 return result;
86}
87
db30362b
KW
88STATIC bool
89S_grok_bslash_o(pTHX_ const char *s,
90 UV *uv,
91 STRLEN *len,
92 const char** error_msg,
93 const bool output_warning)
94{
95
96/* Documentation to be supplied when interface nailed down finally
97 * This returns FALSE if there is an error which the caller need not recover
98 * from; , otherwise TRUE. In either case the caller should look at *len
99 * On input:
100 * s points to a string that begins with 'o', and the previous character
101 * was a backslash.
102 * uv points to a UV that will hold the output value, valid only if the
103 * return from the function is TRUE
104 * len on success will point to the next character in the string past the
105 * end of this construct.
106 * on failure, it will point to the failure
107 * error_msg is a pointer that will be set to an internal buffer giving an
108 * error message upon failure (the return is FALSE). Untouched if
109 * function succeeds
110 * output_warning says whether to output any warning messages, or suppress
111 * them
112 */
113 const char* e;
114 STRLEN numbers_len;
115 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
116 | PERL_SCAN_DISALLOW_PREFIX
117 /* XXX Until the message is improved in grok_oct, handle errors
118 * ourselves */
119 | PERL_SCAN_SILENT_ILLDIGIT;
120
121 PERL_ARGS_ASSERT_GROK_BSLASH_O;
122
123
124 assert(*s == 'o');
125 s++;
126
127 if (*s != '{') {
128 *len = 1; /* Move past the o */
129 *error_msg = "Missing braces on \\o{}";
130 return FALSE;
131 }
132
133 e = strchr(s, '}');
134 if (!e) {
135 *len = 2; /* Move past the o{ */
136 *error_msg = "Missing right brace on \\o{";
137 return FALSE;
138 }
139
140 /* Return past the '}' no matter what is inside the braces */
7d2838d8 141 *len = e - s + 2; /* 2 = 1 for the 'o' + 1 for the '}' */
db30362b
KW
142
143 s++; /* Point to first digit */
144
145 numbers_len = e - s;
146 if (numbers_len == 0) {
147 *error_msg = "Number with no digits";
148 return FALSE;
149 }
150
8b8acc9e 151 *uv = grok_oct(s, &numbers_len, &flags, NULL);
db30362b
KW
152 /* Note that if has non-octal, will ignore everything starting with that up
153 * to the '}' */
154
155 if (output_warning && numbers_len != (STRLEN) (e - s)) {
156 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
157 /* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
158 "Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
159 *(s + numbers_len),
160 (int) numbers_len,
161 s);
162 }
163
164 return TRUE;
165}
166
a0481293
KW
167PERL_STATIC_INLINE bool
168S_grok_bslash_x(pTHX_ const char *s,
169 UV *uv,
170 STRLEN *len,
171 const char** error_msg,
172 const bool output_warning)
173{
174
175/* Documentation to be supplied when interface nailed down finally
176 * This returns FALSE if there is an error which the caller need not recover
177 * from; , otherwise TRUE. In either case the caller should look at *len
178 * On input:
179 * s points to a string that begins with 'x', and the previous character
180 * was a backslash.
181 * uv points to a UV that will hold the output value, valid only if the
182 * return from the function is TRUE
183 * len on success will point to the next character in the string past the
184 * end of this construct.
185 * on failure, it will point to the failure
186 * error_msg is a pointer that will be set to an internal buffer giving an
187 * error message upon failure (the return is FALSE). Untouched if
188 * function succeeds
189 * output_warning says whether to output any warning messages, or suppress
190 * them
191 */
192 const char* e;
193 STRLEN numbers_len;
194 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
195 | PERL_SCAN_DISALLOW_PREFIX;
196
197 PERL_ARGS_ASSERT_GROK_BSLASH_X;
198
4f8dbb2d 199 PERL_UNUSED_ARG(output_warning);
a0481293
KW
200
201 assert(*s == 'x');
202 s++;
203
204 if (*s != '{') {
205 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
206 *len = 2;
207 *uv = grok_hex(s, len, &flags, NULL);
208 (*len)++;
209 return TRUE;
210 }
211
212 e = strchr(s, '}');
213 if (!e) {
214 *len = 2; /* Move past the 'x{' */
215 /* XXX The corresponding message above for \o is just '\\o{'; other
216 * messages for other constructs include the '}', so are inconsistent.
217 */
218 *error_msg = "Missing right brace on \\x{}";
219 return FALSE;
220 }
221
222 /* Return past the '}' no matter what is inside the braces */
223 *len = e - s + 2; /* 2 = 1 for the 'x' + 1 for the '}' */
224
225 s++; /* Point to first digit */
226
227 numbers_len = e - s;
228 *uv = grok_hex(s, &numbers_len, &flags, NULL);
229 /* Note that if has non-hex, will ignore everything starting with that up
230 * to the '}' */
231
232 return TRUE;
233}
234
04e98a4d
AD
235/*
236 * Local variables:
237 * c-indentation-style: bsd
238 * c-basic-offset: 4
14d04a33 239 * indent-tabs-mode: nil
04e98a4d
AD
240 * End:
241 *
14d04a33 242 * ex: set ts=8 sts=4 sw=4 et:
04e98a4d 243 */