Commit | Line | Data |
---|---|---|
f40a6c71 SC |
1 | =head1 NAME |
2 | ||
3 | perlclib - Internal replacements for standard C library functions | |
4 | ||
5 | =head1 DESCRIPTION | |
6 | ||
7 | One thing Perl porters should note is that F<perl> doesn't tend to use that | |
8 | much of the C standard library internally; you'll see very little use of, | |
9 | for example, the F<ctype.h> functions in there. This is because Perl | |
10 | tends to reimplement or abstract standard library functions, so that we | |
11 | know exactly how they're going to operate. | |
12 | ||
13 | This is a reference card for people who are familiar with the C library | |
56d22bd2 | 14 | and who want to do things the Perl way; to tell them which functions |
f40a6c71 SC |
15 | they ought to use instead of the more normal C functions. |
16 | ||
17 | =head2 Conventions | |
18 | ||
19 | In the following tables: | |
20 | ||
21 | =over 3 | |
22 | ||
23 | =item C<t> | |
24 | ||
25 | is a type. | |
26 | ||
27 | =item C<p> | |
28 | ||
29 | is a pointer. | |
30 | ||
31 | =item C<n> | |
32 | ||
33 | is a number. | |
34 | ||
35 | =item C<s> | |
36 | ||
37 | is a string. | |
38 | ||
39 | =back | |
40 | ||
41 | C<sv>, C<av>, C<hv>, etc. represent variables of their respective types. | |
42 | ||
43 | =head2 File Operations | |
44 | ||
45 | Instead of the F<stdio.h> functions, you should use the Perl abstraction | |
46 | layer. Instead of C<FILE*> types, you need to be handling C<PerlIO*> | |
56d22bd2 | 47 | types. Don't forget that with the new PerlIO layered I/O abstraction |
f40a6c71 SC |
48 | C<FILE*> types may not even be available. See also the C<perlapio> |
49 | documentation for more information about the following functions: | |
50 | ||
5b282140 | 51 | Instead Of: Use: |
56d22bd2 | 52 | |
5b282140 KW |
53 | stdin PerlIO_stdin() |
54 | stdout PerlIO_stdout() | |
55 | stderr PerlIO_stderr() | |
f40a6c71 | 56 | |
5b282140 KW |
57 | fopen(fn, mode) PerlIO_open(fn, mode) |
58 | freopen(fn, mode, stream) PerlIO_reopen(fn, mode, perlio) (Dep- | |
59 | recated) | |
60 | fflush(stream) PerlIO_flush(perlio) | |
61 | fclose(stream) PerlIO_close(perlio) | |
f40a6c71 SC |
62 | |
63 | =head2 File Input and Output | |
64 | ||
5b282140 | 65 | Instead Of: Use: |
f40a6c71 | 66 | |
5b282140 | 67 | fprintf(stream, fmt, ...) PerlIO_printf(perlio, fmt, ...) |
f40a6c71 | 68 | |
5b282140 KW |
69 | [f]getc(stream) PerlIO_getc(perlio) |
70 | [f]putc(n, stream) PerlIO_putc(perlio, n) | |
71 | ungetc(n, stream) PerlIO_ungetc(perlio, n) | |
f40a6c71 SC |
72 | |
73 | Note that the PerlIO equivalents of C<fread> and C<fwrite> are slightly | |
74 | different from their C library counterparts: | |
75 | ||
5b282140 KW |
76 | fread(p, size, n, stream) PerlIO_read(perlio, buf, numbytes) |
77 | fwrite(p, size, n, stream) PerlIO_write(perlio, buf, numbytes) | |
f40a6c71 | 78 | |
5b282140 | 79 | fputs(s, stream) PerlIO_puts(perlio, s) |
f40a6c71 SC |
80 | |
81 | There is no equivalent to C<fgets>; one should use C<sv_gets> instead: | |
82 | ||
5b282140 | 83 | fgets(s, n, stream) sv_gets(sv, perlio, append) |
f40a6c71 SC |
84 | |
85 | =head2 File Positioning | |
86 | ||
5b282140 | 87 | Instead Of: Use: |
f40a6c71 | 88 | |
5b282140 KW |
89 | feof(stream) PerlIO_eof(perlio) |
90 | fseek(stream, n, whence) PerlIO_seek(perlio, n, whence) | |
91 | rewind(stream) PerlIO_rewind(perlio) | |
f40a6c71 | 92 | |
5b282140 KW |
93 | fgetpos(stream, p) PerlIO_getpos(perlio, sv) |
94 | fsetpos(stream, p) PerlIO_setpos(perlio, sv) | |
f40a6c71 | 95 | |
5b282140 KW |
96 | ferror(stream) PerlIO_error(perlio) |
97 | clearerr(stream) PerlIO_clearerr(perlio) | |
f40a6c71 SC |
98 | |
99 | =head2 Memory Management and String Handling | |
100 | ||
5b282140 | 101 | Instead Of: Use: |
702eb6d0 | 102 | |
5b282140 KW |
103 | t* p = malloc(n) Newx(p, n, t) |
104 | t* p = calloc(n, s) Newxz(p, n, t) | |
105 | p = realloc(p, n) Renew(p, n, t) | |
106 | memcpy(dst, src, n) Copy(src, dst, n, t) | |
107 | memmove(dst, src, n) Move(src, dst, n, t) | |
108 | memcpy(dst, src, sizeof(t)) StructCopy(src, dst, t) | |
109 | memset(dst, 0, n * sizeof(t)) Zero(dst, n, t) | |
110 | memzero(dst, n) Zero(dst, n, char) | |
111 | free(p) Safefree(p) | |
f40a6c71 | 112 | |
5b282140 KW |
113 | strdup(p) savepv(p) |
114 | strndup(p, n) savepvn(p, n) (Hey, strndup doesn't | |
115 | exist!) | |
f40a6c71 | 116 | |
5b282140 KW |
117 | strstr(big, little) instr(big, little) |
118 | strcmp(s1, s2) strLE(s1, s2) / strEQ(s1, s2) | |
119 | / strGT(s1,s2) | |
120 | strncmp(s1, s2, n) strnNE(s1, s2, n) / strnEQ(s1, s2, n) | |
f40a6c71 | 121 | |
0e42d607 DD |
122 | memcmp(p1, p2, n) memNE(p1, p2, n) |
123 | !memcmp(p1, p2, n) memEQ(p1, p2, n) | |
124 | ||
f40a6c71 SC |
125 | Notice that the order of arguments to C<Copy> and C<Move> differs from |
126 | that used in C<memcpy> and C<memmove>. | |
127 | ||
128 | Most of the time, though, you'll want to be dealing with SVs internally | |
129 | instead of raw C<char *> strings: | |
130 | ||
5b282140 KW |
131 | strlen(s) sv_len(sv) |
132 | strcpy(dt, src) sv_setpv(sv, s) | |
133 | strncpy(dt, src, n) sv_setpvn(sv, s, n) | |
134 | strcat(dt, src) sv_catpv(sv, s) | |
135 | strncat(dt, src, n) sv_catpvn(sv, s, n) | |
136 | sprintf(s, fmt, ...) sv_setpvf(sv, fmt, ...) | |
f40a6c71 | 137 | |
328bf373 | 138 | Note also the existence of C<sv_catpvf> and C<sv_vcatpvfn>, combining |
f40a6c71 SC |
139 | concatenation with formatting. |
140 | ||
9f653bb5 | 141 | Sometimes instead of zeroing the allocated heap by using Newxz() you |
9965345d JH |
142 | should consider "poisoning" the data. This means writing a bit |
143 | pattern into it that should be illegal as pointers (and floating point | |
144 | numbers), and also hopefully surprising enough as integers, so that | |
145 | any code attempting to use the data without forethought will break | |
146 | sooner rather than later. Poisoning can be done using the Poison() | |
ea787f3b | 147 | macros, which have similar arguments to Zero(): |
9965345d | 148 | |
5b282140 KW |
149 | PoisonWith(dst, n, t, b) scribble memory with byte b |
150 | PoisonNew(dst, n, t) equal to PoisonWith(dst, n, t, 0xAB) | |
151 | PoisonFree(dst, n, t) equal to PoisonWith(dst, n, t, 0xEF) | |
152 | Poison(dst, n, t) equal to PoisonFree(dst, n, t) | |
9965345d | 153 | |
f40a6c71 SC |
154 | =head2 Character Class Tests |
155 | ||
fa2b1084 KW |
156 | There are several types of character class tests that Perl implements. |
157 | The only ones described here are those that directly correspond to C | |
158 | library functions that operate on 8-bit characters, but there are | |
159 | equivalents that operate on wide characters and UTF-8 encoded strings. | |
dcccc8ff | 160 | All are more fully described in L<perlapi/Character classification> and |
fa2b1084 KW |
161 | L<perlapi/Character case changing>. |
162 | ||
163 | The C library routines listed in the table below return values based on | |
164 | the current locale. Use the entries in the final column for that | |
165 | functionality. The other two columns always assume a POSIX (or C) | |
166 | locale. The entries in the ASCII column are only meaningful for ASCII | |
167 | inputs, returning FALSE for anything else. Use these only when you | |
168 | B<know> that is what you want. The entries in the Latin1 column assume | |
169 | that the non-ASCII 8-bit characters are as Unicode defines them, the | |
170 | same as ISO-8859-1, often called Latin 1. | |
171 | ||
172 | Instead Of: Use for ASCII: Use for Latin1: Use for locale: | |
173 | ||
174 | isalnum(c) isALPHANUMERIC(c) isALPHANUMERIC_L1(c) isALPHANUMERIC_LC(c) | |
175 | isalpha(c) isALPHA(c) isALPHA_L1(c) isALPHA_LC(c) | |
176 | isascii(c) isASCII(c) isASCII_LC(c) | |
177 | isblank(c) isBLANK(c) isBLANK_L1(c) isBLANK_LC(c) | |
178 | iscntrl(c) isCNTRL(c) isCNTRL_L1(c) isCNTRL_LC(c) | |
179 | isdigit(c) isDIGIT(c) isDIGIT_L1(c) isDIGIT_LC(c) | |
180 | isgraph(c) isGRAPH(c) isGRAPH_L1(c) isGRAPH_LC(c) | |
181 | islower(c) isLOWER(c) isLOWER_L1(c) isLOWER_LC(c) | |
182 | isprint(c) isPRINT(c) isPRINT_L1(c) isPRINT_LC(c) | |
183 | ispunct(c) isPUNCT(c) isPUNCT_L1(c) isPUNCT_LC(c) | |
184 | isspace(c) isSPACE(c) isSPACE_L1(c) isSPACE_LC(c) | |
185 | isupper(c) isUPPER(c) isUPPER_L1(c) isUPPER_LC(c) | |
186 | isxdigit(c) isXDIGIT(c) isXDIGIT_L1(c) isXDIGIT_LC(c) | |
187 | ||
188 | tolower(c) toLOWER(c) toLOWER_L1(c) toLOWER_LC(c) | |
189 | toupper(c) toUPPER(c) toUPPER_LC(c) | |
190 | ||
191 | To emphasize that you are operating only on ASCII characters, you can | |
192 | append C<_A> to each of the macros in the ASCII column: C<isALPHA_A>, | |
193 | C<isDIGIT_A>, and so on. | |
194 | ||
195 | (There is no entry in the Latin1 column for C<isascii> even though there | |
196 | is an C<isASCII_L1>, which is identical to C<isASCII>; the | |
197 | latter name is clearer. There is no entry in the Latin1 column for | |
198 | C<toupper> because the result can be non-Latin1. You have to use | |
199 | C<toUPPER_uni>, as described in L<perlapi/Character case changing>.) | |
f40a6c71 SC |
200 | |
201 | =head2 F<stdlib.h> functions | |
202 | ||
5b282140 | 203 | Instead Of: Use: |
f40a6c71 | 204 | |
5b282140 | 205 | atof(s) Atof(s) |
22ff3130 HS |
206 | atoi(s) grok_atoUV(s, &uv, &e) |
207 | atol(s) grok_atoUV(s, &uv, &e) | |
6928bedc | 208 | strtod(s, &p) my_atof3(s, &nv, &p) is the closest we have |
22ff3130 HS |
209 | strtol(s, &p, n) grok_atoUV(s, &uv, &e) |
210 | strtoul(s, &p, n) grok_atoUV(s, &uv, &e) | |
211 | ||
212 | Typical use is to do range checks on C<uv> before casting: | |
213 | ||
5d4a52b5 KW |
214 | int i; UV uv; |
215 | char* end_ptr = input_end; | |
22ff3130 HS |
216 | if (grok_atoUV(input, &uv, &end_ptr) |
217 | && uv <= INT_MAX) { | |
218 | i = (int)uv; | |
219 | ... /* continue parsing from end_ptr */ | |
220 | } else { | |
221 | ... /* parse error: not a decimal integer in range 0 .. INT_MAX */ | |
222 | } | |
f40a6c71 | 223 | |
53305cf1 | 224 | Notice also the C<grok_bin>, C<grok_hex>, and C<grok_oct> functions in |
2826e23d | 225 | F<numeric.c> for converting strings representing numbers in the respective |
22ff3130 | 226 | bases into C<NV>s. Note that grok_atoUV() doesn't handle negative inputs, |
e05c5d08 | 227 | or leading whitespace (being purposefully strict). |
338aa8b0 JH |
228 | |
229 | Note that strtol() and strtoul() may be disguised as Strtol(), Strtoul(), | |
230 | Atol(), Atoul(). Avoid those, too. | |
f40a6c71 SC |
231 | |
232 | In theory C<Strtol> and C<Strtoul> may not be defined if the machine perl is | |
233 | built on doesn't actually have strtol and strtoul. But as those 2 | |
234 | functions are part of the 1989 ANSI C spec we suspect you'll find them | |
235 | everywhere by now. | |
236 | ||
5b282140 KW |
237 | int rand() double Drand01() |
238 | srand(n) { seedDrand01((Rand_seed_t)n); | |
239 | PL_srand_called = TRUE; } | |
56d22bd2 | 240 | |
5b282140 | 241 | exit(n) my_exit(n) |
6c1246d3 | 242 | system(s) Don't. Look at pp_system or use my_popen. |
f40a6c71 | 243 | |
5b282140 | 244 | getenv(s) PerlEnv_getenv(s) |
7ad03f50 | 245 | setenv(s, val) my_setenv(s, val) |
f40a6c71 SC |
246 | |
247 | =head2 Miscellaneous functions | |
248 | ||
249 | You should not even B<want> to use F<setjmp.h> functions, but if you | |
250 | think you do, use the C<JMPENV> stack in F<scope.h> instead. | |
251 | ||
252 | For C<signal>/C<sigaction>, use C<rsignal(signo, handler)>. | |
253 | ||
254 | =head1 SEE ALSO | |
255 | ||
d974f73b | 256 | L<perlapi>, L<perlapio>, L<perlguts> |
f40a6c71 | 257 |