pod/perlapio.pod

   1 =head1 NAME
   2
   3 perlapio - perl's IO abstraction interface.
   4
   5 =head1 SYNOPSIS
   6
   7     #define PERLIO_NOT_STDIO 0    /* For co-existence with stdio only */
   8     #include <perlio.h>           /* Usually via #include <perl.h> */
   9
  10     PerlIO *PerlIO_stdin(void);
  11     PerlIO *PerlIO_stdout(void);
  12     PerlIO *PerlIO_stderr(void);
  13
  14     PerlIO *PerlIO_open(const char *path,const char *mode);
  15     PerlIO *PerlIO_fdopen(int fd, const char *mode);
  16     PerlIO *PerlIO_reopen(const char *path, const char *mode, PerlIO *old);  /* deprecated */
  17     int     PerlIO_close(PerlIO *f);
  18
  19     int     PerlIO_stdoutf(const char *fmt,...);
  20     int     PerlIO_puts(PerlIO *f,const char *string);
  21     int     PerlIO_putc(PerlIO *f,int ch);
  22     int     PerlIO_write(PerlIO *f,const void *buf,size_t numbytes);
  23     int     PerlIO_printf(PerlIO *f, const char *fmt,...);
  24     int     PerlIO_vprintf(PerlIO *f, const char *fmt, va_list args);
  25     int     PerlIO_flush(PerlIO *f);
  26
  27     int     PerlIO_eof(PerlIO *f);
  28     int     PerlIO_error(PerlIO *f);
  29     void    PerlIO_clearerr(PerlIO *f);
  30
  31     int     PerlIO_getc(PerlIO *f);
  32     int     PerlIO_ungetc(PerlIO *f,int ch);
  33     int     PerlIO_read(PerlIO *f, void *buf, size_t numbytes);
  34
  35     int     PerlIO_fileno(PerlIO *f);
  36
  37     void    PerlIO_setlinebuf(PerlIO *f);
  38
  39     Off_t   PerlIO_tell(PerlIO *f);
  40     int     PerlIO_seek(PerlIO *f, Off_t offset, int whence);
  41     void    PerlIO_rewind(PerlIO *f);
  42
  43     int     PerlIO_getpos(PerlIO *f, SV *save);        /* prototype changed */
  44     int     PerlIO_setpos(PerlIO *f, SV *saved);       /* prototype changed */
  45
  46     int     PerlIO_fast_gets(PerlIO *f);
  47     int     PerlIO_has_cntptr(PerlIO *f);
  48     int     PerlIO_get_cnt(PerlIO *f);
  49     char   *PerlIO_get_ptr(PerlIO *f);
  50     void    PerlIO_set_ptrcnt(PerlIO *f, char *ptr, int count);
  51
  52     int     PerlIO_canset_cnt(PerlIO *f);              /* deprecated */
  53     void    PerlIO_set_cnt(PerlIO *f, int count);      /* deprecated */
  54
  55     int     PerlIO_has_base(PerlIO *f);
  56     char   *PerlIO_get_base(PerlIO *f);
  57     int     PerlIO_get_bufsiz(PerlIO *f);
  58
  59     PerlIO *PerlIO_importFILE(FILE *stdio, const char *mode);
  60     FILE   *PerlIO_exportFILE(PerlIO *f, const char *mode);
  61     FILE   *PerlIO_findFILE(PerlIO *f);
  62     void    PerlIO_releaseFILE(PerlIO *f,FILE *stdio);
  63
  64     int     PerlIO_apply_layers(PerlIO *f, const char *mode, const char *layers);
  65     int     PerlIO_binmode(PerlIO *f, int ptype, int imode, const char *layers);
  66     void    PerlIO_debug(const char *fmt,...);
  67
  68 =head1 DESCRIPTION
  69
  70 Perl's source code, and extensions that want maximum portability,
  71 should use the above functions instead of those defined in ANSI C's
  72 I<stdio.h>.  The perl headers (in particular "perlio.h") will
  73 C<#define> them to the I/O mechanism selected at Configure time.
  74
  75 The functions are modeled on those in I<stdio.h>, but parameter order
  76 has been "tidied up a little".
  77
  78 C<PerlIO *> takes the place of FILE *. Like FILE * it should be
  79 treated as opaque (it is probably safe to assume it is a pointer to
  80 something).
  81
  82 There are currently three implementations:
  83
  84 =over 4
  85
  86 =item 1. USE_STDIO
  87
  88 All above are #define'd to stdio functions or are trivial wrapper
  89 functions which call stdio. In this case I<only> PerlIO * is a FILE *.
  90 This has been the default implementation since the abstraction was
  91 introduced in perl5.003_02.
  92
  93 =item 2. USE_SFIO
  94
  95 A "legacy" implementation in terms of the "sfio" library. Used for
  96 some specialist applications on Unix machines ("sfio" is not widely
  97 ported away from Unix).  Most of above are #define'd to the sfio
  98 functions. PerlIO * is in this case Sfio_t *.
  99
 100 =item 3. USE_PERLIO
 101
 102 Introduced just after perl5.7.0, this is a re-implementation of the
 103 above abstraction which allows perl more control over how IO is done
 104 as it decouples IO from the way the operating system and C library
 105 choose to do things. For USE_PERLIO PerlIO * has an extra layer of
 106 indirection - it is a pointer-to-a-pointer.  This allows the PerlIO *
 107 to remain with a known value while swapping the implementation around
 108 underneath I<at run time>. In this case all the above are true (but
 109 very simple) functions which call the underlying implementation.
 110
 111 This is the only implementation for which C<PerlIO_apply_layers()>
 112 does anything "interesting".
 113
 114 The USE_PERLIO implementation is described in L<perliol>.
 115
 116 =back
 117
 118 Because "perlio.h" is a thin layer (for efficiency) the semantics of
 119 these functions are somewhat dependent on the underlying implementation.
 120 Where these variations are understood they are noted below.
 121
 122 Unless otherwise noted, functions return 0 on success, or a negative
 123 value (usually C<EOF> which is usually -1) and set C<errno> on error.
 124
 125 =over 4
 126
 127 =item B<PerlIO_stdin()>, B<PerlIO_stdout()>, B<PerlIO_stderr()>
 128
 129 Use these rather than C<stdin>, C<stdout>, C<stderr>. They are written
 130 to look like "function calls" rather than variables because this makes
 131 it easier to I<make them> function calls if a platform cannot export data
 132 to loaded modules, or if (say) different "threads" might have different
 133 values.
 134
 135 =item B<PerlIO_open(path, mode)>, B<PerlIO_fdopen(fd,mode)>
 136
 137 These correspond to fopen()/fdopen() and the arguments are the same.
 138 Return C<NULL> and set C<errno> if there is an error.  There may be an
 139 implementation limit on the number of open handles, which may be lower
 140 than the limit on the number of open files - C<errno> may not be set
 141 when C<NULL> is returned if this limit is exceeded.
 142
 143 =item B<PerlIO_reopen(path,mode,f)>
 144
 145 While this currently exists in all three implementations perl itself
 146 does not use it. I<As perl does not use it, it is not well tested.>
 147
 148 Perl prefers to C<dup> the new low-level descriptor to the descriptor
 149 used by the existing PerlIO. This may become the behaviour of this
 150 function in the future.
 151
 152 =item B<PerlIO_printf(f,fmt,...)>, B<PerlIO_vprintf(f,fmt,a)>
 153
 154 These are fprintf()/vfprintf() equivalents.
 155
 156 =item B<PerlIO_stdoutf(fmt,...)>
 157
 158 This is the printf() equivalent. printf is #defined to this function,
 159 so it is (currently) legal to use C<printf(fmt,...)> in perl sources.
 160
 161 =item B<PerlIO_read(f,buf,count)>, B<PerlIO_write(f,buf,count)>
 162
 163 These correspond to fread() and fwrite(). Note that the arguments are
 164 different: there is only one "count", and the order has "file"
 165 first. Returns a byte count if successful (which may be zero or positive), or returns
 166 a negative value and sets C<errno> on error.  Depending on
 167 implementation C<errno> may be C<EINTR> if operation was interrupted
 168 by a signal.
 169
 170 =item B<PerlIO_close(f)>
 171
 172 Depending on implementation C<errno> may be C<EINTR> if operation was
 173 interrupted by a signal.
 174
 175 =item B<PerlIO_puts(f,s)>, B<PerlIO_putc(f,c)>
 176
 177 These correspond to fputs() and fputc().
 178 Note that arguments have been revised to have "file" first.
 179
 180 =item B<PerlIO_ungetc(f,c)>
 181
 182 This corresponds to ungetc().  Note that arguments have been revised
 183 to have "file" first.  Arranges that next read operation will return
 184 the byte B<c>.  Despite the implied "character" in the name only
 185 values in the range 0..0xFF are defined. Returns the byte B<c> on
 186 success or -1 (C<EOF>) on error.  The number of bytes that can be
 187 "pushed back" may vary, only 1 character is certain, and then only if
 188 it is the last character that was read from the handle.
 189
 190 =item B<PerlIO_getc(f)>
 191
 192 This corresponds to getc().
 193 Despite the c in the name only byte range 0..0xFF is supported.
 194 Returns the character read or -1 (C<EOF>) on error.
 195
 196 =item B<PerlIO_eof(f)>
 197
 198 This corresponds to feof().  Returns a true/false indication of
 199 whether the handle is at end of file.  For terminal devices this may
 200 or may not be "sticky" depending on the implementation.  The flag is
 201 cleared by PerlIO_seek(), or PerlIO_rewind().
 202
 203 =item B<PerlIO_error(f)>
 204
 205 This corresponds to ferror().  Returns a true/false indication of
 206 whether there has been an IO error on the handle.
 207
 208 =item B<PerlIO_fileno(f)>
 209
 210 This corresponds to fileno().  Note that on some platforms, the meaning
 211 of "fileno" may not match Unix. Returns -1 if the handle has no open
 212 descriptor associated with it.
 213
 214 =item B<PerlIO_clearerr(f)>
 215
 216 This corresponds to clearerr(), i.e., clears 'error' and (usually)
 217 'eof' flags for the "stream". Does not return a value.
 218
 219 =item B<PerlIO_flush(f)>
 220
 221 This corresponds to fflush().  Sends any buffered write data to the
 222 underlying file.  If called with C<NULL> this may flush all open
 223 streams (or core dump with some USE_STDIO implementations).
 224 Calling on a handle open for read only, or on which the last operation was a read of some kind,
 225 may lead to undefined behaviour on some USE_STDIO implementations.
 226 The USE_PERLIO (layers) implementation tries to behave better: it flushes all open streams
 227 when passed C<NULL>, and attempts to retain data on read streams either in the buffer
 228 or by seeking the handle to the current logical position.
 229
 230 =item B<PerlIO_seek(f,offset,whence)>
 231
 232 This corresponds to fseek().  Sends buffered write data to the
 233 underlying file, or discards any buffered read data, then positions
 234 the file descriptor as specified by B<offset> and B<whence> (sic).
 235 This is the correct thing to do when switching between read and write
 236 on the same handle (see issues with PerlIO_flush() above).  Offset is
 237 of type C<Off_t> which is a perl Configure value which may not be same
 238 as stdio's C<off_t>.
 239
 240 =item B<PerlIO_tell(f)>
 241
 242 This corresponds to ftell().  Returns the current file position, or
 243 (Off_t) -1 on error.  May just return value system "knows" without
 244 making a system call or checking the underlying file descriptor (so
 245 use on shared file descriptors is not safe without a
 246 PerlIO_seek()). Return value is of type C<Off_t> which is a perl
 247 Configure value which may not be same as stdio's C<off_t>.
 248
 249 =item B<PerlIO_getpos(f,p)>, B<PerlIO_setpos(f,p)>
 250
 251 These correspond (loosely) to fgetpos() and fsetpos(). Rather than
 252 stdio's Fpos_t they expect a "Perl Scalar Value" to be passed. What is
 253 stored there should be considered opaque. The layout of the data may
 254 vary from handle to handle.  When not using stdio or if platform does
 255 not have the stdio calls then they are implemented in terms of
 256 PerlIO_tell() and PerlIO_seek().
 257
 258 =item B<PerlIO_rewind(f)>
 259
 260 This corresponds to rewind(). It is usually defined as being
 261
 262     PerlIO_seek(f,(Off_t)0L, SEEK_SET);
 263     PerlIO_clearerr(f);
 264
 265 =item B<PerlIO_tmpfile()>
 266
 267 This corresponds to tmpfile(), i.e., returns an anonymous PerlIO or
 268 NULL on error.  The system will attempt to automatically delete the
 269 file when closed.  On Unix the file is usually C<unlink>-ed just after
 270 it is created so it does not matter how it gets closed. On other
 271 systems the file may only be deleted if closed via PerlIO_close()
 272 and/or the program exits via C<exit>.  Depending on the implementation
 273 there may be "race conditions" which allow other processes access to
 274 the file, though in general it will be safer in this regard than
 275 ad hoc schemes.
 276
 277 =item B<PerlIO_setlinebuf(f)>
 278
 279 This corresponds to setlinebuf().  Does not return a value. What
 280 constitutes a "line" is implementation dependent but usually means
 281 that writing "\n" flushes the buffer.  What happens with things like
 282 "this\nthat" is uncertain.  (Perl core uses it I<only> when "dumping";
 283 it has nothing to do with $| auto-flush.)
 284
 285 =back
 286
 287 =head2 Co-existence with stdio
 288
 289 There is outline support for co-existence of PerlIO with stdio.
 290 Obviously if PerlIO is implemented in terms of stdio there is no
 291 problem. However, in other cases mechanisms must exist to create a
 292 FILE * which can be passed to library code which is going to use stdio
 293 calls.
 294
 295 The first step is to add this line:
 296
 297    #define PERLIO_NOT_STDIO 0
 298
 299 I<before> including any perl header files. (This will probably become
 300 the default at some point).  That prevents "perlio.h" from attempting
 301 to #define stdio functions onto PerlIO functions.
 302
 303 XS code is probably better using "typemap" if it expects FILE *
 304 arguments.  The standard typemap will be adjusted to comprehend any
 305 changes in this area.
 306
 307 =over 4
 308
 309 =item B<PerlIO_importFILE(f,mode)>
 310
 311 Used to get a PerlIO * from a FILE *.
 312
 313 The mode argument should be a string as would be passed to fopen/PerlIO_open.
 314 If it is NULL then - for legacy support - the code will (depending upon
 315 the platform and the implementation) either attempt to empirically determine the mode in
 316 which I<f> is open, or use "r+" to indicate a read/write stream.
 317
 318 Once called the FILE * should I<ONLY> be closed by calling
 319 C<PerlIO_close()> on the returned PerlIO *.
 320
 321 The PerlIO is set to textmode. Use PerlIO_binmode if this is
 322 not the desired mode.
 323
 324 This is B<not> the reverse of PerlIO_exportFILE().
 325
 326 =item B<PerlIO_exportFILE(f,mode)>
 327
 328 Given a PerlIO * create a 'native' FILE * suitable for passing to code
 329 expecting to be compiled and linked with ANSI C I<stdio.h>.
 330 The mode argument should be a string as would be passed to fopen/PerlIO_open.
 331 If it is NULL then - for legacy support - the FILE * is opened
 332 in same mode as the PerlIO *.
 333
 334 The fact that such a FILE * has been 'exported' is recorded, (normally by
 335 pushing a new :stdio "layer" onto the PerlIO *), which may affect future
 336 PerlIO operations on the original PerlIO *.
 337 You should not call C<fclose()> on the file unless you call
 338 C<PerlIO_releaseFILE()> to disassociate it from the PerlIO *.
 339 (Do not use PerlIO_importFILE() for doing the disassociation.)
 340
 341 Calling this function repeatedly will create a FILE * on each call
 342 (and will push an :stdio layer each time as well).
 343
 344 =item B<PerlIO_releaseFILE(p,f)>
 345
 346 Calling PerlIO_releaseFILE informs PerlIO that all use of FILE * is
 347 complete. It is removed from the list of 'exported' FILE *s, and the
 348 associated PerlIO * should revert to its original behaviour.
 349
 350 Use this to disassociate a file from a PerlIO * that was associated
 351 using PerlIO_exportFILE().
 352
 353 =item B<PerlIO_findFILE(f)>
 354
 355 Returns a native FILE * used by a stdio layer. If there is none, it
 356 will create one with PerlIO_exportFILE. In either case the FILE *
 357 should be considered as belonging to PerlIO subsystem and should
 358 only be closed by calling C<PerlIO_close()>.
 359
 360
 361 =back
 362
 363 =head2 "Fast gets" Functions
 364
 365 In addition to the stdio-like API described above there is an
 366 "implementation" interface which allows perl to get at internals of
 367 PerlIO.  The following calls correspond to the various FILE_xxx macros
 368 determined by Configure - or their equivalent in other
 369 implementations. This section is really of interest to only those
 370 concerned with detailed perl-core behaviour, implementing a PerlIO
 371 mapping or writing code which can make use of the "read ahead" that
 372 has been done by the IO system in the same way perl does. Note that
 373 any code that uses these interfaces must be prepared to do things the
 374 traditional way if a handle does not support them.
 375
 376 =over 4
 377
 378 =item B<PerlIO_fast_gets(f)>
 379
 380 Returns true if implementation has all the interfaces required to
 381 allow perl's C<sv_gets> to "bypass" normal IO mechanism.
 382 This can vary from handle to handle.
 383
 384   PerlIO_fast_gets(f) = PerlIO_has_cntptr(f) && \
 385                         PerlIO_canset_cnt(f) && \
 386                         `Can set pointer into buffer'
 387
 388
 389 =item B<PerlIO_has_cntptr(f)>
 390
 391 Implementation can return pointer to current position in the "buffer"
 392 and a count of bytes available in the buffer.  Do not use this - use
 393 PerlIO_fast_gets.
 394
 395 =item B<PerlIO_get_cnt(f)>
 396
 397 Return count of readable bytes in the buffer. Zero or negative return
 398 means no more bytes available.
 399
 400 =item B<PerlIO_get_ptr(f)>
 401
 402 Return pointer to next readable byte in buffer, accessing via the
 403 pointer (dereferencing) is only safe if PerlIO_get_cnt() has returned
 404 a positive value.  Only positive offsets up to value returned by
 405 PerlIO_get_cnt() are allowed.
 406
 407 =item B<PerlIO_set_ptrcnt(f,p,c)>
 408
 409 Set pointer into buffer, and a count of bytes still in the
 410 buffer. Should be used only to set pointer to within range implied by
 411 previous calls to C<PerlIO_get_ptr> and C<PerlIO_get_cnt>. The two
 412 values I<must> be consistent with each other (implementation may only
 413 use one or the other or may require both).
 414
 415 =item B<PerlIO_canset_cnt(f)>
 416
 417 Implementation can adjust its idea of number of bytes in the buffer.
 418 Do not use this - use PerlIO_fast_gets.
 419
 420 =item B<PerlIO_set_cnt(f,c)>
 421
 422 Obscure - set count of bytes in the buffer. Deprecated.  Only usable
 423 if PerlIO_canset_cnt() returns true.  Currently used only in doio.c to
 424 force count less than -1 to -1.  Perhaps should be PerlIO_set_empty or
 425 similar.  This call may actually do nothing if "count" is deduced from
 426 pointer and a "limit".  Do not use this - use PerlIO_set_ptrcnt().
 427
 428 =item B<PerlIO_has_base(f)>
 429
 430 Returns true if implementation has a buffer, and can return pointer
 431 to whole buffer and its size. Used by perl for B<-T> / B<-B> tests.
 432 Other uses would be very obscure...
 433
 434 =item B<PerlIO_get_base(f)>
 435
 436 Return I<start> of buffer. Access only positive offsets in the buffer
 437 up to the value returned by PerlIO_get_bufsiz().
 438
 439 =item B<PerlIO_get_bufsiz(f)>
 440
 441 Return the I<total number of bytes> in the buffer; this is neither the
 442 number that can be read, nor the amount of memory allocated to the
 443 buffer. Rather it is what the operating system and/or implementation
 444 happened to C<read()> (or whatever) last time IO was requested.
 445
 446 =back
 447
 448 =head2 Other Functions
 449
 450 =over 4
 451
 452 =item PerlIO_apply_layers(f,mode,layers)
 453
 454 The new interface to the USE_PERLIO implementation. The layers ":crlf"
 455 and ":raw" are the only ones allowed for other implementations and those
 456 are silently ignored. (As of perl5.8 ":raw" is deprecated.)
 457 Use PerlIO_binmode() below for the portable case.
 458
 459 =item PerlIO_binmode(f,ptype,imode,layers)
 460
 461 The hook used by perl's C<binmode> operator.
 462 B<ptype> is perl's character for the kind of IO:
 463
 464 =over 8
 465
 466 =item 'E<lt>' read
 467
 468 =item 'E<gt>' write
 469
 470 =item '+' read/write
 471
 472 =back
 473
 474 B<imode> is C<O_BINARY> or C<O_TEXT>.
 475
 476 B<layers> is a string of layers to apply, only ":crlf" makes sense in the non USE_PERLIO
 477 case. (As of perl5.8 ":raw" is deprecated in favour of passing NULL.)
 478
 479 Portable cases are:
 480
 481     PerlIO_binmode(f,ptype,O_BINARY,Nullch);
 482 and
 483     PerlIO_binmode(f,ptype,O_TEXT,":crlf");
 484
 485 On Unix these calls probably have no effect whatsoever.  Elsewhere
 486 they alter "\n" to CR,LF translation and possibly cause a special text
 487 "end of file" indicator to be written or honoured on read. The effect
 488 of making the call after doing any IO to the handle depends on the
 489 implementation. (It may be ignored, affect any data which is already
 490 buffered as well, or only apply to subsequent data.)
 491
 492 =item PerlIO_debug(fmt,...)
 493
 494 PerlIO_debug is a printf()-like function which can be used for
 495 debugging.  No return value. Its main use is inside PerlIO where using
 496 real printf, warn() etc. would recursively call PerlIO and be a
 497 problem.
 498
 499 PerlIO_debug writes to the file named by $ENV{'PERLIO_DEBUG'}.  Typical
 500 use might be:
 501
 502   Bourne shells (sh, ksh, bash, zsh, ash, ...):
 503    PERLIO_DEBUG=/dev/tty ./perl somescript some args
 504
 505   Csh/Tcsh:
 506    setenv PERLIO_DEBUG /dev/tty
 507    ./perl somescript some args
 508
 509   If you have the "env" utility:
 510    env PERLIO_DEBUG=/dev/tty ./perl somescript some args
 511
 512   Win32:
 513    set PERLIO_DEBUG=CON
 514    perl somescript some args
 515
 516 If $ENV{'PERLIO_DEBUG'} is not set PerlIO_debug() is a no-op.
 517
 518 =back