This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Speed up csh_glob
[perl5.git] / ext / File-Glob / Glob.pm
CommitLineData
72b16652
GS
1package File::Glob;
2
3use strict;
7f39e0ae 4our($VERSION, @ISA, @EXPORT_OK, @EXPORT_FAIL, %EXPORT_TAGS, $DEFAULT_FLAGS);
72b16652 5
da4061d3 6require XSLoader;
22bc907a 7use feature 'switch';
72b16652 8
72f7b9a1 9@ISA = qw(Exporter);
72b16652 10
00c80938
GS
11# NOTE: The glob() export is only here for compatibility with 5.6.0.
12# csh_glob() should not be used directly, unless you know what you're doing.
13
72b16652
GS
# Names importable through the ':glob' tag: the GLOB_* constants followed
# by the glob() and bsd_glob() functions.
%EXPORT_TAGS = (
    'glob' => [
        qw(
            GLOB_ABEND
            GLOB_ALPHASORT
            GLOB_ALTDIRFUNC
            GLOB_BRACE
            GLOB_CSH
            GLOB_ERR
            GLOB_ERROR
            GLOB_LIMIT
            GLOB_MARK
            GLOB_NOCASE
            GLOB_NOCHECK
            GLOB_NOMAGIC
            GLOB_NOSORT
            GLOB_NOSPACE
            GLOB_QUOTE
            GLOB_TILDE
            glob
            bsd_glob
        ),
    ],
);

# csh_glob can be imported by name, but it is not part of the ':glob' tag.
@EXPORT_OK = ( @{ $EXPORT_TAGS{'glob'} }, 'csh_glob' );

$VERSION = '1.14';
220398a0
PM
40
# import(CLASS, LIST)
#
# Custom import hook.  It consumes the File::Glob-specific pseudo-tags
# itself and forwards every other argument (including the class name in
# $_[0]) to Exporter::import:
#
#   :case      clear GLOB_NOCASE in $DEFAULT_FLAGS
#   :nocase    set GLOB_NOCASE in $DEFAULT_FLAGS
#   :globally  install csh_glob() as CORE::GLOBAL::glob
#
# Plain string comparison is used instead of given/when so that the module
# does not depend on the experimental switch/smartmatch features.
sub import {
    require Exporter;

    # Exporter::import is called from inside this sub, so ask it to look
    # one frame further up the call stack when picking the target package.
    local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;

    Exporter::import(grep {
        my $passthrough;
        if ($_ eq ':case') {
            $DEFAULT_FLAGS &= ~GLOB_NOCASE();
        }
        elsif ($_ eq ':nocase') {
            $DEFAULT_FLAGS |= GLOB_NOCASE();
        }
        elsif ($_ eq ':globally') {
            # Replacing an already-installed override is intentional.
            no warnings 'redefine';
            *CORE::GLOBAL::glob = \&File::Glob::csh_glob;
        }
        else {
            # Not one of ours: let Exporter deal with it.
            $passthrough = 1;
        }
        $passthrough;
    } @_);
}
58
# Load the compiled part of the module first.  The GLOB_* subs called below
# are not defined in this file, so presumably they are provided by it.
XSLoader::load();

# Flags used by csh_glob(): csh-style globbing everywhere, plus
# case-insensitive matching on the platforms listed in the pattern.
$DEFAULT_FLAGS = GLOB_CSH();
$DEFAULT_FLAGS |= GLOB_NOCASE()
    if $^O =~ /^(?:MSWin32|VMS|os2|dos|riscos)$/;
65
00c80938
GS
# File::Glob::glob() is deprecated: its prototype differs from that of
# CORE::glob().  New code should call bsd_glob() instead.
#
# Only the first argument (the pattern) is forwarded.  Anything after it is
# dropped so that it cannot be mistaken for a flags value by bsd_glob()
# (the original note here warns about PL_glob_index being passed).
sub glob {
    splice(@_, 1);      # keep the pattern, discard everything else
    goto &bsd_glob;     # tail-call bsd_glob() with the trimmed @_
}
72
72b16652
GS
## borrowed heavily from gsar's File::DosGlob

# Per-context state for csh_glob(), keyed by the context id it receives:
#   %iter    - number of results that were still queued at the last
#              scalar-context call (0 / absent means "start a new glob")
#   %entries - array ref holding the results not yet handed out
my %iter;
my %entries;

# csh_glob(PATTERN, CXIX)
#
# PATTERN defaults to $_ and CXIX to the shared key '_G_'.
#
# In list context all results are returned at once and the state kept for
# CXIX is discarded.  In scalar context each call returns the next result,
# and undef once the results are used up (which also discards the state).
sub csh_glob {
    my $pat  = shift;
    my $cxix = shift;

    # assume global context if not provided one
    defined $cxix       or $cxix        = '_G_';
    exists $iter{$cxix} or $iter{$cxix} = 0;

    # Nothing in progress for this context: expand the pattern(s) now.
    if ($iter{$cxix} == 0) {
        # glob without args defaults to $_
        $pat = $_ unless defined $pat;

        # Drop leading whitespace so that something like < *.c> does not,
        # on its own, trigger word splitting or yield an empty element.
        # Trailing whitespace is left alone: it has to go through
        # ParseWords because it may be escaped, as in glob('\ ').
        $pat =~ s/^\s+//;

        my @words;
        if ($pat =~ /[\s"']/) {
            # XXX this is needed for compatibility with the csh
            # implementation in Perl.  Need to support a flag
            # to disable this behavior.
            require Text::ParseWords;
            @words = Text::ParseWords::parse_line('\s+', 1, $pat);

            # Words that start with a quote lose that quote and their
            # final character.
            for my $word (@words) {
                chop $word if $word =~ s/^['"]//;
            }
        }

        # Glob each word separately, or the whole pattern when no words
        # were produced.  doglob() is not defined in this file.
        $entries{$cxix} = [
            map { doglob($_, $DEFAULT_FLAGS) } (@words ? @words : $pat)
        ];
    }

    # chuck it all out, quick or slow
    if (wantarray) {
        delete $iter{$cxix};
        my $all = delete $entries{$cxix};
        return @$all;
    }

    # Scalar context: hand out one result per call.
    my $queue = $entries{$cxix};
    $iter{$cxix} = scalar @$queue;
    return shift @$queue if $iter{$cxix};

    # return undef for EOL
    delete $iter{$cxix};
    delete $entries{$cxix};
    return undef;
}
133
1341;
135__END__
136
137=head1 NAME
138
139File::Glob - Perl extension for BSD glob routine
140
141=head1 SYNOPSIS
142
143 use File::Glob ':glob';
9d70ac1b 144
00c80938
GS
145 @list = bsd_glob('*.[ch]');
146 $homedir = bsd_glob('~gnat', GLOB_TILDE | GLOB_ERR);
9d70ac1b 147
72b16652
GS
148 if (GLOB_ERROR) {
149 # an error occurred reading $homedir
150 }
151
00c80938 152 ## override the core glob (CORE::glob() does this automatically
11fe14b1 153 ## by default anyway, since v5.6.0)
220398a0 154 use File::Glob ':globally';
6bd08436 155 my @sources = <*.{c,h,y}>;
220398a0
PM
156
157 ## override the core glob, forcing case sensitivity
158 use File::Glob qw(:globally :case);
6bd08436 159 my @sources = <*.{c,h,y}>;
220398a0
PM
160
161 ## override the core glob forcing case insensitivity
162 use File::Glob qw(:globally :nocase);
6bd08436 163 my @sources = <*.{c,h,y}>;
9d70ac1b 164
6bd08436
SS
165 ## glob on all files in home directory
166 use File::Glob ':globally';
167 my @sources = <~gnat/*>;
72b16652
GS
168
169=head1 DESCRIPTION
170
9d70ac1b
RGS
171The glob angle-bracket operator C<< <> >> is a pathname generator that
172implements the rules for file name pattern matching used by Unix-like shells
173such as the Bourne shell or C shell.
6bd08436 174
00c80938
GS
175File::Glob::bsd_glob() implements the FreeBSD glob(3) routine, which is
176a superset of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2").
177bsd_glob() takes a mandatory C<pattern> argument, and an optional
72b16652
GS
178C<flags> argument, and returns a list of filenames matching the
179pattern, with interpretation of the pattern modified by the C<flags>
00c80938
GS
180variable.
181
182Since v5.6.0, Perl's CORE::glob() is implemented in terms of bsd_glob().
183Note that they don't share the same prototype--CORE::glob() only accepts
184a single argument. Due to historical reasons, CORE::glob() will also
185split its argument on whitespace, treating it as multiple patterns,
186whereas bsd_glob() considers them as one pattern.
187
6bd08436
SS
188=head2 META CHARACTERS
189
9d70ac1b
RGS
190 \ Quote the next metacharacter
191 [] Character class
192 {} Multiple pattern
193 * Match any string of characters
194 ? Match any single character
195 ~ User name home directory
196
197The metanotation C<a{b,c,d}e> is a shorthand for C<abe ace ade>. Left to
198right order is preserved, with results of matches being sorted separately
199at a low level to preserve this order. As a special case C<{>, C<}>, and
200C<{}> are passed undisturbed.
6bd08436
SS
201
202=head2 POSIX FLAGS
203
00c80938 204The POSIX defined flags for bsd_glob() are:
72b16652
GS
205
206=over 4
207
208=item C<GLOB_ERR>
209
00c80938
GS
210Force bsd_glob() to return an error when it encounters a directory it
211cannot open or read. Ordinarily bsd_glob() continues to find matches.
72b16652 212
b8ef571c
JH
213=item C<GLOB_LIMIT>
214
215Make bsd_glob() return an error (GLOB_NOSPACE) when the pattern expands
216to a size bigger than the system constant C<ARG_MAX> (usually found in
217limits.h). If your system does not define this constant, bsd_glob() uses
218C<sysconf(_SC_ARG_MAX)> or C<_POSIX_ARG_MAX> where available (in that
219order). You can inspect these values using the standard C<POSIX>
220extension.
221
72b16652
GS
222=item C<GLOB_MARK>
223
224Each pathname that is a directory that matches the pattern has a slash
225appended.
226
220398a0
PM
227=item C<GLOB_NOCASE>
228
229By default, file names are assumed to be case sensitive; this flag
00c80938 230makes bsd_glob() treat case differences as not significant.
220398a0 231
72b16652
GS
232=item C<GLOB_NOCHECK>
233
00c80938 234If the pattern does not match any pathname, then bsd_glob() returns a list
72b16652
GS
235consisting of only the pattern. If C<GLOB_QUOTE> is set, its effect
236is present in the pattern returned.
237
238=item C<GLOB_NOSORT>
239
240By default, the pathnames are sorted in ascending ASCII order; this
00c80938 241flag prevents that sorting (speeding up bsd_glob()).
72b16652
GS
242
243=back
244
245The FreeBSD extensions to the POSIX standard are the following flags:
246
247=over 4
248
249=item C<GLOB_BRACE>
250
a45bd81d 251Pre-process the string to expand C<{pat,pat,...}> strings like csh(1).
72b16652
GS
252The pattern '{}' is left unexpanded for historical reasons (and csh(1)
253does the same thing to ease typing of find(1) patterns).
254
255=item C<GLOB_NOMAGIC>
256
257Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not
258contain any of the special characters "*", "?" or "[". C<NOMAGIC> is
259provided to simplify implementing the historic csh(1) globbing
260behaviour and should probably not be used anywhere else.
261
262=item C<GLOB_QUOTE>
263
264Use the backslash ('\') character for quoting: every occurrence of a
265backslash followed by a character in the pattern is replaced by that
266character, avoiding any special interpretation of the character.
220398a0 267(But see below for exceptions on DOSISH systems).
72b16652
GS
268
269=item C<GLOB_TILDE>
270
271Expand patterns that start with '~' to user name home directories.
272
273=item C<GLOB_CSH>
274
275For convenience, C<GLOB_CSH> is a synonym for
2d5e9e5d 276C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE | GLOB_ALPHASORT>.
72b16652
GS
277
278=back
279
280The POSIX provided C<GLOB_APPEND>, C<GLOB_DOOFFS>, and the FreeBSD
281extensions C<GLOB_ALTDIRFUNC>, and C<GLOB_MAGCHAR> flags have not been
282implemented in the Perl version because they involve more complex
283interaction with the underlying C structures.
284
2d5e9e5d
JH
285The following flag has been added in the Perl implementation for
286csh compatibility:
287
288=over 4
289
290=item C<GLOB_ALPHASORT>
291
292If C<GLOB_NOSORT> is not in effect, sort filenames in alphabetical
293order (case does not matter) rather than in ASCII order.
294
295=back
296
72b16652
GS
297=head1 DIAGNOSTICS
298
00c80938 299bsd_glob() returns a list of matching paths, possibly zero length. If an
72b16652
GS
300error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be
301set. &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred,
302or one of the following values otherwise:
303
304=over 4
305
306=item C<GLOB_NOSPACE>
307
308An attempt to allocate memory failed.
309
310=item C<GLOB_ABEND>
311
312The glob was stopped because an error was encountered.
313
314=back
315
00c80938
GS
316In the case where bsd_glob() has found some matching paths, but is
317interrupted by an error, it will return a list of filenames B<and>
72b16652
GS
318set &File::Glob::GLOB_ERROR.
319
00c80938
GS
320Note that bsd_glob() deviates from POSIX and FreeBSD glob(3) behaviour
321by not considering C<ENOENT> and C<ENOTDIR> as errors - bsd_glob() will
72b16652
GS
322continue processing despite those errors, unless the C<GLOB_ERR> flag is
323set.
324
325Be aware that all filenames returned from File::Glob are tainted.
326
327=head1 NOTES
328
329=over 4
330
331=item *
332
9d70ac1b
RGS
333If you want to use multiple patterns, e.g. C<bsd_glob("a* b*")>, you should
334probably throw them in a set as in C<bsd_glob("{a*,b*}")>. This is because
150b260b
GS
335the argument to bsd_glob() isn't subjected to parsing by the C shell.
336Remember that you can use a backslash to escape things.
72b16652
GS
337
338=item *
339
220398a0
PM
340On DOSISH systems, backslash is a valid directory separator character.
341In this case, use of backslash as a quoting character (via GLOB_QUOTE)
342interferes with the use of backslash as a directory separator. The
343best (simplest, most portable) solution is to use forward slashes for
344directory separators, and backslashes for quoting. However, this does
345not match "normal practice" on these systems. As a concession to user
346expectation, therefore, backslashes (under GLOB_QUOTE) only quote the
347glob metacharacters '[', ']', '{', '}', '-', '~', and backslash itself.
348All other backslashes are passed through unchanged.
349
350=item *
351
72b16652
GS
352Win32 users should use the real slash. If you really want to use
353backslashes, consider using Sarathy's File::DosGlob, which comes with
354the standard Perl distribution.
355
7369a524
CN
356=item *
357
358Mac OS (Classic) users should note a few differences. Since
359Mac OS is not Unix, when the glob code encounters a tilde glob (e.g.
be708cc0 360~user) and the C<GLOB_TILDE> flag is used, it simply returns that
7369a524
CN
361pattern without doing any expansion.
362
363Glob on Mac OS is case-insensitive by default (if you don't use any
364flags). If you specify any flags at all and still want glob
365to be case-insensitive, you must include C<GLOB_NOCASE> in the flags.
366
367The path separator is ':' (aka colon), not '/' (aka slash). Mac OS users
368should be careful about specifying relative pathnames. While a full path
369always begins with a volume name, a relative pathname should always
370begin with a ':'. If specifying a volume name only, a trailing ':' is
371required.
372
be708cc0
JH
373The specification of pathnames in glob patterns adheres to the usual Mac
374OS conventions: The path separator is a colon ':', not a slash '/'. A
375full path always begins with a volume name. A relative pathname on Mac
376OS must always begin with a ':', except when specifying a file or
377directory name in the current working directory, where the leading colon
378is optional. If specifying a volume name only, a trailing ':' is
379required. Due to these rules, a glob like E<lt>*:E<gt> will find all
380mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
381all files and directories in the current directory.
382
383Note that updirs in the glob pattern are resolved before the matching begins,
384i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
385that a single trailing ':' in the pattern is ignored (unless it's a volume
386name pattern like "*HD:"), i.e. a glob like E<lt>:*:E<gt> will find both
387directories I<and> files (and not, as one might expect, only directories).
388You can, however, use the C<GLOB_MARK> flag to distinguish (without a file
389test) directory names from file names.
390
391If the C<GLOB_MARK> flag is set, all directory paths will have a ':' appended.
392Since a directory like 'lib:' is I<not> a valid I<relative> path on Mac OS,
393both a leading and a trailing colon will be added, when the directory name in
394question doesn't contain any colons (e.g. 'lib' becomes ':lib:').
395
a45bd81d
GS
396=back
397
6bd08436
SS
398=head1 SEE ALSO
399
400L<perlfunc/glob>, glob(3)
401
72b16652
GS
402=head1 AUTHOR
403
0e950d83 404The Perl interface was written by Nathan Torkington E<lt>gnat@frii.comE<gt>,
72b16652 405and is released under the artistic license. Further modifications were
7369a524
CN
406made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt>, Gurusamy Sarathy
407E<lt>gsar@activestate.comE<gt>, and Thomas Wegner
408E<lt>wegner_thomas@yahoo.comE<gt>. The C glob code has the
72b16652
GS
409following copyright:
410
0e950d83
GS
411 Copyright (c) 1989, 1993 The Regents of the University of California.
412 All rights reserved.
3cb6de81 413
0e950d83
GS
414 This code is derived from software contributed to Berkeley by
415 Guido van Rossum.
416
417 Redistribution and use in source and binary forms, with or without
418 modification, are permitted provided that the following conditions
419 are met:
420
421 1. Redistributions of source code must retain the above copyright
422 notice, this list of conditions and the following disclaimer.
423 2. Redistributions in binary form must reproduce the above copyright
424 notice, this list of conditions and the following disclaimer in the
425 documentation and/or other materials provided with the distribution.
426 3. Neither the name of the University nor the names of its contributors
427 may be used to endorse or promote products derived from this software
428 without specific prior written permission.
429
430 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
431 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
432 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
433 ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
434 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
435 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
436 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
437 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
438 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
439 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
440 SUCH DAMAGE.
72b16652
GS
441
442=cut