This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[win32] merge change#886 from maintbranch
[perl5.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e
LW
1package Text::ParseWords;
2
3require 5.000;
a0d0e21e
LW
4use Carp;
5
dc848c6f
PP
6require AutoLoader;
7*AUTOLOAD = \&AutoLoader::AUTOLOAD;
8
9require Exporter;
10@ISA = qw(Exporter);
a0d0e21e
LW
11@EXPORT = qw(shellwords quotewords);
12@EXPORT_OK = qw(old_shellwords);
13
a5f75d66 14=head1 NAME
2304df62 15
a5f75d66 16Text::ParseWords - parse text into an array of tokens
a0d0e21e 17
a5f75d66 18=head1 SYNOPSIS
a0d0e21e 19
a5f75d66
AD
20 use Text::ParseWords;
21 @words = &quotewords($delim, $keep, @lines);
22 @words = &shellwords(@lines);
23 @words = &old_shellwords(@lines);
24
25=head1 DESCRIPTION
26
27&quotewords() accepts a delimiter (which can be a regular expression)
28and a list of lines and then breaks those lines up into a list of
29words ignoring delimiters that appear inside quotes.
30
31The $keep argument is a boolean flag. If true, the quotes are kept
32with each word, otherwise quotes are stripped in the splitting process.
33$keep also defines whether unprotected backslashes are retained.
34
35A &shellwords() replacement is included to demonstrate the new package.
36This version differs from the original in that it will _NOT_ default
37to using $_ if no arguments are given. I personally find the old behavior
38to be a mis-feature.
39
a5f75d66
AD
40&quotewords() works by simply jamming all of @lines into a single
41string in $_ and then pulling off words a bit at a time until $_
42is exhausted.
43
a5f75d66
AD
44=head1 AUTHORS
45
46Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
47
48Basically an update and generalization of the old shellwords.pl.
49Much code shamelessly stolen from the old version (author unknown).
50
51=cut
52
531;
54__END__
55
56sub shellwords {
2304df62
AD
57 local(@lines) = @_;
58 $lines[$#lines] =~ s/\s+$//;
a0d0e21e 59 &quotewords('\s+', 0, @lines);
2304df62
AD
60}
61
62
a0d0e21e
LW
63
64sub quotewords {
0dbfbeef
PP
65
66# The inner "for" loop builds up each word (or $field) one $snippet
67# at a time. A $snippet is a quoted string, a backslashed character,
68# or an unquoted string. We fall out of the "for" loop when we reach
69# the end of $_ or when we hit a delimiter. Falling out of the "for"
70# loop, we push the $field we've been building up onto the list of
71# @words we'll be returning, and then loop back and pull another word
72# off of $_.
73#
74# The first two cases inside the "for" loop deal with quoted strings.
75# The first case matches a double quoted string, removes it from $_,
76# and assigns the double quoted string to $snippet in the body of the
77# conditional. The second case handles single quoted strings. In
78# the third case we've found a quote at the current beginning of $_,
79# but it didn't match the quoted string regexps in the first two cases,
80# so it must be an unbalanced quote and we croak with an error (which can
81# be caught by eval()).
82#
83# The next case handles backslashed characters, and the next case is the
84# exit case on reaching the end of the string or finding a delimiter.
85#
86# Otherwise, we've found an unquoted thing and we pull of characters one
87# at a time until we reach something that could start another $snippet--
88# a quote of some sort, a backslash, or the delimiter. This one character
89# at a time behavior was necessary if the delimiter was going to be a
90# regexp (love to hear it if you can figure out a better way).
91
936c8837
SG
92 my ($delim, $keep, @lines) = @_;
93 my (@words, $snippet, $field);
94
95 local $_ = join ('', @lines);
2304df62 96
936c8837 97 while (length) {
2304df62 98 $field = '';
936c8837 99
2304df62 100 for (;;) {
456e8aa7 101 $snippet = '';
936c8837
SG
102
103 if (s/^"([^"\\]*(\\.[^"\\]*)*)"//) {
2304df62 104 $snippet = $1;
936c8837 105 $snippet = qq|"$snippet"| if $keep;
2304df62 106 }
936c8837 107 elsif (s/^'([^'\\]*(\\.[^'\\]*)*)'//) {
2304df62 108 $snippet = $1;
936c8837 109 $snippet = "'$snippet'" if $keep;
2304df62
AD
110 }
111 elsif (/^["']/) {
936c8837
SG
112 croak 'Unmatched quote';
113 }
114 elsif (s/^\\(.)//) {
115 $snippet = $1;
116 $snippet = "\\$snippet" if $keep;
2304df62 117 }
936c8837
SG
118 elsif (!length || s/^$delim//) {
119 last;
2304df62
AD
120 }
121 else {
936c8837
SG
122 while (length && !(/^$delim/ || /^['"\\]/)) {
123 $snippet .= substr ($_, 0, 1);
124 substr($_, 0, 1) = '';
125 }
2304df62 126 }
936c8837 127
2304df62
AD
128 $field .= $snippet;
129 }
936c8837
SG
130
131 push @words, $field;
2304df62 132 }
936c8837
SG
133
134 return @words;
2304df62 135}
2304df62
AD
136
137
a0d0e21e
LW
138sub old_shellwords {
139
140 # Usage:
141 # use ParseWords;
142 # @words = old_shellwords($line);
143 # or
144 # @words = old_shellwords(@lines);
145
146 local($_) = join('', @_);
147 my(@words,$snippet,$field);
148
149 s/^\s+//;
150 while ($_ ne '') {
151 $field = '';
152 for (;;) {
153 if (s/^"(([^"\\]|\\.)*)"//) {
154 ($snippet = $1) =~ s#\\(.)#$1#g;
155 }
156 elsif (/^"/) {
157 croak "Unmatched double quote: $_";
158 }
159 elsif (s/^'(([^'\\]|\\.)*)'//) {
160 ($snippet = $1) =~ s#\\(.)#$1#g;
161 }
162 elsif (/^'/) {
163 croak "Unmatched single quote: $_";
164 }
165 elsif (s/^\\(.)//) {
166 $snippet = $1;
167 }
168 elsif (s/^([^\s\\'"]+)//) {
169 $snippet = $1;
170 }
171 else {
172 s/^\s+//;
173 last;
174 }
175 $field .= $snippet;
176 }
177 push(@words, $field);
178 }
179 @words;
180}