This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix quotewords
[perl5.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e
LW
1package Text::ParseWords;
2
3require 5.000;
4require Exporter;
5require AutoLoader;
6use Carp;
7
8@ISA = qw(Exporter AutoLoader);
9@EXPORT = qw(shellwords quotewords);
10@EXPORT_OK = qw(old_shellwords);
11
a5f75d66 12=head1 NAME
2304df62 13
a5f75d66 14Text::ParseWords - parse text into an array of tokens
a0d0e21e 15
a5f75d66 16=head1 SYNOPSIS
a0d0e21e 17
a5f75d66
AD
18 use Text::ParseWords;
19 @words = &quotewords($delim, $keep, @lines);
20 @words = &shellwords(@lines);
21 @words = &old_shellwords(@lines);
22
23=head1 DESCRIPTION
24
25&quotewords() accepts a delimiter (which can be a regular expression)
26and a list of lines and then breaks those lines up into a list of
27words ignoring delimiters that appear inside quotes.
28
29The $keep argument is a boolean flag. If true, the quotes are kept
30with each word, otherwise quotes are stripped in the splitting process.
31$keep also defines whether unprotected backslashes are retained.
32
33A &shellwords() replacement is included to demonstrate the new package.
34This version differs from the original in that it will _NOT_ default
35to using $_ if no arguments are given. I personally find the old behavior
36to be a mis-feature.
37
38
39&quotewords() works by simply jamming all of @lines into a single
40string in $_ and then pulling off words a bit at a time until $_
41is exhausted.
42
a5f75d66
AD
43=head1 AUTHORS
44
45Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
46
47Basically an update and generalization of the old shellwords.pl.
48Much code shamelessly stolen from the old version (author unknown).
49
50=cut
51
521;
53__END__
54
55sub shellwords {
2304df62
AD
56 local(@lines) = @_;
57 $lines[$#lines] =~ s/\s+$//;
a0d0e21e 58 &quotewords('\s+', 0, @lines);
2304df62
AD
59}
60
61
a0d0e21e
LW
62
63sub quotewords {
0dbfbeef
PP
64
65# The inner "for" loop builds up each word (or $field) one $snippet
66# at a time. A $snippet is a quoted string, a backslashed character,
67# or an unquoted string. We fall out of the "for" loop when we reach
68# the end of $_ or when we hit a delimiter. Falling out of the "for"
69# loop, we push the $field we've been building up onto the list of
70# @words we'll be returning, and then loop back and pull another word
71# off of $_.
72#
73# The first two cases inside the "for" loop deal with quoted strings.
74# The first case matches a double quoted string, removes it from $_,
75# and assigns the double quoted string to $snippet in the body of the
76# conditional. The second case handles single quoted strings. In
77# the third case we've found a quote at the current beginning of $_,
78# but it didn't match the quoted string regexps in the first two cases,
79# so it must be an unbalanced quote and we croak with an error (which can
80# be caught by eval()).
81#
82# The next case handles backslashed characters, and the next case is the
83# exit case on reaching the end of the string or finding a delimiter.
84#
85# Otherwise, we've found an unquoted thing and we pull of characters one
86# at a time until we reach something that could start another $snippet--
87# a quote of some sort, a backslash, or the delimiter. This one character
88# at a time behavior was necessary if the delimiter was going to be a
89# regexp (love to hear it if you can figure out a better way).
90
2304df62
AD
91 local($delim, $keep, @lines) = @_;
92 local(@words,$snippet,$field,$_);
93
94 $_ = join('', @lines);
fb7007dc 95 while (length($_)) {
2304df62
AD
96 $field = '';
97 for (;;) {
8f80bcca
HS
98 $snippet = '';
99 if (s/^"(([^"\\]|\\.)*)"//) {
2304df62
AD
100 $snippet = $1;
101 $snippet = "\"$snippet\"" if ($keep);
102 }
8f80bcca 103 elsif (s/^'(([^'\\]|\\.)*)'//) {
2304df62
AD
104 $snippet = $1;
105 $snippet = "'$snippet'" if ($keep);
106 }
107 elsif (/^["']/) {
a0d0e21e 108 croak "Unmatched quote";
2304df62
AD
109 }
110 elsif (s/^\\(.)//) {
111 $snippet = $1;
112 $snippet = "\\$snippet" if ($keep);
113 }
fb7007dc 114 elsif (!length($_) || s/^$delim//) {
2304df62
AD
115 last;
116 }
117 else {
55497cff 118 while ($_ ne '' && !(/^$delim/ || /^['"\\]/)) {
2304df62
AD
119 $snippet .= substr($_, 0, 1);
120 substr($_, 0, 1) = '';
121 }
122 }
123 $field .= $snippet;
124 }
125 push(@words, $field);
126 }
127 @words;
128}
2304df62
AD
129
130
a0d0e21e
LW
131sub old_shellwords {
132
133 # Usage:
134 # use ParseWords;
135 # @words = old_shellwords($line);
136 # or
137 # @words = old_shellwords(@lines);
138
139 local($_) = join('', @_);
140 my(@words,$snippet,$field);
141
142 s/^\s+//;
143 while ($_ ne '') {
144 $field = '';
145 for (;;) {
146 if (s/^"(([^"\\]|\\.)*)"//) {
147 ($snippet = $1) =~ s#\\(.)#$1#g;
148 }
149 elsif (/^"/) {
150 croak "Unmatched double quote: $_";
151 }
152 elsif (s/^'(([^'\\]|\\.)*)'//) {
153 ($snippet = $1) =~ s#\\(.)#$1#g;
154 }
155 elsif (/^'/) {
156 croak "Unmatched single quote: $_";
157 }
158 elsif (s/^\\(.)//) {
159 $snippet = $1;
160 }
161 elsif (s/^([^\s\\'"]+)//) {
162 $snippet = $1;
163 }
164 else {
165 s/^\s+//;
166 last;
167 }
168 $field .= $snippet;
169 }
170 push(@words, $field);
171 }
172 @words;
173}