This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Correct typo: Socket --> Syslog
[perl5.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e
LW
1package Text::ParseWords;
2
3require 5.000;
4require Exporter;
5require AutoLoader;
6use Carp;
7
8@ISA = qw(Exporter AutoLoader);
9@EXPORT = qw(shellwords quotewords);
10@EXPORT_OK = qw(old_shellwords);
11
a5f75d66 12=head1 NAME
2304df62 13
a5f75d66 14Text::ParseWords - parse text into an array of tokens
a0d0e21e 15
a5f75d66 16=head1 SYNOPSIS
a0d0e21e 17
a5f75d66
AD
18 use Text::ParseWords;
19 @words = &quotewords($delim, $keep, @lines);
20 @words = &shellwords(@lines);
21 @words = &old_shellwords(@lines);
22
23=head1 DESCRIPTION
24
25&quotewords() accepts a delimiter (which can be a regular expression)
26and a list of lines and then breaks those lines up into a list of
27words ignoring delimiters that appear inside quotes.
28
29The $keep argument is a boolean flag. If true, the quotes are kept
30with each word, otherwise quotes are stripped in the splitting process.
31$keep also defines whether unprotected backslashes are retained.
32
33A &shellwords() replacement is included to demonstrate the new package.
34This version differs from the original in that it will _NOT_ default
35to using $_ if no arguments are given. I personally find the old behavior
36to be a mis-feature.
37
38
39&quotewords() works by simply jamming all of @lines into a single
40string in $_ and then pulling off words a bit at a time until $_
41is exhausted.
42
43The inner "for" loop builds up each word (or $field) one $snippet
44at a time. A $snippet is a quoted string, a backslashed character,
45or an unquoted string. We fall out of the "for" loop when we reach
46the end of $_ or when we hit a delimiter. Falling out of the "for"
47loop, we push the $field we've been building up onto the list of
48@words we'll be returning, and then loop back and pull another word
49off of $_.
50
51The first two cases inside the "for" loop deal with quoted strings.
52The first case matches a double quoted string, removes it from $_,
53and assigns the double quoted string to $snippet in the body of the
54conditional. The second case handles single quoted strings. In
55the third case we've found a quote at the current beginning of $_,
56but it didn't match the quoted string regexps in the first two cases,
57so it must be an unbalanced quote and we croak with an error (which can
58be caught by eval()).
a0d0e21e 59
a5f75d66
AD
60The next case handles backslashed characters, and the next case is the
61exit case on reaching the end of the string or finding a delimiter.
2304df62 62
a5f75d66
AD
63Otherwise, we've found an unquoted thing and we pull of characters one
64at a time until we reach something that could start another $snippet--
65a quote of some sort, a backslash, or the delimiter. This one character
66at a time behavior was necessary if the delimiter was going to be a
67regexp (love to hear it if you can figure out a better way).
68
69=head1 AUTHORS
70
71Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
72
73Basically an update and generalization of the old shellwords.pl.
74Much code shamelessly stolen from the old version (author unknown).
75
76=cut
77
781;
79__END__
80
81sub shellwords {
2304df62
AD
82 local(@lines) = @_;
83 $lines[$#lines] =~ s/\s+$//;
a0d0e21e 84 &quotewords('\s+', 0, @lines);
2304df62
AD
85}
86
87
a0d0e21e
LW
88
89sub quotewords {
2304df62
AD
90 local($delim, $keep, @lines) = @_;
91 local(@words,$snippet,$field,$_);
92
93 $_ = join('', @lines);
94 while ($_) {
95 $field = '';
96 for (;;) {
97 $snippet = '';
98 if (s/^"(([^"\\]|\\[\\"])*)"//) {
99 $snippet = $1;
100 $snippet = "\"$snippet\"" if ($keep);
101 }
102 elsif (s/^'(([^'\\]|\\[\\'])*)'//) {
103 $snippet = $1;
104 $snippet = "'$snippet'" if ($keep);
105 }
106 elsif (/^["']/) {
a0d0e21e 107 croak "Unmatched quote";
2304df62
AD
108 }
109 elsif (s/^\\(.)//) {
110 $snippet = $1;
111 $snippet = "\\$snippet" if ($keep);
112 }
113 elsif (!$_ || s/^$delim//) {
114 last;
115 }
116 else {
117 while ($_ && !(/^$delim/ || /^['"\\]/)) {
118 $snippet .= substr($_, 0, 1);
119 substr($_, 0, 1) = '';
120 }
121 }
122 $field .= $snippet;
123 }
124 push(@words, $field);
125 }
126 @words;
127}
2304df62
AD
128
129
a0d0e21e
LW
130sub old_shellwords {
131
132 # Usage:
133 # use ParseWords;
134 # @words = old_shellwords($line);
135 # or
136 # @words = old_shellwords(@lines);
137
138 local($_) = join('', @_);
139 my(@words,$snippet,$field);
140
141 s/^\s+//;
142 while ($_ ne '') {
143 $field = '';
144 for (;;) {
145 if (s/^"(([^"\\]|\\.)*)"//) {
146 ($snippet = $1) =~ s#\\(.)#$1#g;
147 }
148 elsif (/^"/) {
149 croak "Unmatched double quote: $_";
150 }
151 elsif (s/^'(([^'\\]|\\.)*)'//) {
152 ($snippet = $1) =~ s#\\(.)#$1#g;
153 }
154 elsif (/^'/) {
155 croak "Unmatched single quote: $_";
156 }
157 elsif (s/^\\(.)//) {
158 $snippet = $1;
159 }
160 elsif (s/^([^\s\\'"]+)//) {
161 $snippet = $1;
162 }
163 else {
164 s/^\s+//;
165 last;
166 }
167 $field .= $snippet;
168 }
169 push(@words, $field);
170 }
171 @words;
172}