Commit | Line | Data |
---|---|---|
a0d0e21e LW |
1 | package Text::ParseWords; |
2 | ||
3 | require 5.000; | |
4 | require Exporter; | |
5 | require AutoLoader; | |
6 | use Carp; | |
7 | ||
8 | @ISA = qw(Exporter AutoLoader); | |
9 | @EXPORT = qw(shellwords quotewords); | |
10 | @EXPORT_OK = qw(old_shellwords); | |
11 | ||
12 | # This code needs updating to use new Perl 5 features (regexp etc). | |
13 | ||
14 | # ParseWords.pm | |
2304df62 AD |
15 | # |
16 | # Usage: | |
a0d0e21e | 17 | # use ParseWords; |
2304df62 AD |
18 | # @words = "ewords($delim, $keep, @lines); |
19 | # @words = &shellwords(@lines); | |
a0d0e21e | 20 | # @words = &old_shellwords(@lines); |
2304df62 AD |
21 | |
22 | # Hal Pomeranz (pomeranz@netcom.com), 23 March 1994 | |
23 | # Permission to use and distribute under the same terms as Perl. | |
24 | # No warranty expressed or implied. | |
25 | ||
26 | # Basically an update and generalization of the old shellwords.pl. | |
27 | # Much code shamelessly stolen from the old version (author unknown). | |
28 | # | |
29 | # "ewords() accepts a delimiter (which can be a regular expression) | |
30 | # and a list of lines and then breaks those lines up into a list of | |
31 | # words ignoring delimiters that appear inside quotes. | |
32 | # | |
33 | # The $keep argument is a boolean flag. If true, the quotes are kept | |
34 | # with each word, otherwise quotes are stripped in the splitting process. | |
35 | # $keep also defines whether unprotected backslashes are retained. | |
36 | # | |
2304df62 | 37 | |
a0d0e21e LW |
38 | 1; |
39 | __END__ | |
40 | ||
41 | ||
42 | sub shellwords { | |
43 | ||
44 | # A &shellwords() replacement is included to demonstrate the new package. | |
45 | # This version differs from the original in that it will _NOT_ default | |
46 | # to using $_ if no arguments are given. I personally find the old behavior | |
47 | # to be a mis-feature. | |
2304df62 | 48 | |
2304df62 AD |
49 | local(@lines) = @_; |
50 | $lines[$#lines] =~ s/\s+$//; | |
a0d0e21e | 51 | "ewords('\s+', 0, @lines); |
2304df62 AD |
52 | } |
53 | ||
54 | ||
a0d0e21e LW |
55 | |
56 | sub quotewords { | |
57 | ||
2304df62 AD |
58 | # "ewords() works by simply jamming all of @lines into a single |
59 | # string in $_ and then pulling off words a bit at a time until $_ | |
60 | # is exhausted. | |
61 | # | |
62 | # The inner "for" loop builds up each word (or $field) one $snippet | |
63 | # at a time. A $snippet is a quoted string, a backslashed character, | |
64 | # or an unquoted string. We fall out of the "for" loop when we reach | |
65 | # the end of $_ or when we hit a delimiter. Falling out of the "for" | |
66 | # loop, we push the $field we've been building up onto the list of | |
67 | # @words we'll be returning, and then loop back and pull another word | |
68 | # off of $_. | |
69 | # | |
70 | # The first two cases inside the "for" loop deal with quoted strings. | |
71 | # The first case matches a double quoted string, removes it from $_, | |
72 | # and assigns the double quoted string to $snippet in the body of the | |
73 | # conditional. The second case handles single quoted strings. In | |
74 | # the third case we've found a quote at the current beginning of $_, | |
75 | # but it didn't match the quoted string regexps in the first two cases, | |
a0d0e21e | 76 | # so it must be an unbalanced quote and we croak with an error (which can |
2304df62 AD |
77 | # be caught by eval()). |
78 | # | |
79 | # The next case handles backslashed characters, and the next case is the | |
80 | # exit case on reaching the end of the string or finding a delimiter. | |
81 | # | |
82 | # Otherwise, we've found an unquoted thing and we pull of characters one | |
83 | # at a time until we reach something that could start another $snippet-- | |
84 | # a quote of some sort, a backslash, or the delimiter. This one character | |
85 | # at a time behavior was necessary if the delimiter was going to be a | |
86 | # regexp (love to hear it if you can figure out a better way). | |
87 | ||
2304df62 AD |
88 | local($delim, $keep, @lines) = @_; |
89 | local(@words,$snippet,$field,$_); | |
90 | ||
91 | $_ = join('', @lines); | |
92 | while ($_) { | |
93 | $field = ''; | |
94 | for (;;) { | |
95 | $snippet = ''; | |
96 | if (s/^"(([^"\\]|\\[\\"])*)"//) { | |
97 | $snippet = $1; | |
98 | $snippet = "\"$snippet\"" if ($keep); | |
99 | } | |
100 | elsif (s/^'(([^'\\]|\\[\\'])*)'//) { | |
101 | $snippet = $1; | |
102 | $snippet = "'$snippet'" if ($keep); | |
103 | } | |
104 | elsif (/^["']/) { | |
a0d0e21e | 105 | croak "Unmatched quote"; |
2304df62 AD |
106 | } |
107 | elsif (s/^\\(.)//) { | |
108 | $snippet = $1; | |
109 | $snippet = "\\$snippet" if ($keep); | |
110 | } | |
111 | elsif (!$_ || s/^$delim//) { | |
112 | last; | |
113 | } | |
114 | else { | |
115 | while ($_ && !(/^$delim/ || /^['"\\]/)) { | |
116 | $snippet .= substr($_, 0, 1); | |
117 | substr($_, 0, 1) = ''; | |
118 | } | |
119 | } | |
120 | $field .= $snippet; | |
121 | } | |
122 | push(@words, $field); | |
123 | } | |
124 | @words; | |
125 | } | |
2304df62 AD |
126 | |
127 | ||
a0d0e21e LW |
128 | sub old_shellwords { |
129 | ||
130 | # Usage: | |
131 | # use ParseWords; | |
132 | # @words = old_shellwords($line); | |
133 | # or | |
134 | # @words = old_shellwords(@lines); | |
135 | ||
136 | local($_) = join('', @_); | |
137 | my(@words,$snippet,$field); | |
138 | ||
139 | s/^\s+//; | |
140 | while ($_ ne '') { | |
141 | $field = ''; | |
142 | for (;;) { | |
143 | if (s/^"(([^"\\]|\\.)*)"//) { | |
144 | ($snippet = $1) =~ s#\\(.)#$1#g; | |
145 | } | |
146 | elsif (/^"/) { | |
147 | croak "Unmatched double quote: $_"; | |
148 | } | |
149 | elsif (s/^'(([^'\\]|\\.)*)'//) { | |
150 | ($snippet = $1) =~ s#\\(.)#$1#g; | |
151 | } | |
152 | elsif (/^'/) { | |
153 | croak "Unmatched single quote: $_"; | |
154 | } | |
155 | elsif (s/^\\(.)//) { | |
156 | $snippet = $1; | |
157 | } | |
158 | elsif (s/^([^\s\\'"]+)//) { | |
159 | $snippet = $1; | |
160 | } | |
161 | else { | |
162 | s/^\s+//; | |
163 | last; | |
164 | } | |
165 | $field .= $snippet; | |
166 | } | |
167 | push(@words, $field); | |
168 | } | |
169 | @words; | |
170 | } |