This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
authorNicholas Clark <nick@ccl4.org>
Sat, 4 Sep 2004 20:18:50 +0000 (20:18 +0000)
committerNicholas Clark <nick@ccl4.org>
Sat, 4 Sep 2004 20:18:50 +0000 (20:18 +0000)
[ 22997]
Cleanup the main regex in Text::ParseWords and make the
parse_line() routine faster. Add a Unicode test case.

[ 23060]
Failing matches don't reset numbered variables.
Change #22997 could cause Text::ParseWords to loop forever if the
regex didn't not match. Explicitly return if the match fails.
p4raw-link: @23060 on //depot/perl: 30799d55fe07dcf79e95d2823efbd5ec4c2e3bf4
p4raw-link: @22997 on //depot/perl: 429b060a3290b7ecf98534144fcaf0fb46b2afe3

p4raw-id: //depot/maint-5.8/perl@23260
p4raw-integrated: from //depot/perl@23259 'copy in'
lib/Text/ParseWords.pm (@23048..)
p4raw-integrated: from //depot/perl@22997 'ignore'
lib/Text/ParseWords.t (@22992..)

lib/Text/ParseWords.pm
lib/Text/ParseWords.t

index fbc0ee0..94e6db7 100644 (file)
@@ -1,7 +1,7 @@
 package Text::ParseWords;
 
 use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
-$VERSION = "3.22";
+$VERSION = "3.23";
 
 require 5.000;
 
@@ -53,32 +53,27 @@ sub parse_line {
        use re 'taint'; # if it's tainted, leave it as such
 
     my($delimiter, $keep, $line) = @_;
-    my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+    my($word, @pieces);
 
     while (length($line)) {
-
-       ($quote, $quoted, undef, $unquoted, $delim, undef) =
-           $line =~ m/^(["'])                 # a $quote
-                        ((?:\\[\000-\377]|(?!\1)[^\\])*)  # and $quoted text
-                        \1                    # followed by the same quote
-                        ([\000-\377]*)        # and the rest
-                      |                       # --OR--
-                       ^((?:\\[\000-\377]|[^\\"'])*?)     # an $unquoted text
-                     (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))  
-                                               # plus EOL, delimiter, or quote
-                      ([\000-\377]*)          # the rest
-                     /x;                      # extended layout
-       return() unless( $quote || length($unquoted) || length($delim));
-
-       $line = $+;
+       $line =~ s/^(["'])                      # a $quote
+                   ((?:\\.|(?!\1)[^\\])*)      # and $quoted text
+                   \1                          # followed by the same quote
+                  |                            # --OR--
+                  ^((?:\\.|[^\\"'])*?)         # an $unquoted text
+                   (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))  
+                                               # plus EOL, delimiter, or quote
+                 //xs or return;               # extended layout
+       my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
+       return() unless( defined($quote) || length($unquoted) || length($delim));
 
         if ($keep) {
            $quoted = "$quote$quoted$quote";
        }
         else {
-           $unquoted =~ s/\\([\000-\377])/$1/g;
+           $unquoted =~ s/\\(.)/$1/sg;
            if (defined $quote) {
-               $quoted =~ s/\\([\000-\377])/$1/g if ($quote eq '"');
+               $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
                $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
             }
        }
index ef0e562..74c2733 100755 (executable)
@@ -8,7 +8,7 @@ BEGIN {
 use warnings;
 use Text::ParseWords;
 
-print "1..20\n";
+print "1..22\n";
 
 @words = shellwords(qq(foo "bar quiz" zoo));
 print "not " if $words[0] ne 'foo';
@@ -119,3 +119,16 @@ print "ok 19\n";
 $result = join('|', parse_line("\t", 0, $string));
 print "not " unless $result eq "field1|field2\nstill field2|field3";
 print "ok 20\n";
+
+# unicode
+$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
+$result = join('|', parse_line("\x{1234}", 0, $string));
+print "not " unless $result eq "field1|field2\x{1234}still field2|field3";
+print "ok 21\n";
+
+# missing quote after matching regex used to hang after change #22997
+"1234" =~ /(1)(2)(3)(4)/;
+$string = qq{"missing quote};
+$result = join('|', shellwords($string));
+print "not " unless $result eq "";
+print "ok 22\n";