This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8_heavy: Guard against infinite recursion
authorKarl Williamson <public@khwilliamson.com>
Sun, 14 Nov 2010 19:11:12 +0000 (12:11 -0700)
committerFather Chrysostomos <sprout@cpan.org>
Mon, 22 Nov 2010 21:32:53 +0000 (13:32 -0800)
If things aren't just so, it could be that utf8_heavy calls something
which requires a pattern, such as split or just a pattern match that
ends up calling utf8_heavy again, ad infinitum.  When this happens,
memory gets eaten up and the machine grinds to a halt, likely requiring a
manual forced reboot.

To prevent this undesirable situation, utf8_heavy now stacks all its
calls in progress, and if any is a repeat, panics.

lib/utf8_heavy.pl

index 7951bc8..ab31922 100644 (file)
@@ -21,10 +21,17 @@ sub croak { require Carp; Carp::croak(@_) }
     # are specified that don't exactly match.
     my $min_floating_slop;
 
+    # To guard against this program calling something that in turn ends up
+    # calling this program with the same inputs, and hence infinitely
+    # recursing, we keep a stack of the properties that are currently in
+    # progress, pushed upon entry, popped upon return.
+    my @recursed;
+
     sub SWASHNEW {
         my ($class, $type, $list, $minbits, $none) = @_;
         local $^D = 0 if $^D;
 
+        $class = "" unless defined $class;
         print STDERR __LINE__, ": class=$class, type=$type, list=$list, minbits=$minbits, none=$none\n" if DEBUG;
 
         ##
@@ -70,6 +77,15 @@ sub croak { require Carp; Carp::croak(@_) }
 
         if ($type)
         {
+
+            # Verify that this isn't a recursive call for this property.
+            # Can't use croak, as it may try to recurse here itself.
+            my $class_type = $class . "::$type";
+            if (grep { $_ eq $class_type } @recursed) {
+                CORE::die "panic: Infinite recursion in SWASHNEW for '$type'\n";
+            }
+            push @recursed, $class_type;
+
             $type =~ s/^\s+//;
             $type =~ s/\s+$//;
 
@@ -122,7 +138,10 @@ sub croak { require Carp; Carp::croak(@_) }
                 # value indicates the table we should use.
                 my ($property, $table, @remainder) =
                                     split /\s*[:=]\s*/, $property_and_table, -1;
-                return $type if @remainder;
+                if (@remainder) {
+                    pop @recursed if @recursed;
+                    return $type;
+                }
 
                 my $prefix;
                 if (! defined $table) {
@@ -141,7 +160,10 @@ sub croak { require Carp; Carp::croak(@_) }
 
                     # And convert to canonical form.  Quit if not valid.
                     $property = $utf8::loose_property_name_of{$property};
-                    return $type unless defined $property;
+                    if (! defined $property) {
+                        pop @recursed if @recursed;
+                        return $type;
+                    }
 
                     $prefix = "$property=";
 
@@ -151,14 +173,20 @@ sub croak { require Carp; Carp::croak(@_) }
                         print STDERR __LINE__, ": table=$table\n" if DEBUG;
 
                         # Don't allow leading nor trailing slashes 
-                        return $type if $table =~ / ^ \/ | \/ $ /x;
+                        if ($table =~ / ^ \/ | \/ $ /x) {
+                            pop @recursed if @recursed;
+                            return $type;
+                        }
 
                         # Split on slash, in case it is a rational, like \p{1/5}
                         my @parts = split qr{ \s* / \s* }x, $table, -1;
                         print __LINE__, ": $type\n" if @parts > 2 && DEBUG;
 
                         # Can have maximum of one slash
-                        return $type if @parts > 2;
+                        if (@parts > 2) {
+                            pop @recursed if @recursed;
+                            return $type;
+                        }
 
                         foreach my $part (@parts) {
                             print __LINE__, ": part=$part\n" if DEBUG;
@@ -191,8 +219,10 @@ sub croak { require Carp; Carp::croak(@_) }
                             # Result better look like a number.  (This test is
                             # needed because, for example could have a plus in
                             # the middle.)
-                            return $type if $part
-                                            !~ / ^ -? [0-9]+ ( \. [0-9]+)? $ /x;
+                            if ($part !~ / ^ -? [0-9]+ ( \. [0-9]+)? $ /x) {
+                                pop @recursed if @recursed;
+                                return $type;
+                            }
                         }
 
                         #  If a rational...
@@ -296,7 +326,10 @@ sub croak { require Carp; Carp::croak(@_) }
                                 }
 
                                 # Quit if didn't find one.
-                                return $type unless defined $table;
+                                if (! defined $table) {
+                                    pop @recursed if @recursed;
+                                    return $type;
+                                }
                             }
                         }
                         print STDERR __LINE__, ": $property=$table\n" if DEBUG;
@@ -365,6 +398,7 @@ sub croak { require Carp; Carp::croak(@_) }
                 ## out what to do with $type. Ouch.
                 ##
 
+                pop @recursed if @recursed;
                 return $type;
             }
 
@@ -381,6 +415,7 @@ sub croak { require Carp; Carp::croak(@_) }
                 my $found = $Cache{$class, $file};
                 if ($found and ref($found) eq $class) {
                     print STDERR __LINE__, ": Returning cached '$file' for \\p{$type}\n" if DEBUG;
+                    pop @recursed if @recursed;
                     return $found;
                 }
 
@@ -448,7 +483,10 @@ sub croak { require Carp; Carp::croak(@_) }
                         elsif ($c =~ /^([0-9a-fA-F]+)/) {
                             $subobj = utf8->SWASHNEW("", $c, $minbits, 0);
                         }
-                        return $subobj unless ref $subobj;
+                        if (! ref $subobj) {
+                            pop @recursed if @recursed && $type;
+                            return $subobj;
+                        }
                         push @extras, $name => $subobj;
                         $bits = $subobj->{BITS} if $bits < $subobj->{BITS};
                     }
@@ -476,6 +514,8 @@ sub croak { require Carp; Carp::croak(@_) }
             $Cache{$class, $file} = $SWASH;
         }
 
+        pop @recursed if @recursed && $type;
+
         return $SWASH;
     }
 }