More doc fixes from Abigail.

[perl5.git] / pod / perlfaq6.pod
diff --git a/pod/perlfaq6.pod b/pod/perlfaq6.pod

index 1af7948..488a27c 100644 (file)
--- a/pod/perlfaq6.pod
+++ b/pod/perlfaq6.pod
@@ -1,6 +1,6 @@
  =head1 NAME
  
-perlfaq6 - Regexps ($Revision: 1.16 $, $Date: 1997/03/25 18:16:56 $)
+perlfaq6 - Regexps ($Revision: 1.22 $, $Date: 1998/07/16 14:01:07 $)
  
  =head1 DESCRIPTION
  
@@ -25,7 +25,7 @@ comments.
  
      # turn the line into the first word, a colon, and the
      # number of characters on the rest of the line
-    s/^(\w+)(.*)/ lc($1) . ":" . length($2) /ge;
+    s/^(\w+)(.*)/ lc($1) . ":" . length($2) /meg;
  
  =item Comments Inside the Regexp
  
@@ -69,8 +69,9 @@ delimiter within the pattern:
  
  =head2 I'm having trouble matching over more than one line.  What's wrong?
  
-Either you don't have newlines in your string, or you aren't using the
-correct modifier(s) on your pattern.
+Either you don't have more than one line in the string you're looking at
+(probably), or else you aren't using the correct modifier(s) on your
+pattern (possibly).
  
  There are many ways to get multiline data into a string.  If you want
  it to happen automatically while reading input, you'll want to set $/
@@ -94,7 +95,7 @@ record read in.
  
      $/ = '';           # read in a whole paragraph, not just one line
      while ( <> ) {
-       while ( /\b(\w\S+)(\s+\1)+\b/gi ) {
+       while ( /\b([\w'-]+)(\s+\1)+\b/gi ) {   # words may contain ' and -
             print "Duplicate $1 at paragraph $.\n";
         }
      }
@@ -133,12 +134,23 @@ But if you want nested occurrences of C<START> through C<END>, you'll
  run up against the problem described in the question in this section
  on matching balanced text.
  
+Here's another example of using C<..>:
+
+    while (<>) {
+        $in_header =   1  .. /^$/;
+        $in_body   = /^$/ .. eof;
+       # now choose between them
+    } continue {
+       close ARGV if eof;      # reset $. at the end of each file
+    } 
+
  =head2 I put a regular expression into $/ but it didn't work. What's wrong?
  
  $/ must be a string, not a regular expression.  Awk has to be better
  for something. :-)
  
-Actually, you could do this if you don't mind reading the whole file into
+Actually, you could do this if you don't mind reading the whole file
+into memory:
  
      undef $/;
      @records = split /your_pattern/, <FH>;
@@ -210,7 +222,7 @@ This prints:
  
      this is a SUcCESS case
  
-=head2 How can I make C<\w> match accented characters?
+=head2 How can I make C<\w> match national character sets?
  
  See L<perllocale>.
  
@@ -325,9 +337,9 @@ playing hot potato.
  Use the split function:
  
      while (<>) {
-       foreach $word ( split ) {
+       foreach $word ( split ) { 
             # do something with $word here
-       }
+       } 
      }
  
  Note that this isn't really a word in the English sense; it's just
@@ -360,7 +372,7 @@ in the previous question:
  If you wanted to do the same thing for lines, you wouldn't need a
  regular expression:
  
-    while (<>) {
+    while (<>) { 
         $seen{$_}++;
      }
      while ( ($line, $count) = each %seen ) {
@@ -478,15 +490,17 @@ Or, using C<\G>, the much simpler (and faster):
  
  A more sophisticated use might involve a tokenizer.  The following
  lex-like example is courtesy of Jeffrey Friedl.  It did not work in
-5.003 due to bugs in that release, but does work in 5.004 or better:
+5.003 due to bugs in that release, but does work in 5.004 or better.
+(Note the use of C</c>, which prevents a failed match with C</g> from
+resetting the search position back to the beginning of the string.)
  
      while (<>) {
        chomp;
        PARSER: {
-           m/ \G( \d+\b    )/gx     && do { print "number: $1\n";  redo; };
-           m/ \G( \w+      )/gx     && do { print "word:   $1\n";  redo; };
-           m/ \G( \s+      )/gx     && do { print "space:  $1\n";  redo; };
-           m/ \G( [^\w\d]+ )/gx     && do { print "other:  $1\n";  redo; };
+           m/ \G( \d+\b    )/gcx    && do { print "number: $1\n";  redo; };
+           m/ \G( \w+      )/gcx    && do { print "word:   $1\n";  redo; };
+           m/ \G( \s+      )/gcx    && do { print "space:  $1\n";  redo; };
+           m/ \G( [^\w\d]+ )/gcx    && do { print "other:  $1\n";  redo; };
        }
      }
  
@@ -495,19 +509,19 @@ Of course, that could have been written as
      while (<>) {
        chomp;
        PARSER: {
-          if ( /\G( \d+\b    )/gx  {
+          if ( /\G( \d+\b    )/gcx ) {
                 print "number: $1\n";
                 redo PARSER;
            }
-          if ( /\G( \w+      )/gx  {
+          if ( /\G( \w+      )/gcx ) {
                 print "word: $1\n";
                 redo PARSER;
            }
-          if ( /\G( \s+      )/gx  {
+          if ( /\G( \s+      )/gcx ) {
                 print "space: $1\n";
                 redo PARSER;
            }
-          if ( /\G( [^\w\d]+ )/gx  {
+          if ( /\G( [^\w\d]+ )/gcx ) {
                 print "other: $1\n";
                 redo PARSER;
            }
@@ -531,12 +545,10 @@ L<perlfaq2>).
  
  =head2 What's wrong with using grep or map in a void context?
  
-Strictly speaking, nothing.  Stylistically speaking, it's not a good
-way to write maintainable code.  That's because you're using these
-constructs not for their return values but rather for their
-side-effects, and side-effects can be mystifying.  There's no void
-grep() that's not better written as a C<for> (well, C<foreach>,
-technically) loop.
+Both grep and map build a return list, regardless of their context.
+This means you're making Perl go to the trouble of building up a
+return list that you then just ignore.  That's no way to treat a
+programming language, you insensitive scoundrel!
  
  =head2 How can I match strings with multibyte characters?
  
@@ -546,19 +558,20 @@ synonymous.  The following set of approaches was offered by Jeffrey
  Friedl, whose article in issue #5 of The Perl Journal talks about this
  very matter.
  
-Let's suppose you have some weird Martian encoding where pairs of ASCII
-uppercase letters encode single Martian letters (i.e. the two bytes
-"CV" make a single Martian letter, as do the two bytes "SG", "VS",
-"XX", etc.). Other bytes represent single characters, just like ASCII.
+Let's suppose you have some weird Martian encoding where pairs of
+ASCII uppercase letters encode single Martian letters (i.e. the two
+bytes "CV" make a single Martian letter, as do the two bytes "SG",
+"VS", "XX", etc.). Other bytes represent single characters, just like
+ASCII.
  
-So, the string of Martian "I am CVSGXX!" uses 12 bytes to encode the nine
-characters 'I', ' ', 'a', 'm', ' ', 'CV', 'SG', 'XX', '!'.
+So, the string of Martian "I am CVSGXX!" uses 12 bytes to encode the
+nine characters 'I', ' ', 'a', 'm', ' ', 'CV', 'SG', 'XX', '!'.
  
  Now, say you want to search for the single character C</GX/>. Perl
-doesn't know about Martian, so it'll find the two bytes "GX" in the
-"I am CVSGXX!"  string, even though that character isn't there: it just
-looks like it is because "SG" is next to "XX", but there's no real "GX".
-This is a big problem.
+doesn't know about Martian, so it'll find the two bytes "GX" in the "I
+am CVSGXX!"  string, even though that character isn't there: it just
+looks like it is because "SG" is next to "XX", but there's no real
+"GX".  This is a big problem.
  
  Here are a few ways, all painful, to deal with it:
  
@@ -596,6 +609,18 @@ all mixed.
  
  =head1 AUTHOR AND COPYRIGHT
  
-Copyright (c) 1997 Tom Christiansen and Nathan Torkington.
-All rights reserved.  See L<perlfaq> for distribution information.
-
+Copyright (c) 1997, 1998 Tom Christiansen and Nathan Torkington.
+All rights reserved.
+
+When included as part of the Standard Version of Perl, or as part of
+its complete documentation whether printed or otherwise, this work
+may be distributed only under the terms of Perl's Artistic License.
+Any distribution of this file or derivatives thereof I<outside>
+of that package require that special arrangements be made with
+copyright holder.
+
+Irrespective of its distribution, all code examples in this file
+are hereby placed into the public domain.  You are permitted and
+encouraged to use this code in your own programs for fun
+or for profit as you see fit.  A simple comment in the code giving
+credit would be courteous but is not required.