- my $sentence_rx = qr{
- (?: (?<= ^ ) | (?<= \s ) ) # after start-of-string or whitespace
- \p{Lu} # capital letter
- .*? # a bunch of anything
- (?<= \S ) # that ends in non-whitespace
- (?<! \b [DMS]r ) # but isn't a common abbreviation
- (?<! \b Mrs )
- (?<! \b Sra )
- (?<! \b St )
- [.?!] # followed by a sentence ender
- (?= $ | \s ) # in front of end-of-string or whitespace
- }sx;
- local $/ = "";
- while (my $paragraph = <>) {
- say "NEW PARAGRAPH";
- my $count = 0;
- while ($paragraph =~ /($sentence_rx)/g) {
- printf "\tgot sentence %d: <%s>\n", ++$count, $1;
- }
+ my $sentence_rx = qr{
+ (?: (?<= ^ ) | (?<= \s ) ) # after start-of-string or
+ # whitespace
+ \p{Lu} # uppercase letter
+ .*? # minimal run of anything
+ (?<= \S ) # that ends in non-
+ # whitespace
+ (?<! \b [DMS]r ) # but isn't Dr, Mr, Sr,
+ (?<! \b Mrs ) # Mrs,
+ (?<! \b Sra ) # Sra,
+ (?<! \b St ) # or St (abbreviations)
+ [.?!] # followed by a sentence
+ # ender
+ (?= $ | \s ) # in front of end-of-string
+ # or whitespace
+ }sx; # /s: dot matches newline
+ local $/ = "";
+ while (my $paragraph = <>) {
+ say "NEW PARAGRAPH";
+ my $count = 0;
+ while ($paragraph =~ /($sentence_rx)/g) {
+ printf "\tgot sentence %d: <%s>\n", ++$count, $1;