We have found these regular expressions useful or interesting:
# Swap the first two whitespace-separated words, keeping the whitespace between them
s/(\S+)(\s+)(\S+)/$3$2$1/
# Parse a "keyword = value" line; whitespace around the "=" and after the value is dropped
m/^(\w+)\s*=\s*(.*?)\s*$/ # keyword is $1, value is $2
# Detect a line holding at least 80 characters
m/.{80,}/
length( ) >= 80 # ok, not a regex
# Slash-separated date followed by a colon-separated time; the six numeric fields land in $1..$6
m|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
# Rewrite every occurrence of /usr/bin to /usr/local/bin (parentheses serve as the delimiters)
s(/usr/bin)(/usr/local/bin)g
# Decode %XX hexadecimal escapes into the characters they name
s/%([0-9A-Fa-f][0-9A-Fa-f])/chr(hex($1))/ge
# Delete C comments (imperfectly: comment delimiters inside string literals
# are not recognised). Each comment is replaced by a single space so the
# tokens on either side stay separated. /s lets "." cross newlines, so
# multi-line comments are removed as well.
s{
    /\*     # Match the opening delimiter (the star must be escaped)
    .*?     # Match a minimal number of characters
    \*/     # Match the closing delimiter
}{ }gsx;
# Trim leading, then trailing, whitespace
s/^\s+//; s/\s+$//;
# Turn the two-character sequence backslash-n into a real newline
s/\\n/\n/g;
# Strip the package qualifier from a fully qualified symbol (everything through the last "::")
s/^.*:://
# Dotted-quad IPv4 address: four dot-separated octets, each limited to 0-255
# (leading zeros allowed up to three digits), captured in $1..$4.
# XXX: fails on legal IPs 127.1 and 2130706433.
m{
^ ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
\. ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
\. ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
\. ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
$
}x
# Remove the leading path from a filename: everything through the last "/"
# is deleted (empty replacement), leaving only the basename.
s{^.*/}{};
# Read the column count from the "co#" capability in $ENV{TERMCAP}; default to 80
$cols = ( ($ENV{TERMCAP} || " ") =~ m/:co#(\d+):/ ) ? $1 : 80;
# Build the command line from $0 and @ARGV, removing the directory part of each
# absolute path (the leading space added to the string lets $0 match as well)
($name = " $0 @ARGV") =~ s{ /\S+/}{ }g;
# Refuse to run when the OS name in $^O does not contain "linux" (case-insensitive)
die "This isn't Linux" unless $^O =~ m/linux/i;
# Join continuation lines: a newline plus the whitespace after it becomes one space
s/\n\s+/ /g
# Collect every unsigned decimal number (123, 123., 1.5, .5) found in $_
@nums = m/(\d+\.?\d*|\.\d+)/g;
# NOTE(review): the property names below are spelled loosely (spaces and hyphens);
# this presumably relies on Perl's loose matching of Unicode property names, with
# \p{Lu}, \p{Ll} and \p{Lt} as the short equivalents -- confirm on the target Perl.
# Words made up entirely of uppercase letters
@capwords = m/(\b\p{ Upper-case Letter }+\b)/g;
# Words made up entirely of lowercase letters
@lowords = m/(\b\p{ Lower-case Letter }+\b)/g;
# Initial-caps words: one uppercase or titlecase letter followed by zero or more lowercase letters
@icwords = m{
( \b
[\p{ Upper-case Letter }\p{ Title-case Letter }]
\p{ Lower-case Letter } *
\b )
}gx;
# Collect the HREF values of <A ...> tags in simple HTML (optionally quoted value,
# HREF must be the last attribute before the closing ">")
@links = m/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)['"]?\s*>/ig;
# Middle initial: first character of the second word when $_ holds at least three words,
# otherwise the empty string
$initial = /^\S+\s+(\S)\S*\s+\S/ ? $1 : "";
# Change straight double quotes around a phrase into typographic quotes.
# The two statements are alternatives -- use one or the other.
s/"([^"]*)"/``$1''/g; # old way: ASCII approximation using `` and ''
# next is unicode only: U+201C / U+201D are themselves the left / right
# double quotation marks, so each is emitted exactly once
s/"([^"]*)"/\x{201C}$1\x{201D}/g;
# Extract sentences from the input read via <>, one paragraph at a time.
# A sentence ends in "!", "?" or "." and must be followed by two spaces or
# by the end of the paragraph; every sentence found is appended to @sentences.
{
    local $/ = "";    # paragraph mode: each read returns one blank-line-delimited paragraph
    while (<>) {
        s/\n/ /g;               # unwrap the paragraph onto a single line
        s/ {3,}/\x20\x20/g;     # squeeze longer runs to exactly two spaces so they
                                # still count as a sentence break below
        push @sentences, m/(\S.*?[!?.])(?= {2}|\Z)/g;
    }
}
# Date written as four digits, two digits, two digits, joined by hyphens
m/\b(\d{4})-(\d\d)-(\d\d)\b/ # YYYY in $1, MM in $2, DD in $3
# North American telephone number; the whole string must match. Accepted prefixes are
# "1 " with an optional area code, a parenthesised area code, or an optional
# "+country code" plus area code followed by a space or hyphen separator.
m/ ^
(?:
1 \s (?: \d\d\d \s)? # 1, or 1 and area code
| # ... or ...
\(\d\d\d\) \s # area code with parens
| # ... or ...
(?: \+\d\d?\d? \s)? # optional +country code
\d\d\d ([\s\-]) # and area code
)
\d\d\d (\s|\1) # prefix (and area code separator)
\d\d\d\d # exchange
$
/x
# Exclamations: "oh my" followed by god, gods, goddess, goddesses, goodness or gosh
# (an "h" is tolerated after the "g"), case-insensitively
m/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
# Extract lines regardless of which line terminator the text uses.
# Each iteration removes one line plus its terminator from the front of
# $input and appends the line (without terminator) to @lines. The replacement
# is empty so nothing is prepended to the next line. Text after the last
# terminator is left in $input.
push(@lines, $1) while $input =~ s{
    ^                   # gobble from front
    (
        .*?             # capture 1: any characters (/s), minimally, even none
    )
    (?:                 # make capturing if saving terminators
        \x0D \x0A       # CRLF
      | \x0A            # LF
      | \x0D            # CR
      | \x0C            # FF
      # (see http://www.unicode.org/reports/tr13/tr13-9.html)
      | \x{2028}        # Unicode LS
      | \x{2029}        # Unicode PS
    )
}{}sx;                  # consumes $input
Or use split:
# Split $input into @lines on any of the recognised line terminators;
# the terminators themselves are discarded because the group is non-capturing.
@lines = split m{
(?: # make capturing if saving terminators
\x0D \x0A # CRLF
| \x0A # LF
| \x0D # CR
| \x0C # FF
# (see http://www.unicode.org/reports/tr13/tr13-9.html)
| \x{2028} # Unicode LS
| \x{2029} # Unicode PS
)
}x, $input;
Copyright © 2003 O'Reilly & Associates. All rights reserved.