Generated: Tue Feb 2 17:54:43 2010 from listregex.pl 2009/08/29 10.6 KB.
#!/perl -w # NAME: listregex.pl # AIM: Read perl files, and list all 'regex' expressions found # 8/29/2009 - geoff mclane - http://geoffair.net/mperl/ use strict; use warnings; use File::Basename; unshift(@INC, 'C:/GTools/perl'); require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); # options my $add_scalars = 1; # include regex like '=~ $COND' my $add_quoted = 1; # include regex lile "word" my $in_folder = '.'; my @warnings = (); my $total_lines = 0; my $reglist = "reglist.txt"; # FLAGS my $FG_NONE = 0; my $FG_SCAL = 1; # is a SCALAR entry my $FG_QUOT = 1; # is a QUOTED entry # debug my $dbg01 = 0; # show prt( "Returning success [$reg] at off=[$ind]\n" ) if ($dbg01); my $dbg02 = 0; # show prt( "$fil:$lnn: [$nxline]\n" ) if ($dbg02);; my $dbg03 = 0; # show prt( "reg=[$reg] off=[$off] nxl=[$nxline]\n" ) if ($dbg03); my $dbg04 = 0; # show prt( "$fil:$lnn: got regex = [$rlist] from line [$line]\n" ) if ($show && $dbg04); my $dbg05 = 0; # show prt( "Returning QUOTED success [$reg] at off=[$ind]\n" ) if ($dbg05); my $dbg06 = 0; # show prt( "Returning SCALAR success [$reg] at off=[$ind]\n" ) if ($dbg06); sub prtw($) { my ($txt) = shift; prt($txt); $txt =~ s/\n$//; push(@warnings,$txt); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS ...\n" ); foreach my $line (@warnings) { prt("$line\n" ); } } else { prt("\nNo warnings issued.\n"); } #my $s = get_dbg_str(); #prt( "WARNING: DEBUG ON [$s]\n" ) if length($s); prt("\n"); } sub scan_directory($) { my ($ind) = shift; my ($DIR, $typ, $cnt, $hm); my ($ocnt, $ccnt, $hcnt, $dcnt); $ocnt = 0; $ccnt = 0; $hcnt = 0; $dcnt = 0; my @arr = (); prt( "Scanning folder [$ind] for perl files...\n" ); if (opendir $DIR, $ind) { my @fils = readdir($DIR); closedir $DIR; foreach my $fil (@fils) { next if (($fil eq '.')||($fil eq '..')); my $ff = $ind; $ff .= "\\" if !($ff =~ /(\\|\/)$/ ); $ff .= $fil; my ($nm, $dir, $ext) = fileparse( $fil, qr/\.[^.]*/ ); my $lcext = lc($ext); if ( (($lcext eq '.pl')||($lcext eq '.pm')||($lcext eq '.cgi')) ) { push(@arr, [ $fil, $ff ]); } } } else { prt( "ERROR: Unable to open directory...\n" ); } return \@arr; } # [m]/PATTERN/[g][i][m][o][s][x] # s/PATTERN/REPLACEMENT/[e][g][i][m][o][s][x] # tr/SEARCHLIST/REPLACMENTLIST/[c][d][s] my $m_mods = 'gimosx'; my $s_mods = 'egimosx'; my $t_mods = 'cds'; sub return_regex($$$$) { my ($txt,$roff,$rreg,$rflag) = @_; # like substr($line,$off), \$off, \$reg ) my ($ind,$len,$i,$c,$reg,$nc,$go,$pc,$mods,$ppc,$endc,$sub); $ind = index($txt,'=~'); if ($ind >= 0) { $ind += 2; $txt = substr($txt,$ind); $len = length($txt); $reg = ''; $go = 0; $endc = '/'; $sub = 0; #prt( "Found '=~' at offset $ind - checking [$txt]\n" ) if ($dbg01); for ($i = 0; $i < $len; $i++) { $c = substr($txt,$i,1); $nc = (($i + 1) < $len) ? substr($txt,$i+1,1) : ''; #prt( "Check char [$c]\n" ); if ($c eq '/') { $reg = $c; $mods = $m_mods; $go = 1; } elsif (($c eq 'm')&&($nc eq '/')) { $reg = "$c$nc"; $mods = $m_mods; $i++; $go = 1; } elsif (($c eq 'm')&&($nc eq '|')) { $reg = "$c$nc"; $mods = $m_mods; $endc = $nc; $i++; $go = 1; } elsif (($c eq 's')&&($nc eq '/')) { $reg = "$c$nc"; $mods = $s_mods; $sub = 1; $i++; $go = 1; } elsif (($c eq 't')&&($nc eq 'r')) { if (($i + 2) < $len) { $nc = (($i + 2) < $len) ? substr($txt,$i+2,1) : ''; if ($nc eq '/') { $reg = "tr/"; $i += 2; $mods = $t_mods; $sub = 1; $go = 1; } } } elsif ($c eq '$') { # special case of a scalar if ($add_scalars) { $reg = $c; $i++; for (; $i < $len; $i++) { $ppc = $pc; $pc = $c; $c = substr($txt,$i,1); $nc = (($i + 1) < $len) ? substr($txt,$i+1,1) : ''; if ($c =~ /\w/) { $reg .= $c; } else { last; } } if (length($reg) > 1) { $ind += $i; $$rreg = $reg; $$roff = $ind; $$rflag |= $FG_SCAL; prt( "Returning SCALAR success [$reg] at off=[$ind]\n" ) if ($dbg06); return 1; } } } elsif ($c eq '"') { if ($add_quoted) { # limited to '"\w+"' $reg = $c; $i++; for (; $i < $len; $i++) { $ppc = $pc; $pc = $c; $c = substr($txt,$i,1); $nc = (($i + 1) < $len) ? substr($txt,$i+1,1) : ''; if ($c =~ /\w/) { $reg .= $c; } elsif ($c eq '"') { $reg .= $c; $i++; last; } else { $reg = ''; last; } } if (length($reg) > 1) { $ind += $i; $$rreg = $reg; $$roff = $ind; $$rflag |= $FG_QUOT; prt( "Returning QUOTED success [$reg] at off=[$ind]\n" ) if ($dbg05); return 1; } } } if ($go) { #prt( "Found GO at offset $i\n" ) if ($dbg01); $i++; $c = '/'; for (; $i < $len; $i++) { $ppc = $pc; $pc = $c; $c = substr($txt,$i,1); $nc = (($i + 1) < $len) ? substr($txt,$i+1,1) : ''; $reg .= $c; # if (($c eq '/')&&($pc ne "\\")) { if ( ($c eq $endc) && (($pc ne "\\") || (($pc eq "\\") && ($ppc eq "\\")) ) ) { # we appear to have it... if ($sub) { $sub--; next; # continue for next part of tr or s/.../.../ } $i++; # now check for MODS for (; $i < $len; $i++) { $c = substr($txt,$i,1); last if ( !($c =~ /\w/) ); if ($c =~ /[$mods]/) { $reg .= $c; } else { last; } } $ind += $i; $$rreg = $reg; $$roff = $ind; prt( "Returning success [$reg] at off=[$ind]\n" ) if ($dbg01); return 1; } } last; } } } #prt( "Returning FAILED\n" ); return 0; } sub process_perl_files($) { my ($ra) = shift; my ($cnt,$ff,$fil,$i,$lnn,@lns,$line,$off,$reg,$rlist,$show,$nxline,$flag); $cnt = scalar @{$ra}; prt( "Processing $cnt perl files...\n" ); my %hreg = (); my @flagged = (); for ($i = 0; $i < $cnt; $i++) { $fil = ${$ra}[$i][0]; $ff = ${$ra}[$i][1]; $lnn = 0; if (open INF, "<$ff") { @lns = <INF>; close INF; $cnt = scalar @lns; prt( "Scanning $cnt lines, from file [$fil]\n" ); $total_lines += $cnt; foreach $line (@lns) { chomp $line; $lnn++; next if ($line =~ /^\s*#/); if ($line =~ /=~/) { $off = 0; $reg = ''; $rlist = ''; $show = 0; $nxline = trim_all($line); prt( "$fil:$lnn: [$nxline]\n" ) if ($dbg02); $flag = 0; while (return_regex( $nxline, \$off, \$reg, \$flag ) ) { if (defined $hreg{$reg}) { $hreg{$reg} .= "|$fil"; } else { $hreg{$reg} = "$fil"; $show = 1; } $rlist .= " + " if length($rlist); $rlist .= $reg; push(@flagged, [$i, $flag]) if ($flag); $flag = 0; $nxline = substr($nxline,$off); prt( "reg=[$reg] off=[$off] nxl=[$nxline]\n" ) if ($dbg03); } $line = trim_all($line); if (length($rlist)) { prt( "$fil:$lnn: got regex = [$rlist] from line [$line]\n" ) if ($show && $dbg04); } else { prtw( "$fil:$lnn: WARNING: FAILED to get regex from line [$line]???\n" ); } } } } else { prtw( "WARNING: Can not open [$ff]!\n" ); } } return \%hreg; } sub show_regex_list($) { my ($rh) = shift; my ($k,$v,$cnt,$len,$max,$msg,$out); $max = 0; $cnt = 0; $out = ''; foreach $k (keys %{$rh}) { $v = ${$rh}{$k}; $len = length($k); $max = $len if ($len > $max); $cnt++; } $msg = "List of $cnt regex examples...\n"; prt($msg); $out = $msg; foreach $k (sort keys %{$rh}) { $v = ${$rh}{$k}; $msg = "$k"; $msg .= ' ' while (length($msg) < $max); prt( "$msg $v\n" ); $out .= "$msg $v\n"; } $msg = "Done list of $cnt regex...\n"; prt($msg); $out .= $msg; write2file($out,$reglist); prt( "Written list to $reglist file...\n" ); } prt( "$0 ... List regex in perl files, in [$in_folder] ...\n" ); my $rarr = scan_directory($in_folder); my $rrhash = process_perl_files($rarr); prt( "Processed ".scalar @{$rarr}." files, $total_lines total lines, for ".scalar keys(%{$rrhash})." regex samples...\n" ); show_regex_list($rrhash); show_warnings(); close_log($outfile,1); exit(0); # eof