Generated: Sat Oct 24 16:35:14 2020 from dirbysize.pl 2019/10/25 22.4 KB. text copy
#!/perl -w # dirbysize.pl # AIM: Read a file output from the DIR command, and order the files in SIZE # order ... drop out the pure <DIR> entries ... # 15/09/2011 - Limit file name length to say 32 chars, and exit is no input # 2010/03/28 - option to omit CVS/svn/git directories # 2/13/2009 - Add delete (unlink) of log file at end, and skip if <DIR> is 3[2] or 4th[3] entry... # 20061219 - Can NOT use just $0 to get script name, since when directly invoke from # a command prompt, it becomes C:\GTools\Perl\dirbydate.pl, so added some regex to # massage the name, and just get the perl script name ... if a DRIVE included /^\w{1}:\\.*/ # AND, to enable it to FIND the logfile.pl, add unshift(@INC, 'C:/GTools/perl'); # OR this can be use lib '/Users/User/perl-lib';, or perl -MC:/GTools/perl -e dirbydate.pl # and added a $evenspace option ... # geoff mclane - 28 March, 2007 - geoffmclane.com - based on dirbydate.pl use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE use Cwd; unshift(@INC, 'C:/GTools/perl'); require 'lib_utils.pl' or die "Unable to load lib_utils.pl ...\n"; ###require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; my $os = $^O; my $pgm_vers = "0.0.5 2019/10/25"; # $pgm_vers = "0.0.1 2010/01/02" # options my $evenspace = 1; my $maxname = 0; my $maxsize = 0; my $loadlog = 0; my $maxlines = 300; # if greater than ??? lines, $loadout in notepad ... my $fixtm = 0; my $show_repo = 0; my $list_only = 0; my $sort_decend = 0; my $verbosity = 1; my $max_count = -1; my $max_file_name = 28; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $perl_dir = 'C:\GTools\perl'; my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); # debug output my $dbg01 = 0; # show each FOLDER as found ... my $dbg02 = 0; # show count and bytes as found ... my $dbg03 = 0; # show files, as found my $verb3 = 0; # show sort compare ... my $use_default = 0; # set to use DEFAULT file my $def_file = 'C:\DTEMP\templist'; #my $in_file = 'templist.txt'; my @repo_dirs = qw( CVS .svn .git ); my @filelist = (); my @sortlist = (); my $basedir = ''; my @warnings = (); my $in_file = ''; my @inp_files = (); sub VERB1() { return ($verbosity >= 1); } sub VERB2() { return ($verbosity >= 2); } sub VERB5() { return ($verbosity >= 5); } sub VERB9() { return ($verbosity >= 9); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS ...\n" ); foreach my $line (@warnings) { prt("$line\n" ); } prt("\n"); } else { prt("\nNo warnings issued.\n\n") if (VERB9()); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg) if ($val || $verbosity); } $verbosity = 0 if ($val == 0); show_warnings(); close_log($outfile,$loadlog); # unlink($outfile); exit($val); } ################################################## # My particular 'nice number' sub get_nn($) { # perl nice number nicenum add commas my ($n) = shift; if (length($n) > 3) { my $mod = length($n) % 3; my $ret = (($mod > 0) ? substr( $n, 0, $mod ) : ''); my $mx = int( length($n) / 3 ); for (my $i = 0; $i < $mx; $i++ ) { if (($mod == 0) && ($i == 0)) { $ret .= substr( $n, ($mod+(3*$i)), 3 ); } else { $ret .= ',' . substr( $n, ($mod+(3*$i)), 3 ); } } return $ret; } return $n; } sub trimall($) { # version 20061127 my ($ln) = shift; chomp $ln; # remove CR (\n) $ln =~ s/\r$//; # remove LF (\r) $ln =~ s/\t/ /g; # TAB(s) to a SPACE while ($ln =~ /\s\s/) { $ln =~ s/\s\s/ /g; # all double space to SINGLE } while ($ln =~ /^\s/) { $ln = substr($ln,1); # remove all LEADING space } while ($ln =~ /\s$/) { $ln = substr($ln,0, length($ln) - 1); # remove all TRAILING space } return $ln; } sub remdir { my ($f) = shift; my $b2 = quotemeta($basedir); $f =~ s/^$b2\\//; # remove beginning ... return $f; } # put least first sub mycmp_ascend2 { # special - ascend by 2nd component return -1 if (${$a}[2] < ${$b}[2]); return 1 if (${$a}[2] > ${$b}[2]); return 0; } # put largest first sub mycmp_decend2 { # special - ascend by 2nd component return -1 if (${$a}[2] > ${$b}[2]); return 1 if (${$a}[2] < ${$b}[2]); return 0; } sub is_date_entry { my (@ar) = @_; if (scalar @ar == 3) { if (($ar[0] =~ /^\d+$/) && ($ar[1] =~ /^\d+$/) && ($ar[2] =~ /^\d+$/)) { return 1; } } return 0; } sub is_time_entry { my (@ar) = @_; if (scalar @ar == 2) { if (($ar[0] =~ /^\d+$/)&&($ar[1] =~ /^\d+$/)) { return 1; } } return 0; } sub dbs_d2u($) { my ($du) = shift; $du =~ s/\\/\//g; return $du; } sub is_a_respository_dir($) { my ($dir) = @_; my $ud = dbs_d2u($dir); # ensure UNIX path my @arr = split('/',$ud); # split on UNIX path my ($d1,$d2); foreach $d1 (@arr) { foreach $d2 (@repo_dirs) { return 1 if ($d1 eq $d2); } } return 0; } sub set_common_folder($) { my ($ra) = @_; # = \@folders my $comm = ''; my ($fold,$len1,$len2,$len); my ($i,$ch1,$ch2); $len1 = 0; foreach $fold (@{$ra}) { $len2 = length($fold); if ($len1) { $len = $len1; $len = $len2 if ($len2 < $len1); for ($i = 0; $i < $len; $i++) { $ch1 = substr($comm,$i,1); $ch2 = substr($fold,$i,1); last if ($ch1 ne $ch2); } if ($i == $len) { # we reached the end, and they are the SAME } else { # stopped due to difference if ($i == 0) { # all different = NO COMMON $comm = ''; last; } elsif ($i < $len1) { # must truncate common $comm = substr($comm,0,$i); $len1 = length($comm); } } } else { $comm = $fold; $len1 = length($comm); } } prt("Common folder [$comm]\n") if (length($comm) && $verbosity); } sub split_nums($) { my $txt = shift; my @arr = (); my $len = length($txt); my ($i,$ch,$tag,$intag); $intag = 0; $tag = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($txt,$i,1); if ($intag) { if ($ch =~ /\d/) { $tag .= $ch; } else { $intag = 0; } } else { if ($ch =~ /\d/) { push(@arr,$tag) if (length($tag)); $tag = $ch; $intag = 1; } } } push(@arr,$tag) if (length($tag)); return @arr; } sub process_in_file($) { my ($inf) = @_; open INF, "<$inf" or mydie("ERROR: Can NOT open [$inf] ... $! ...\n" ); my @lines = <INF>; # slurp it all close INF; my $lc = scalar @lines; prt( "Processing $lc lines from [$inf] ...\n" ) if ($verbosity); my $line = ''; my $tln = ''; my $act_folder = ''; my $isrepo = 0; # $basedir = file_dirname($in_file); my ($fn,$bd) = fileparse($in_file); $basedir = $bd if (length($basedir) == 0); my @folders = (); my $lnn = 0; foreach $line (@lines) { $lnn++; chomp $line; $tln = trimall($line); if (length($tln)) { # Volume in drive C has no label. # Volume Serial Number is D833-AEFA if ($line =~ /Volume in drive (.*)/ ) { # ignore } elsif ($line =~ /Volume Serial Number is (.*)/ ) { # ignore } elsif ($line =~ /Total Files Listed/ ) { # ignore } elsif ($line =~ /Directory of (.*)/ ) { $act_folder = $1; prt( "Folder [$act_folder] ...\n" ) if ($dbg01); $isrepo = is_a_respository_dir($act_folder); push(@folders,$act_folder); } elsif ($line =~ /\d+\s+File\(s\)\s+[\d,]+\s+bytes/ ) { prt( $line ) if ($dbg02); } elsif ($line =~ /\d+\s+Dir\(s\)\s+[\d,]+\s+bytes/ ) { prt( $line ) if ($dbg02); } else { my @arr = split(/\s/, $tln); my $ac = scalar @arr; my $pchk = 0; #prt( $line ); if ($ac > 3) { # from Windows 98 machine # .. <DIR> 01/02/04 16:32 .. # IMAGE001 PNG 5 031 01/02/04 16:32 image001.png # from XP machine # 09/07/2008 19:24 <DIR> Sue # 09/07/2008 19:40 81,061 temp.DirBySize.pl.txt # from Vista machine # 01/10/2009 03:00 PM <DIR> 28 # 10/05/2008 02:04 PM 4,180 chgdiff.txt if (($arr[2] eq '<DIR>')||($arr[3] eq '<DIR>')) { # ignore } else { # file names of interest # my @ar2 = split('/', $arr[0]); # get day/month/year my @ar2 = split_nums($arr[0]); # get day/month/year OR month/day/year OR '-' instead of '/' my @ar3 = split(':', $arr[1]); # get hour:minutes if (is_date_entry(@ar2) && is_time_entry(@ar3)) { my $sz = $arr[2]; my $nm = $arr[3]; #if ($fixtm && (($sz eq 'AM')||($sz eq 'PM'))) { if (($sz eq 'AM')||($sz eq 'PM')) { # 12 hour clock - have AM or PM if ($sz eq 'PM') { $ar3[0] += 12; # bump hour by 12 $arr[1] = $ar3[0].':'.$ar3[1]; } $sz = $arr[3]; $nm = $arr[4]; if ($ac > 5) { my $i = 5; while($i < $ac) { $nm .= ' '.$arr[$i]; $i++; } } } else { # 24 hour clock (no AM/PM) if ($ac > 4) { my $i = 4; while($i < $ac) { $nm .= ' '.$arr[$i]; $i++; } } } if ((scalar @ar2 == 3)&&(scalar @ar3 == 2)) { my $ft = int($ar2[2].$ar2[1].$ar2[0].$ar3[0].$ar3[1]); if ($evenspace) { # get maximum lengths $maxname = length($nm) if (length($nm) > $maxname); $maxsize = length($sz) if (length($sz) > $maxsize); } $sz =~ s/,//g; # remove the COMA # 0 1 2 3 4 5 6 push(@filelist, [$ft, $nm, $sz, $act_folder, $arr[0], $arr[1], $isrepo]); #prt( "$ft, $nm, $sz, $act_folder $arr[0], $arr[1]\n") if ($dbg03); prt( "$ft, $nm, $sz, $arr[0], $arr[1]\n") if ($dbg03); } else { $pchk = 1; } } else { # first and second NOT DATE and TIME # maybe a Windows 98 listing # IMAGE001 PNG 5 031 01/02/04 16:32 image001.png # MAROC EXE 72 869 062 09/07/08 18:54 maroc.exe prt("$lnn: '$line'\n"); mydie( "EEK: Handling of this just not yet done ;=((\n" ); } } } else { $pchk = 1; } if ($pchk) { prt( "CHECK: " ); for (my $i = 0; $i < $ac; $i++) { prt( $arr[$i].' ' ); } prt("\n"); } } } } set_common_folder(\@folders); } sub dbs_b2ks2($) { my ($d) = @_; my $oss; my $kss; my $lg = 0; my $ks = ($d / 1024); #// get Ks my $div = 1; if( $ks < 1024 ) { $div = 1; $oss = "KB"; } elsif ( $ks < (1024*1024) ) { $div = 1024; $oss = "MB"; } elsif ( $ks < (1024*1024*1024) ) { $div = 1024 * 1024; $oss = "GB"; } else { $div = 1024 * 1204 * 1240; $oss = "TB"; } $kss = $ks / $div; $kss += 0.05; $kss *= 10; $lg = int($kss); $kss = $lg / 10; $kss .= '.0' if (!($kss =~ /\./)); ###return( ($lg / 10) . " " . $oss ); return "$kss$oss"; } sub show_file_list() { my $fc = scalar @filelist; my $so = ''; ###@sortlist = sort mycmp_decend @filelist; if ($sort_decend) { @sortlist = sort mycmp_decend2 @filelist; $so = 'decending'; } else { @sortlist = sort mycmp_ascend2 @filelist; $so = 'ascending'; } my $fcs = scalar @sortlist; if ($verbosity) { prt( "Got $fc files..."); prt( " spaced mxsz=$maxsize, mxnm=$maxname...") if ($evenspace); prt( " sorted $so..." ); prt( sprintf(" %s repos...", ($show_repo ? "including" : "excluding")) ); # prt( "Got $fcs sorted files ...\n" ); prt("\n"); } $maxname = $max_file_name if ($maxname > $max_file_name); # 15/09/2011 - limit file anem size my $msg = ''; my $skipped = 0; my $total_size = 0; my $repo_size = 0; my ($nm,$sz,$i,$reldir,$isrep,$csz,$cnm,$isrepo,$bgn,$lstcnt); $bgn = 0; if ($max_count != -1) { if ($max_count < $fcs) { $bgn = $fcs - $max_count; } } $loadlog = 1 if ($fcs - $bgn> $maxlines); # if greater than ??? lines, $loadout in notepad ... $lstcnt = 0; for ($i = $bgn; $i < $fcs; $i++) { $reldir = remdir( $sortlist[$i][3] ); $isrepo = $sortlist[$i][6]; # 0 1 2 3 4 5 6 # push(@filelist, [$ft, $nm, $sz, $act_folder, $arr[0], $arr[1], $isrepo]); $sz = $sortlist[$i][2]; $total_size += $sz; $csz = get_nn($sz); $nm = $sortlist[$i][1]; $cnm = $nm; if ($list_only) { $msg = $reldir; $msg .= "\\" if !(($msg =~ /(\\|\/)$/)||($nm =~ /^(\\|\/)/)); $msg .= $nm; } else { if ($evenspace) { $csz = ' ' . $csz while (length($csz) < $maxsize); $cnm = ' ' . $cnm while (length($cnm) < $maxname); $msg = $sortlist[$i][4] . ' ' . $sortlist[$i][5] . ' ' . $csz . ' ' . $cnm . ' ' . $reldir; } else { $msg = $sortlist[$i][4] . ' ' . $sortlist[$i][5] . ' ' . $sortlist[$i][1] . ' ' . $reldir; } } if ($show_repo) { prt( "$msg\n" ); $lstcnt++; } elsif (!$isrepo) { prt( "$msg\n" ); $lstcnt++; } else { $skipped++; $repo_size += $sz; } } $sz = $total_size - $repo_size; $csz = dbs_b2ks2($sz); if ($verbosity) { prt("Listed $lstcnt of $fcs files, $sz ($csz) bytes, in size order $so.\n"); prt("Skipped $skipped files, $repo_size (".dbs_b2ks2($repo_size).") bytes, in repository folders.\n") if ($skipped); } } sub process_in_files($) { my ($ra) = @_; my $fcnt = scalar @{$ra}; my ($file); prt("Processing $fcnt files...\n") if ($verbosity); foreach $file (@{$ra}) { $in_file = $file; process_in_file($file); } } parse_args(@ARGV); process_in_files(\@inp_files); show_file_list(); pgm_exit(0,"Normal exit 0"); ############################# sub give_help { prt("$pgmname: version $pgm_vers\n"); prt("Usages: $pgmname [options] file1 [file2, file3, ...]\n"); prt("where 'file' is the directory listing file to process.\n"); prt("Options:\n"); prt(" -h (or -?) = This help, and exit 0\n"); prt(" -c <dec> = Set maximum output count.\n"); prt(" -i[nvert] = Invert sort order.\n"); prt(" -ll[+|-] = Load log at end.\n"); prt(" -listonly = List the full file only.\n"); prt(" -showrepo = Show file in repository directories as well.\n"); prt(" -v[+|-] = Increase, or decrease (-) verbosity.\n"); } # only handle the -v[vv...][+|-] parameter sub parse_arg_v { my (@av) = @_; my ($arg,$sarg,$len,$ch,$i,$hadp); $hadp = 0; while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); $len = length($sarg); if ($sarg =~ /^v/i) { #prt("Verb: [$sarg] ($len)\n"); if ($sarg =~ /^v(\d+)$/) { $verbosity = $1; } else { for ($i = 0; $i < $len; $i++) { $ch = substr($sarg,$i,1); if ($ch =~ /v/i) { $verbosity++; } elsif ($ch eq '+') { if ($hadp) { $verbosity++; } $hadp++; } elsif ($ch eq '-') { $verbosity-- if ($verbosity); } else { pgm_exit(1,"ERROR: Unknown argument [$arg]! -v can only be followed by 'v', '+', '-'! or number! Try -h for help.\n"); } } } prt("Set verbosity to $verbosity.\n"); # if (VERB1()); } } shift @av; } } sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"Argument $arg requires follwoing count! Try -h for help.\n") if (!@av); } # parse the command line items... sub parse_args { my (@av) = @_; my ($arg,$sarg,$ch,$len,$tmp,$full); parse_arg_v(@av); $tmp = scalar @av; prt("Got $tmp arguments...\n") if ($verbosity > 1); $full = ''; while (@av) { $arg = $av[0]; $full .= ' ' if (length($full)); $full .= $arg; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); $ch = substr($sarg,0,1); $len = length($sarg); if (($ch eq 'h')||($ch eq '?')) { #if (($sarg =~ /^h/)||($sarg =~ /^?/)) { prt("Give help, and exit.\n"); give_help(); pgm_exit(0,"Help exit 0"); } elsif ($sarg =~ /^ll/i) { $tmp = $loadlog; if ($len > 2) { $ch = substr($sarg,2,1); if ($ch eq '-') { $loadlog = 0; } elsif ($ch eq '+') { $loadlog = 1; } else { pgm_exit(1,"ERROR: Unknown argument [$arg]! -l can only be followed by '+' or '-'! Try -h for help.\n"); } } else { $loadlog = 1; } if ($tmp != $loadlog) { prt(sprintf("Set load log %s\n", ($loadlog ? "On" : "Off"))) if ($verbosity); } } elsif ($sarg =~ /^listonly$/i) { $list_only = 1; prt("Set to output list only\n") if ($verbosity); } elsif ($sarg =~ /^showrepo$/i) { $show_repo = 1; prt("Set to include repository files.\n") if ($verbosity); } elsif ($sarg =~ /^i/i) { $sort_decend = 1; prt("Set to invert sort order.\n") if ($verbosity); } elsif ($sarg =~ /^c/i) { need_arg(@av); shift @av; $sarg = $av[0]; $full .= " $sarg"; if ($sarg =~ /^\d+$/) { $max_count = $sarg; if ($max_count) { prt("Set max output count to $max_count.\n") if ($verbosity); } else { pgm_exit(1,"ERROR: Argument [$arg] has to be followed by decimal count GT 0! Not [$sarg].\n"); } } else { pgm_exit(1,"ERROR: Argument [$arg] has to be followed by decimal count! Not [$sarg].\n"); } } elsif ($sarg =~ /^v/i) { # already done } else { pgm_exit(1,"ERROR: Unknown argument [$arg]! Try -h for help.\n"); } } else { # $inp_file = $arg; push(@inp_files,$arg); prt("Adding [$arg] to file list.\n") if ($verbosity); } shift @av; } if (!@inp_files && (-f $def_file) && $use_default) { push(@inp_files,$def_file); prt("Adding default [$def_file] to file list.\n") if ($verbosity); } prt("ARGS:[$full]\n") if ($verbosity > 2); if (!@inp_files) { pgm_exit(1,"ERROR: No input files found on command line!\n"); } } # eof - dirbysize.pl