Generated: Mon Aug 16 14:14:39 2010 from stripcols.pl 2010/06/29 20.5 KB.
#!/perl -w # NAME: stripcols.pl # AIM: Strip the left number of columns off a file # 29/06/2010 - Functionality check # 03/11/2009 - Add more options # 2009/09/23 - geoff mclane - http://geoffair.net/mperl/ use strict; use warnings; use Cwd; use File::Basename; use Time::gmtime; unshift(@INC, "C:\\GTools\\perl"); require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $perl_base = "C:\\GTools\\perl"; # perl directory my $outfile = $perl_base."\\temp.$pgmname.txt"; open_log($outfile); # FEATURES my $load_log = 0; my $dbg_load = 0; # add debug lines top and bottom, and repeat last line my $rule_line = '12345678901234567890123456789012345678901234567890123456789012345678901234567890'; my $dbg_run = 0; # no arguments - run on defaults my $html_out = 0; # output HTML my $dbg2log = 0; # add the output to the LOG file as well my $skip_common = 0; # avoid writing duplicate lines to output my $dbg_sc01 = 0; # PROGRAM VARIABLES my $in_file = "temp.txt"; #my $in_file = "C:\\DTEMP\\temp2.txt"; my $out_file = $perl_base."\\tempoutf.txt"; my $columns = 0; my $newtxt = ''; my @col_ranges = (); my $beyond_col = 0; my @warnings = (); my @html_cols = (); my $html_rtalign = 0; my $top_skip = 0; my $bot_skip = 0; my $got_out = 0; # if given out, and is text, set to load it... sub prtw($) { my ($msg) = @_; prt($msg); $msg =~ s/\n$//; push(@warnings,$msg); } sub show_warnings() { if (@warnings) { prt( "Got ".scalar @warnings." WARNINGS...\n" ); foreach my $msg (@warnings) { prt( "$msg\n" ); } } else { prt( "No warnings issued.\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; show_warnings(); if (length($msg)) { $msg .= "\n" if ( !($msg =~ /\n$/) ); prt($msg); } close_log($outfile,$load_log); exit($val); } sub prt_help() { prt( "$pgmname - Version 0.0.1.\n" ); prt( "$pgmname in_file columns [out_file] [-ll]\n" ); prt( " in_file - file to strip columns from.\n" ); prt( " columns - number of columns to strip from head of line.\n" ); prt( " out_file - Optional: file to write stripped output. (default=$out_file)\n" ); prt( " -b <VAL> - Strip line beyond this column VAL.\n" ); prt( " -c <VAL> - Columns to strip from beginning of lines, 0 to whatever.\n" ); prt( " -cr <RNG> - Give column deletion range, BGN-END. Strip from BGN+1 to END.\n" ); prt( " -i <FILE> - Alternate way to give INPUT file name.\n" ); prt( " -o <FILE> - Alternate way to give OUTPUT file name.\n" ); prt( " -htm - Output into HTML table.\n" ); prt( " -hc <VAL> - Split the table into columns at VAL. May use multiple columns.\n" ); prt( " -rt <VAL> - Skip VAL rows at the top of the file.\n" ); prt( " -rb <VAL> - Skip VAL rows at the bottom of the file.\n" ); prt( " -ll - Optional: load log at end in notepad. (default=No)\n" ); prt( " -sc - Skip duplicate lines in text output.\n"); prt( "Note the output file is unconditionally written, but it is renamed to .old or .bak first!\n" ); } sub get_YYYYMMDD($$) { my ($t,$sep) = @_; # sec, min, hour, mday, mon, year, wday, yday, and isdst. my $tm = gmtime($t); my $m = sprintf( "%04d/%02d/%02d", $tm->year() + 1900, $tm->mon() + 1, $tm->mday()); $m =~ s/\//$sep/g if ($sep ne '/'); return $m; } sub chomp_rule_line($$$) { my ($rrl,$bgn,$end) = @_; my $line = ${$rrl}; my $len = length($line); my @arr = split('',$line); for (my $i = $bgn; $i < $end; $i++) { last if ($i >= $len); $arr[$i] = '_'; } $line = join('',@arr); ${$rrl} = $line; } sub chomp_rule_line_end($$) { my ($rrl,$bc) = @_; my $line = ${$rrl}; my $len = length($line); my @arr = split('',$line); for (my $i = $bc; $i < $len; $i++) { $arr[$i] = '_'; } $line = join('',@arr); ${$rrl} = $line; } sub chomp_rule_line_bgn($$) { my ($rrl,$bc) = @_; my $line = ${$rrl}; my $len = length($line); my @arr = split('',$line); for (my $i = 0; $i < $bc; $i++) { last if ($i >= $len); $arr[$i] = '_'; } $line = join('',@arr); ${$rrl} = $line; } sub process_file($$$) { my ($fil,$cols,$rtxt) = @_; my ($ra, @lines, @nlns, $line, $skip, $len, $nl, $res); my $rc = scalar @col_ranges; my ($i,$bgn,$end,$off,$rl); my $tot_in = 0; my $tot_out = 0; my $mret = 0; if ( open( INF, "<$fil" ) ) { @lines = <INF>; close INF; prt( "Processing ".scalar @lines." lines, from [$fil]\n" ); @nlns = (); push(@nlns,$rule_line) if ($dbg_load); $rl = $rule_line; my $done_rl = 0; my $last_line = ''; foreach $line (@lines) { chomp $line; $len = length($line); $tot_in += $len + 2; $last_line = $line if (length(trim_all($line))); if ($beyond_col) { if ($beyond_col < $len) { $line = substr($line,0,$beyond_col); chomp_rule_line_end(\$rl,$beyond_col) if (!$done_rl); } } $len = length($line); if ($rc) { $nl = $line; $off = 0; for ($i = 0; $i < $rc; $i++) { $bgn = $col_ranges[$i][0] - 1; $end = $col_ranges[$i][1] - 1; if ($end > $len) { $end = $len; } if ($end <= $len) { chomp_rule_line(\$rl,$bgn,$end) if (!$done_rl); $nl = substr($line,0,$bgn-$off).substr($line,$end-$off); $line = $nl; $off += $end - $bgn; } else { prtw("WARNING: Range bgn=$bgn to end $end FAILED\n") if (!$done_rl); } } } $len = length($line); $skip = ($len >= $cols) ? $cols : $len; $line = substr($line,$skip); chomp_rule_line_bgn(\$rl,$cols) if (!$done_rl); $len = length($line); $tot_out += $len + 2; push(@nlns,$line); $done_rl++; } if ($dbg_load) { push(@nlns,$last_line); push(@nlns,$rl); $last_line = "B=$cols"; if ($rc) { $last_line .= ", $rc ranges "; for ($i = 0; $i < $rc; $i++) { $bgn = $col_ranges[$i][0]; $end = $col_ranges[$i][1]; $last_line .= ' ' while (length($last_line) < ($bgn - 1)); $last_line .= "$bgn-$end "; } } if ($beyond_col) { $last_line .= ' ' while (length($last_line) < ($beyond_col-1)); $last_line .= ", and beyond $beyond_col."; } push(@nlns,$last_line); } #$res = join("\n",@nlns); #$res .= "\n"; #${$rtxt} = $res; ${$rtxt} = \@nlns; prt( "Done total $tot_in characters, and output $tot_out...\n"); $mret = 1; } else { prtw("ERROR: Unable to open file [$fil]\n"); } return $mret; } sub get_begin_table() { #my $bt = ' <table align="center" cellpadding="0" cellspacing="0" summary="List of lines">'; my $bt = ' <table align="center" border="1" cellpadding="1" cellspacing="1" summary="List of lines">'; $bt .= "\n"; return $bt; } sub begin_html($) { my ($infile) = @_; my $txt = <<EOF; <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> <html> <head> <title> $infile column stripped </title> <style type="text/css"> <!-- .smallx { font-size: x-small } .small { font-size: small } .medium { font-size: mediam } --> </style> </head> <body> <div class="smallx"> EOF return $txt } sub is_my_file_type($) { my ($fil) = shift; return 1 if ($fil =~ /\.m2ts$/); return 0; } sub get_file_type_txt($) { my ($txt) = shift; $txt = "<a href=\"wedding/$txt\">$txt</a>"; return $txt; } sub write_output($$$) { my ($in,$out,$rtarr) = @_; my ($ntxt,$len,$i); my ($line,$bgn,$txt,$end,$tdtxt); my ($j,$off,$pbgn,$td); my $lncnt = scalar @{$rtarr}; my $hlcnt = scalar @html_cols; my $lnout = 0; my %hash = (); if ($lncnt && ($lncnt > $top_skip)) { if ($html_out) { prt("[dbg_sc01] Output the lines in a HTML shell, $lncnt lines, to file [$out], top_skip=$top_skip bot_skip=$bot_skip\n") if ($dbg_sc01); $ntxt = ''; $tdtxt = ($html_rtalign ? "<td align=\"right\">" : "<td>"); for ($i = $top_skip; $i < $lncnt; $i++) { last if (($lncnt - $i) < $bot_skip); $line = ${$rtarr}[$i]; $len = length($line); $ntxt .= "<tr>\n"; $lnout++; if ($hlcnt) { $off = 0; $bgn = 0; for ($j = 0; $j < $hlcnt; $j++) { $td = $tdtxt; $pbgn = $bgn; $bgn = $html_cols[$j]; if ($bgn < $len) { $txt = substr($line,$off,$bgn-$off); $end = substr($line,$bgn); } else { $txt = $line; $end = ' '; } $txt = trim_all($txt); $off += $bgn - $pbgn; if ($txt =~ /^(\d|,)+$/) { $td = "<td align=\"right\">"; } elsif ($txt =~ /^(\d|,)+\w{1}B$/) { $td = "<td align=\"right\">"; } elsif ( is_my_file_type($txt) ) { $txt = "<a href=\"wedding/$txt\">$txt</a>"; } $ntxt .= "$td\n"; $ntxt .= "$txt\n"; $ntxt .= "</td>\n"; } $txt = trim_all($end); if (length($txt) == 0) { $txt = ' '; } elsif ($txt =~ /^(\d|,)+$/) { $td = "<td align=\"right\">"; } elsif ($txt =~ /^(\d|,)+\w{1}B$/) { $td = "<td align=\"right\">"; } elsif ( is_my_file_type($txt) ) { $txt = get_file_type_txt($txt); # like "<a href=\"wedding/$txt\">$txt</a>"; } $ntxt .= "$td\n"; $ntxt .= "$txt\n"; } else { $line = trim_all($line); $ntxt .= "$line\n"; } $ntxt .= "</td>\n"; $ntxt .= "</tr>\n"; } if ( length($ntxt) ) { $ntxt = begin_html($in).get_begin_table().$ntxt; $ntxt .= "</table>\n"; $ntxt .= "</div>\n"; $ntxt .= "<!-- generated by $pgmname on ".get_YYYYMMDD(time(),'/')." -->\n"; $ntxt .= "</body>\n"; $ntxt .= "</html>\n"; } } else { for ($i = $top_skip; $i < $lncnt; $i++) { last if (($lncnt - $i) < $bot_skip); $line = ${$rtarr}[$i]; $len = length($line); if ($skip_common) { if ($len && !(defined $hash{$line})) { $hash{$line} = 1; $ntxt .= "$line\n"; $lnout++; } } else { $ntxt .= "$line\n"; $lnout++; } } $ntxt .= "\n" if ( length($ntxt) ); } $len = length($ntxt); if ($len) { rename2oldbak($out); write2file($ntxt,$out); prt( "Written $out, with $lnout ($lncnt) lines, $len chars...\n" ); if ($dbg2log) { prt("12345678901234567890123456789012345678901234567890123456789012345678901234567890\n"); prt($ntxt); prt("12345678901234567890123456789012345678901234567890123456789012345678901234567890\n"); } system($out) if ($got_out); } else { prt("ERROR: Got no NEW text!\n"); } } else { if ($lncnt) { prt("WARNING: Got no out lines! $top_skip > $lncnt!\n"); } else { prt("ERROR: Got no NEW lines!\n"); } } } # ================================================ # MAIN parse_args(@ARGV); if ( process_file($in_file, $columns, \$newtxt) ) { write_output($in_file,$out_file,$newtxt); pgm_exit(0,"Normal exit"); } else { pgm_exit(1,"Error exit"); } # ================================================ ################################ sub parameter_verification { my ($rc,$i,$bgn,$end,$j,$bgn2,$end2,$i2,$j2); $rc = scalar @col_ranges; if ($rc) { for ($i = 0; $i < $rc; $i++) { $i2 = $i + 1; $bgn = $col_ranges[$i][0] - 1; $end = $col_ranges[$i][1] - 1; if ($bgn <= $columns) { mydie("ERROR: BAD Range $i2 $bgn - $end <= $columns!\n"); } if ($beyond_col && ($end >= $beyond_col)) { mydie("ERROR: BAD Range $i2 $bgn - $end >= $beyond_col beyond column!\n"); } if ($bgn < $end) { for ($j = ($i + 1); $j < $rc; $j++) { $j2 = $j + 1; $bgn2 = $col_ranges[$j][0] - 1; $end2 = $col_ranges[$j][1] - 1; if ($bgn2 > $end) { # that's ok } else { mydie("ERROR: BAD Range $i2 [ $bgn - $end ] conflicts with $j2 [ $bgn2 - $end2 ]!\n"); } } } else { mydie("ERROR: Range values out of order $bgn GT or EQU $end\n"); } } } } sub need_arg { my ($a,@b) = @_; mydie("ERROR: Argument [$a] needs a following argument!\n") if (! @b); } sub parse_args { my (@av) = @_; my $cnt = scalar @av; prt( "Parsing $cnt arguments...\n" ); $cnt = 0; my (@arr,$flag); $flag = 0; my $abort = 0; while (@av) { my $arg = $av[0]; if ($arg =~ /^-/) { if ($arg eq '-ll') { $load_log = 1; prt( "Set to load log at end\n" ); } elsif (($arg eq '-?') || ($arg eq '-h') || ($arg eq '--help') || ($arg eq '/?') || ($arg eq '/h') || ($arg eq '/help')) { prt_help(); exit(0); } elsif ($arg eq '-c') { need_arg(@av); shift @av; $arg = $av[0]; if ($arg =~ /^\d+$/) { $columns = $arg; prt("Set columns to strip to [$columns]\n"); $flag |= 2; } else { prt("ERROR: -c must be followed by a number 0 to ...! Got [$arg]\n"); $abort++; } } elsif ($arg eq '-cr') { need_arg(@av); shift @av; $arg = $av[0]; @arr = split('-',$arg); if ((scalar @arr == 2)&&($arr[0] =~ /^\d+$/)&&($arr[1] =~ /^\d+$/)) { if ($arr[0] < $arr[1]) { push(@col_ranges, [ $arr[0], $arr[1] ]); prt("Set strip range ".scalar @col_ranges." to $arr[0] -to- $arr[1]\n"); } else { prt("ERROR: -cr range must be in order, and 1st less than 2nd, like 10-20! Got [$arg]\n"); $abort++; } } else { prt("ERROR: -cr must be followed by range, in form, like '10-20'! Got [$arg]\n"); $abort++; } } elsif ($arg eq '-i') { need_arg(@av); shift @av; $arg = $av[0]; if (-f $arg) { $in_file = $arg; prt("Set in file to [$in_file]\n"); $flag |= 1; } else { prt("ERROR: -i must be followed by valid file name! Got [$arg]\n"); $abort++; } } elsif ($arg eq '-o') { need_arg(@av); shift @av; $arg = $av[0]; $out_file = $arg; prt("Set out file to [$out_file]\n"); $flag |= 4; } elsif ($arg eq '-b') { need_arg(@av); shift @av; $arg = $av[0]; if ($arg =~ /^\d+$/) { $beyond_col = $arg; prt("Set beyond column $beyond_col...\n"); } else { prt("ERROR: -b must be followed by a number, like -b 75!\n"); $abort++; } } elsif ($arg eq '-d') { $dbg_load = 1; prt("Set dbg_load - adds debug line to top and bottom.\n"); } elsif ($arg =~ /^-htm/) { $html_out = 1; prt("Set HTML output\n"); } elsif ($arg eq '-hr') { $html_rtalign = 1; prt("Set right align on columns.\n"); } elsif ($arg eq '-hc') { need_arg(@av); shift @av; $arg = $av[0]; if ($arg =~ /^\d+$/) { push(@html_cols,$arg); prt("Set HTML column ".scalar @html_cols." to $arg.\n"); } else { prt("ERROR: -hc must be followed by a column number, like -hc 75!\n"); $abort++; } } elsif ($arg eq '-sc') { $skip_common = 1; prt("Set to skip repeated lines.\n"); } elsif ($arg eq '-rt') { need_arg(@av); shift @av; $arg = $av[0]; if ($arg =~ /^\d+$/) { $top_skip = $arg; prt("Skip $top_skip rows at TOP.\n"); } else { prt("ERROR: -rt must be followed by a row count, like -rc 2!\n"); $abort++; } } elsif ($arg eq '-rb') { need_arg(@av); shift @av; $arg = $av[0]; if ($arg =~ /^\d+$/) { $bot_skip = $arg; prt("Skip $bot_skip rows at BOTTOM.\n"); } else { prt("ERROR: -rt must be followed by a row count, like -rc 2!\n"); $abort++; } } else { prt("ERROR: Unkknown argument [$arg]\n"); $abort++; } } else { $cnt++; # if ($cnt == 1) { if ( !($flag & 1) ) { $in_file = $arg; prt("Set in file to [$in_file]\n"); $flag |= 1; } elsif ( !($flag & 2) ) { #} elsif ($cnt == 2) { $columns = $arg; prt("Set columns to strip to [$columns]\n"); $flag |= 2; } elsif ( !($flag & 4) ) { # } elsif ($cnt == 3) { $out_file = $arg; prt("Set out file to [$out_file]\n"); $flag |= 4; $got_out = 1 if ($out_file =~ /\.txt$/i); } else { prt("ERROR: Unkknown argument [$arg]\n"); $abort++; } } shift @av; } mydie("Aborting...\n") if ($abort); parameter_verification(); if ($dbg_run) { $html_out = 1; # push(@html_cols,6); # push(@html_cols,18); # push(@html_cols,38); # push(@html_cols,44); # #$dbg2log = 1; $load_log = 1; $out_file = 'temphtml.htm'; # $dbg_load = 0; # add debug lines top and bottom, and repeat last line $in_file = 'temp.txt'; $columns = 3; #push(@col_ranges, [ $arr[0], $arr[1] ]); $beyond_col = 62; push(@html_cols,34); push(@html_cols,50); } else { if ( !(($flag & 1)&&($flag & 2)) ) { prt_help(); exit(1); } elsif ( !($flag & 4) ) { prt("Using default out file of [$out_file]\n"); } } prt("In file [$in_file], columns=$columns, out to [$out_file]\n") if ($dbg_run || $dbg_load); } # eof - stripcols.pl