#!/usr/bin/perl -w # NAME: emailobfuscate.pl # AIM: Give an input file, convert all email from to # 2017-11-23 - Idea to reduce 'version.txt' commits to ONE LINE - if -V verhist.log added... # 23/11/2015 - Target to 'authors' needs to be in two places # 07/09/2015 - Add link to authors, and back to top # 04/06/2015 - Remove duplicate end html - TODO: Add some STYLE to HTML output ;=)) # 21/05/2015 geoff mclane http://geoffair.net/mperl use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use Time::Local; use Cwd; my $os = $^O; my $perl_dir = '/home/geoff/bin'; my $PATH_SEP = '/'; my $temp_dir = '/tmp'; if ($os =~ /win/i) { $perl_dir = 'C:\GTools\perl'; $temp_dir = $perl_dir; $PATH_SEP = "\\"; } unshift(@INC, $perl_dir); require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n"; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt"; open_log($outfile); # user variables my $VERS = "0.0.7 2017-11-23"; #my $VERS = "0.0.6 2015-09-07"; #my $VERS = "0.0.5 2015-05-21"; my $load_log = 0; my $in_file = ''; my $verbosity = 0; ###my $out_file = $temp_dir.$PATH_SEP."tempobsf.txt"; my $out_file = ''; my $html_version = ''; my $add_html = 0; my $add_suthors = 0; my $vers_file = ''; # ### DEBUG ### my $debug_on = 0; my $def_file = 'F:\Projects\temp2.log'; ### program variables my @warnings = (); my $cwd = cwd(); my %vers_commits = (); my $curr_date = lu_get_YYYYMMDD_hhmmss(time()); sub VERB1() { return $verbosity >= 1; } sub VERB2() { return $verbosity >= 2; } sub VERB5() { return $verbosity >= 5; } sub VERB9() { return $verbosity >= 9; } sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ) if (VERB9()); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub html_head($) { my $vers = shift; my $txt = < Release $vers

Release $vers

Change log for this release. List of authors

EOF
    return $txt;
}

sub html_end() {
    my $txt = <eof top



EOF
    return $txt;
}


sub html_tail() {
    my $txt = <
EOF
    $txt .= html_end();
    return $txt;
}

sub get_obs_line($) {
    my $line = shift;
    my ($len,$i,$ch);
    my $nline = '';
    $len = length($line);
    my $inem = 0;
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($line,$i,1);
        if ($inem) {
            if ($ch eq '@') {
                $nline .= ' _at_ ';
            } elsif ($ch eq '.') {
                $nline .= ' _dot_ ';
            } else {
                $nline .= $ch;
            }
            $inem = 0 if ($ch eq '>');
        } else {
            $nline .= $ch;
            $inem = 1 if ($ch eq '<');
        }
    }
    return $nline;
}

######################################################
# Converting SPACES to ' '
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to   ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ / /g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   if ($t ne $nt) {
       prt( "conv_space: from [$t] to [$nt] ...\n" ) if (VERB9());
   }
   return $nt;
}

###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&' to avoid interpreting as replacement
# 2. Convert '<' to '<' and '>' to '>', to avoid interpreting as HTML
# 3. Convert '"' to '"'
# if flag & 1
# 4. Convert '\t' to SPACES
# if flag & 2
# 5. Finally, if there are double or more SPACES, convert to ' '
###########################################################################
my $tab_space = '    ';
sub html_line($$) {
   my ($t,$flag) = @_;
   my $ot = $t;
   $t =~ s/&/&/g; # all '&' become '&'
   $t =~ s//>/g; # make sure all '>' is/are swapped out
   $t =~ s/\"/"/g; # and all quotes become "
   if ($flag & 1) {
       $t =~ s/\t/$tab_space/g; # tabs to spaces
   }
   if ($flag & 2) {
       if ($t =~ /\s\s/) { # if any two consecutive white space
            $t = conv_spaces($t);
       }
   }
   if ($ot ne $t) {
       prt( "html_line: from [$ot] to [$t] ...\n" ) if (VERB9());
   }
   return $t;
}


my %months = (
    'Jan' => 1,
    'Feb' => 2,
    'Mar' => 3,
    'Apr' => 4,
    'May' => 5,
    'Jun' => 6,
    'Jul' => 7,
    'Aug' => 8,
    'Sep' => 9,
    'Oct' => 10,
    'Nov' => 11,
    'Dec' => 12
    );

# sec,     # seconds of minutes from 0 to 61
# min,     # minutes of hour from 0 to 59
# hour,    # hours of day from 0 to 24
# mday,    # day of month from 1 to 31
# mon,     # month of year from 0 to 11
# year,    # year since 1900
# wday,    # days since sunday
# yday,    # days since January 1st
# isdst    # hours of daylight savings time
# /^\s*(\w+)\s+(\w+)\s+(\d+)\s+(\d{2}):(\d{2}):(\d{2}\s+(\d{4})/
#       Wed     May      13    12:37:20               2015       +0200
sub parsedate($) { 
  my ($s) = @_;
  my ($cday, $year, $month, $day, $hour, $minute, $second, $mon);
  #              cday    mon     day     hour   min      sec       year
  #              1       2       3       4      5        6         7
  if($s =~ /^\s*(\w+)\s+(\w+)\s+(\d+)\s+(\d{2}):(\d{2}):(\d{2})\s+(\d{4})\b/) {
      $cday   = $1;
      $month  = $2;
      $day    = $3;
      $hour   = $4;
      $minute = $5;
      $second = $6;
      $year   = $7;
      if (defined $months{$month}) {
          $mon = $months{$month};
          # which is correct???
          #prt("timelocal($second,$minute,$hour,$day,".($mon-1).",$year)\n");  
          return timelocal($second,$minute,$hour,$day,$mon-1,$year);  
          #return timelocal($second,$minute,$hour,$day,$mon,$year);  
      }
  }
  return -1;
}

sub process_in_file($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = ;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$author,$off,@arr,$date,$epoch,$tline,$len);
    my ($raa,$tmp);
    $lnn = 0;
    my @nlines = ();
    my @tlines = ();
    my %authors = ();
    my $bdate = '';
    my $edate = '';
    my $bep = time();
    my $eep = 0;
    my $gotdate = 0;
    my $commits = 0;
    my $comm = '';
    my $got_vers = 0;
    my $got_merge = 0;
    foreach $line (@lines) {
        chomp $line;
        $lnn++;
        $tline = trim_all($line);
        $len = length($tline);
        if ($len == 0) {
            # blank line
        } elsif ($line =~ /^Author:\s+/) {
            $author = $line;
            $author =~ s/^Author:\s+//;
            $off = index($author,'<');
            if ($off > 0) {
                $author = substr($author,0,$off-1);
            }
            $author = trim_all($author);
            $author = 'Geoff R. McLane' if ($author eq 'Geoff McLane');
            if (! defined $authors{$author}) {
                $authors{$author} = [0,0,0];
            }
            $raa = $authors{$author};
            ${$raa}[0]++;
        } elsif ($line =~ /^Date:\s+/) {
            $date = $line;
            $date =~ s/^Date:\s+//;
            $date = trim_all($date);
            $epoch = parsedate($date);
            if ($epoch != -1) {
                if ($epoch > $eep) {
                    $eep = $epoch;
                    $edate = $date;
                }
                if ($epoch < $bep) {
                    $bep = $epoch;
                    $bdate = $date;
                }
                $gotdate++;
            }
        } elsif ($line =~ /^commit\s+/) {
            if (@tlines) {
                if ($got_vers) {
                    push(@nlines,$vers_commits{$comm});
                    ${$raa}[1]++;
                } elsif ($got_merge) {
                    ${$raa}[2]++;
                    $tmp = $tlines[0];
                    push(@nlines,$tmp);
                    $len = scalar @tlines;
                    if ($len > 1) {
                        $tmp = $tlines[1];
                        push(@nlines,$tmp);
                    }
                    push(@nlines,"");
                } else {
                    push(@nlines,@tlines);
                }
                @tlines = ();
            }
            # clear previous
            $got_vers = 0;
            $got_merge = 0;
            undef $raa;
            $commits++;
            $comm = $line;
            $comm =~ s/^commit\s+//;
            $len = length($comm);
            if ($len > 10) {
                $comm = substr($comm,0,10);
            }
            if (defined $vers_commits{$comm}) {
                $got_vers = 1;
            }
        } elsif ($line =~ /^Merge:\s+/) {
            # Merge: 9e09f1a e4fc470
            $got_merge = 1;
        } elsif ($line =~ /^\s+/) {
            # comment text
        } else {
            pgm_exit(1,"$lnn: $line NOT PARSED\n");
        }

        if ($line =~ /<(.+)>/) {
            $inc = get_obs_line($line);
            prt("$lnn: converted\n$line TO\n$inc\n") if (VERB5());
            $line = $inc;
        }
        $line = html_line($line,0) if ($add_html);
        push(@tlines,$line);
        #push(@nlines,$line);
    }
    # add any remainder
    if (@tlines) {
        if ($got_vers) {
            push(@nlines,$vers_commits{$comm});
            ${$raa}[1]++;
        } elsif ($got_merge) {
            ${$raa}[2]++;
            $tmp = $tlines[0];
            push(@nlines,$tmp);
            $len = scalar @tlines;
            if ($len > 1) {
                $tmp = $tlines[1];
                push(@nlines,$tmp);
            }
            push(@nlines,"");
        } else {
            push(@nlines,@tlines);
        }
        @tlines = ();
    }
    $line = '';
    $line .= html_head($html_version) if ($add_html);
    $line .= join("\n",@nlines);
    @arr = keys %authors;
    $lnn = scalar @arr;
    $off = int(($eep-$bep) / 86400);
    if ($add_html) {
        ###########################################################################
        $line .= "

\n"; # close the pre if ($add_suthors && $lnn) { $line .= "\n"; $line .= "\n

This log has $commits commits by $lnn Authors. \n"; $line .= "'T' is total, 'V' is version.txt, 'M' is merges. \n"; $line .= "

\n"; $line .= "\n"; my $cols = 4; my $wrap = 0; while ($wrap < $cols) { $line .= "\n" if ($wrap == 0); $line .= "\n"; $line .= "\n"; $line .= "\n"; $line .= "\n"; $wrap++ } $line .= "\n"; $wrap = 0; foreach $inc (@arr) { #$lnn = $authors{$inc}; $raa = $authors{$inc}; $lnn = ${$raa}[0]; $got_vers = ${$raa}[1]; $got_merge = ${$raa}[2]; #$line .= "$inc $lnn; "; $line .= "\n" if ($wrap == 0); $line .= "\n"; $line .= "\n"; $line .= "\n"; $line .= "\n"; $wrap++; if ($wrap == $cols) { $line .= "\n"; $wrap = 0; } } if ($wrap) { while ($wrap < $cols) { $wrap++; $line .= "\n"; $line .= "\n"; $line .= "\n"; $line .= "\n"; } } $line .= "

Author	T	V	M
$inc	$lnn	$got_vers	$got_merge

\n"; } if (($gotdate > 2)&&($eep > $bep)) { $line .= "\n

Date: from $bdate to $edate ($off days)

\n"; } if (length($out_file)) { $line .= "

"; my ($n,$d) = fileparse($out_file); $line .= "$n, by $pgmname, on $curr_date, from 'git log'"; $line .= "

\n"; } $line .= html_end(); ########################################################################### } else { ########################################################################### if ($add_suthors && $lnn) { $line .= ""; $line .= "\nThis log has $commits commits by $lnn authors: "; foreach $inc (@arr) { $lnn = $authors{$inc}; $line .= "$inc $lnn; "; } $line .= "\n"; } if (($gotdate > 2)&&($eep > $bep)) { ###$line .= "Date: from ".lu_get_YYYYMMDD_hhmmss_UTC($bep)." to ".lu_get_YYYYMMDD_hhmmss_UTC($eep)." UTC\n"; $line .= "Date: from $bdate to $edate ($off days)\n"; } $line .= "\n\n; eof top\n"; ########################################################################### } if (length($out_file)) { rename_2_old_bak($out_file); write2file($line,$out_file); prt("New lines written to '$out_file'\n"); } else { prt($line); prt("Use -o file to write to a file...\n"); } } ############################################## ## Version history at 2017/11/23 #5.5.85 19e8796a5b Geoff McLane Wed Nov 22 15:01:44 2017 +0100 #5.5.84 c2c7b1dab2 Jim Derry Mon Nov 20 09:32:08 2017 -0500 #5.5.83 9e09f1a722 Jim Derry Mon Nov 20 09:29:51 2017 -0500 #5.5.82 302660e3cb Jim Derry Mon Nov 20 09:28:48 2017 -0500 #... #4.9.2 e2cbd9e89f Geoff McLane Thu Jan 29 18:25:57 2015 +0100 ## eof sub load_vers_file() { if (length($vers_file) == 0) { return; } my $inf = $vers_file; if (! open INF, "<$inf") { pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); } my @lines = ; close INF; my $lncnt = scalar @lines; prt("Processing $lncnt lines, from [$inf]...\n"); my ($line,$comm); my $lnn = 0; foreach $line (@lines) { chomp $line; $lnn++; next if ($line =~ /^\#/); if ($line =~ /^\d+\.\d+\..+\s+([0-9a-f]{10})\s/) { $comm = $1; $vers_commits{$comm} = $line; } else { pgm_exit(1,"Error:$lnn: $line NOT handled!\n"); } } my @arr = keys( %vers_commits); my $cnt = scalar @arr; prt("From $lnn lines, got $cnt commit lines...\n"); } my $def_test = 'F:\Projects\tidy-html5\README\verhist.log'; sub do_test() { $vers_file = $def_test; load_vers_file(); pgm_exit(1,"TEMP EXIT\n"); } ######################################### ### MAIN ### #do_test(); parse_args(@ARGV); process_in_file($in_file); pgm_exit(0,""); ######################################## sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); my $verb = VERB2(); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } elsif ($sarg =~ /^v/) { if ($sarg =~ /^v.*(\d+)$/) { $verbosity = $1; } else { while ($sarg =~ /^v/) { $verbosity++; $sarg = substr($sarg,1); } } $verb = VERB2(); prt("Verbosity = $verbosity\n") if ($verb); } elsif ($sarg =~ /^l/) { if ($sarg =~ /^ll/) { $load_log = 2; } else { $load_log = 1; } prt("Set to load log at end. ($load_log)\n") if ($verb); } elsif ($sarg =~ /^i/) { $add_suthors = 1; } elsif ($sarg =~ /^a/) { need_arg(@av); shift @av; $sarg = $av[0]; $html_version = $sarg; $add_html = 1; prt("Add html with version [$html_version].\n") if ($verb); } elsif ($sarg =~ /^o/) { need_arg(@av); shift @av; $sarg = $av[0]; $out_file = $sarg; prt("Set out file to [$out_file].\n") if ($verb); } elsif ($sarg =~ /^V/) { need_arg(@av); shift @av; $sarg = $av[0]; $vers_file = $sarg; prt("Set vers file to [$vers_file].\n") if ($verb); load_vers_file(); } else { pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n"); } } else { $in_file = $arg; prt("Set input to [$in_file]\n") if ($verb); } shift @av; } if ($debug_on) { prtw("WARNING: DEBUG is ON!\n"); if (length($in_file) == 0) { $in_file = $def_file; prt("Set DEFAULT input to [$in_file]\n"); } } if (length($in_file) == 0) { give_help(); pgm_exit(1,"\nERROR: No input files found in command!\n"); } if (! -f $in_file) { pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n"); } } sub give_help { prt("\n"); prt("$pgmname: version $VERS\n"); prt("Usage: $pgmname [options] in-file\n"); prt("Options:\n"); prt(" --help (-h or -?) = This help, and exit 0.\n"); prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n"); prt(" --load (-l) = Load LOG at end. ($outfile)\n"); prt(" --add vers (-a) = Add HTML head and tail using this version number.\n"); prt(" --out (-o) = Write output to this file.\n"); prt(" --inc (-i) = Include an authors summary to end.\n"); prt(" --Vers (-V) = Load verhist.log file, and its commits added as 1 line.\n"); prt("\n"); prt(" Given an input of a git log created with a command like -\n"); prt(" \$ git log \"--decorate=full\" 4a4f209..HEAD > temp.log\n"); prt(" and using a command like '\$ emailobs tidy-html5\\temp.log -o 5.1.8.html -a 5.1.8 -i\n"); prt(" generate a suitable html file to publish with the tidy binaries.\n"); } # eof - emailobfuscate.pl