tidycmp02.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:58 2010 from tidycmp02.pl 2007/06/08 10.6 KB.

#!/Perl
# tidycmp02.pl
# ############################################################################
# AIM: To DOWNLOAD the accessibility table from the web site
# http://www.aprompt.ca/Tidy/accessibilitychecks.html
# parse the html, extracting the HTML test file link
# Download the link, advise if FAILED,
# else write the file to an OUTPUT folder,
# converting the line endings to DOS line endings ...
# and compare its contents to Tidy's accesscases.txt
# NOTE: With $dbg1 == 0, there can be quite LONG delays before NEXT output ...
# Likewise if $dbg2 == 0, and/or $dbg3 == 0 - it looks like NOTHING is happening!!!
# #############################################################################
use strict;
use warnings;
use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp'.$0.'.txt';
my $outfil1 = 'temp1'.$0.'.htm';
# program variables
my $download = 1; # do the ACTUAL downloads,
my $dotidytest = 0;   # compare with Tidy file ...
# or use locally saved file after first download
# online source
my $site = 'http://www.aprompt.ca/Tidy/';
my $URL = $site . 'accessibilitychecks.html';
# local HDD source
###my $src_folder = "F:\\Gtools\\tidyproj\\tidycvs6-2\\test\\";
my $src_folder = "F:\\FGCVS\\tidy\\test\\";
my $in_file = $src_folder.'accesscases.txt';
my $in_folder = $src_folder."accessTest\\";
my $out_folder = 'tmp6'; # and output FOLDER, for download
my $new_out = 'tempaccess.txt';
my @tests = ();
my $text = '';
my $tcnt = 0;
my @arr = ();
my $dtext = '';
my $line = '';
my @lines = ();
my $tln = '';
my $tlcnt = 0;
my @mdarr = ();
my @hrefs = ();
my $thrftxt = '';
my $lhrftxt = 0;
my $thrffil = '';
my @newtest = ();
my @desc = ();
my $lcnt = 0;
my $cnt = 0;
my $we = '';
my $test = '';
my $lev = 0;
my $href = '';
my $href2 = '';
my $flip = 0;
my $fnd = 0;
my $dsc = '';
my $tstcnt = 0;
my $ntcnt = 0;
my $dtcnt = 0;
my $wrtncnt = 0;
my $dsccnt = 0;
my $msg = '';
# information collected
my @zeroonline = ();
my @missingcvs = ();
my @difflevels = ();
# debug
my $dbg1 = 0;   # additional diagnostic output
my $dbg2 = 0;   # output the test cases read in ...
my $dbg3 = 0;   # output information when found ...
#######################################################
### main program
open_log($outfile);
prt( "$0 ... Hello, World...\n" );
if ($download) {
   prt("Fetching text from $URL ...\n");
   $text = get("$URL");
   # this assumes CR line endings
   ###@arr = split("\r", $text);
   ##$dtext = join( "\n", @arr );
   ## so without assumption
   if (defined $text) {
      $dtext = force_unix_le($text);
      $tcnt = length($text);
      $dtcnt = length($dtext);
   } else {
      $text = '';
      $dtext = '';
      $tcnt = 0;
      $dtcnt = 0;
   }
   prt( "Got $tcnt ($dtcnt) characters from URL $URL ...\n");
   write2file($dtext,$outfil1);
   @arr = split( "\n", $dtext );
} else {
   open INF, "<$outfil1" or mydie( "ERROR: Unable to open [$outfil1] ... $1\n" );
   @arr = <INF>; # slurp it all
   close INF;
}
if ($dotidytest) {
   prt("Openning the compare file [$in_file] ...\n");
   open INF, "<$in_file" or mydie( "ERROR: Unable to open [$in_file] ... $1\n" );
   my @tmp = <INF>; # slurp it all
   close INF;
   prt( "Got ".scalar @tmp." from $in_file ... putting into a multi-dimensional array ...\n" );
   foreach $line (@tmp) {
      $tln = trimall($line);
      if (length($tln)) {
         push(@lines, $tln);
         my @ts = split(" ",$tln);
         if (scalar @ts == 3) {
            push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]);
         } else {
            prt( "WARNING: [$tln] did not split correctly ...\n" );
         }
      }
   }
   $tlcnt = scalar @mdarr;
   prt( "Got $tlcnt (".(scalar @lines).") from $in_file ...\n" );
   for (my $i3 = 0; $i3 < $tlcnt; $i3++) {
      $msg = $mdarr[$i3][0] . ' ' . $mdarr[$i3][1] . ' ' . $mdarr[$i3][2];
      prt( "$msg\n" ) if ($dbg2);
   }
}
###my $etext = htmlexpand($text);
###my $ctext = htmlcleanall($etext);
##open WOF, ">$outfil1" or mydie("ERROR: Unable to open $outfil1 - $!\n");
$lcnt = scalar @arr;
prt( "Processing $lcnt lines ...\n" );
# expect something like ..............
# Error number [13.2.1.3] - Priority 2
# or
# Warning number [7.4.1.1] - Priority 2
# Warning number [1.1.1.2] - Priority 1
# All images require text equivalents but "alt" text must also meet ...
#
# Testfile 1.1.1.f2: suspicious "alt" text (filename) 
# View testfile source = link 
# Testfile             = link 
# ....................................
foreach $line (@arr) {
##   print WOF $line."\n";
   $tln = trimall($line);
   $tln = removetag($tln, 'b');
   $tln = removetag($tln, 'br');
   ##if ($line =~ /(Error|Warning)\s+\[(\d+\.\d+\.\d+\.\d+)\]\s+-\s+Priority\s+(\d{1})/) {
   ##if ($tln =~ /(Error|Warning)\s+number\s+/i) {
   ##if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\]/i) {
   if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\].+Priority\s+(\d+)/i) {
      $cnt++;
      $we = $1;
      $test = $2;
      $lev = $3;
      ##prt( "[$2] $tln\n" );
      ##prt( "$cnt [$we] [$test] [$lev]\n" );
      push(@tests, [$test, $lev, $we]);
      $flip = 0;
   } elsif ($tln =~ /href=["'](\S+)["']./i ) {
      if ($cnt) {
         my $hrf = $1;
         if ($flip) {
            if ($flip == 1) {
               $href = $site . $hrf;
               if ($download) {
                  prt( "Moment ... loading [$href] ...\n" );
                  $thrftxt = get($href);
                  if (defined $thrftxt) {
                     $lhrftxt = length($thrftxt);
                  } else {
                     $thrftxt = '';
                     $lhrftxt = 0;
                  }
                  if ($lhrftxt) {
                     ###$thrftxt =~ s/\r/\r\n/gm;
                     ###$thrftxt =~ s/\r/\n/gm;
                     $thrftxt = force_unix_le($thrftxt);
                     $thrffil = $test;
                     $thrffil =~ s/\./-/g;
                     $thrffil = $out_folder . '/' . $thrffil . '.html';
                     write2file( $thrftxt, $thrffil );
                     $wrtncnt++; # count another WRITTEN
                     prt( "[$test] Test HREF=\"$href\" length $lhrftxt ... written [$thrffil]\n" ) if ($dbg1);
                  } else {
                     $msg = "[$test] Test HREF=\"$href\" length is ZERO - CHECK ME! ...";
                     prt( "$msg\n" ) if ($dbg3);
                     push(@zeroonline, $msg);
                  }
               } else {
                  # no download done ...
               }
            } else {
               prt( "[$test] CHECK ME HREF=\"$hrf\" \n" );
            }
         } else {
            $href2 = $hrf;
            prt( "[$test] View HREF=\"$href2\"\n" ) if ($dbg1);
         }
         $flip++;
      }
   } elsif ($tln =~ /Testfile\s+\d+.+:\s+(.*)/) {
      # like - Testfile 1.1.1.f1: <img> missing "alt" text
      my $ds = $1;
      $ds =~ s/&lt;/</g;
      $ds =~ s/&gt;/>/g;
      $ds =~ s/"/'/g;
      push(@desc, [$test, $ds]);
      prt( "[$test] Description=[$ds]\n" ) if ($dbg1);
   }
}
prt( "DONE processing $lcnt lines ...\n" );
$tstcnt = scalar @tests;
$dsccnt = scalar @desc;
prt( "Written $wrtncnt new files ... Got $tstcnt test sets ... $dsccnt desciptions ...\n" );
##close WOF;
for (my $i = 0; $i < $tstcnt; $i++) {
   $we = $tests[$i][2];
   $test = $tests[$i][0];
   $lev = $tests[$i][1];
   $fnd = test_in_lines($test);
   $dsc = find_desc($test);
   my ($tf, $tff);
   $tf = $test;
   $tf =~ s/\./-/g;
   $tff = $in_folder . $tf . ".html";
   if ($fnd) {
      $tln = $lines[$fnd-1];
      ###my $tf = $mdarr[$fnd-1][0];
      my $tc = $mdarr[$fnd-1][1];
      ###my $tff = $in_folder . "\\" . $tf . ".html";
      if (-f $tff) {
         my @tmparr = split(" ", $tln);
         if (scalar @tmparr == 3) {
            my $lev2 = $tmparr[2]; 
            if ($lev2 == $lev) {
               prt( "[$test] [$lev] [$tln] [$tc] ok\n" ) if ($dbg1);
            } else {
               $msg = "[$test] [$lev] [$tln] [$tc] ok BUT different level [$lev2] ...";
               push(@difflevels, $msg);
               prt( "$msg\n" ) if ($dbg3);
            }
            push(@newtest, [$tf, $test, $lev, $we, $dsc]);
         } else {
            mydie( "[$test] [$lev] [$tln] [$tc] ok BUT NO LEVEL COMPARE\n" );
         }
      } else {
         prt( "[$test] [$lev] [$tln] [$tc] missing [$tff]?\n" );
         push(@newtest, [$tf, $test, $lev, $we, $dsc]);
      }
   } else {
      if (-f $tff) {
         $msg = "NOT FOUND [$test] [$lev] BUT found [$tff]";
      } else {
         $msg = "NOT FOUND [$test] [$lev]";
      }
      push(@missingcvs, $msg);
      prt( "$msg\n" ) if ($dbg3);
   }
}
# output warning information, if NOT output during processing
if (!$dbg3) { # no output during processing
   $cnt = scalar @zeroonline;
   if ($cnt) {
      prt( "Count $cnt file(s) appear MISSING from on-line site ...\n" );
      foreach $msg (@zeroonline) {
         prt( "$msg\n" );
      }
   }
   $cnt = scalar @missingcvs;
   if ($cnt) {
      prt( "Count $cnt file(s) appear MISSING from CVS download ...\n" );
      foreach $msg (@missingcvs) {
         prt( "$msg\n" );
      }
   }
   $cnt = scalar @difflevels;
   if ($cnt) {
      prt( "Count $cnt item(s) appear to have DIFFERENT priority ...\n" );
      foreach $msg (@difflevels) {
         prt( "$msg\n" );
      }
   }
}
$ntcnt = scalar @newtest;
prt( "\nOutputting $ntcnt tests to [$new_out] ...\n" );
open OUTF, ">$new_out" or mydie( "ERROR: Unable to open $new_out ...$! \n" );
for (my $i = 0; $i < $ntcnt; $i++) {
   print OUTF $newtest[$i][0] . ' ' . $newtest[$i][1] . ' ' . $newtest[$i][2]; 
   print OUTF ' ' . $newtest[$i][3] . ' ' . $newtest[$i][4] . "\n"; 
}
close OUTF;
##system($outfil1);
close_log($outfile,1);
exit(0);
### push(@desc, [$test, $dsc]);
sub find_desc {
   my ($tst) = shift;
   my $d = 'NOT FOUND';
   my $ct = scalar @desc;
   for (my $i2 = 0; $i2 < $ct; $i2++) {
      if ($desc[$i2][0] eq $tst) {
         $d = $desc[$i2][1];
         last;
      }
   }
   return $d;
}
###                file    test    level
###   push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]);
sub test_in_lines {
   my ($tst) = shift;
   my $f = 0;
   my $ct = 0;
   my $ln = '';
   ###prt( "Finding [$tst] ...\n" );
   for (my $i2 = 0; $i2 < $tlcnt; $i2++) {
      $ct++;
      ##my $ts = $mdarr[$i2][1];
      ##prt( "Compare with [$ts] ...\n" );
      if ($mdarr[$i2][1] eq $tst) {
         $f = $ct;
         last;
      }
   }
   return $f;
}
sub trimall {
   my ($ln) = shift;
   chomp $ln;
   $ln =~ s/\r$//;
   $ln =~ s/\t/ /g;
   while ($ln =~ /\s\s/) {
      $ln =~ s/\s\s/ /g;
   }
   while ($ln =~ /^\s/) {
      $ln = substr($ln,1);
   }
   while ($ln =~ /\s$/) {
      $ln = substr($ln,0, length($ln) - 1);
   }
   return $ln;
}
sub force_unix_le {
   my ($dtx) = shift;
   my $ntx = '';
   my $len = length($dtx);
   for (my $i = 0; $i < $len; $i++) {
      my $ch = substr($dtx,$i,1);
      if ($ch eq "\r") {   # if CR, check for CR/LF
         $i++; # move to next char
         if ($i < $len) { # if length
            $ch = substr($dtx,$i,1);
            if ($ch ne "\n") {   # is is LF
               $ntx .= "\n"; # no, force LF to replace CR
               if ($ch eq "\r") {   # but if it IS another CR
                  $i--; # back up to collect this
                  next;   # and loop
               }
            }
            # else let this caracter be added
         } else { # last char
            $ch = "\n"; # add final LF
         }
      }
      $ntx .= $ch;
   }
   return $ntx;
}
# eof - tidycmp02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional