fav-04.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:31 2010 from fav-04.pl 2006/07/15 17 KB.

#!/Perl
# AIM: To read the Internet Favorites, and produce
# a HTML document, with links and description
# 2006.07.11 - switch link column, and add (B) broken, from c:\HOMEPAGE\Broken02.htm
# update 2006.06.28 - weed out local references
# Added a MAXIMUM width, so the table approximately 'fits' a 1024 wide screen
# change to using '<base target="_blank">'
# 2005.11.12 - works ok - geoff mclane
# 
use File::stat;
my $DT = '2006.07.15';
$VERSION = '0.4';
$PACKAGE = 'fav-04';
my $hvers = "<!-- P26.$DT - minor update -->\n"; 
$hvers .= "<!-- P26.2006.07.11 - update -->\n";
$hvers .= '<!-- p26.2005.11.11 - List of favorites in PRO-1 geoffmclane.com/favorites.htm -->';
print "$0 ... Hello, World ...\n";
if( !defined( $ENV{'USERPROFILE'} ) ) {
   print "Can NOT locate USERPROFILE in ENVironment!\n";
   exit(1);
}
my $ff = $ENV{'USERPROFILE'} . '\\Favorites';
if( !( -d $ff ) ) {
   print "Folder $ff is NOT a directory!\n";
   exit(2);
}
# set a sample maximum title, wrap start at -10 from this - original set at 60
#             12345678901234567890123456789012345678901234567890123456789012345678901234567890
#                      1         2         3         4         5         6         7
my $maxtit = 'Domain Name Registration, Domain Transfe'; # rs. Your domain name search starts here.';
my $logfil = "temp.$0.txt";
my $htmfil = 'favorites.htm';
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my $addfold = 1;
my $sch = '™'; # avoid this character ...
my @fav_exclude = (
 'https://geoffmclane.com:2083/frontend/x/index.html' );
my @fav_broken = (
 'http://a.ninemsn.com.au/b.aspx',
 'http://blogs.msdn.com/nikolad/archive/2005/09/02/460368.aspx',
 'http://code.jenseng.com/jenChat/',
 'http://datacompression.info/JPEG.shtml',
 'http://document.ihg.uni-duisburg.de/cgi-bin/mapserv40',
 'http://drivers.soft32.com/index-2-12-110-0-4.html',
 'http://flightgear.org/Downloads/scenery-0.9.5.html',
 'http://free.compuserve.com/trycsfree/index2.adp',
 'http://grass.ibiblio.org/grass57/index.html',
 'http://home.exetel.com.au/atmint.exetel.com.au/2004SBTS.html',
 'http://ourworld.compuserve.com/homepages/GEOFF_MCLANE',
 'http://pubs.logicalexpressions.com/Pub0009/LPMArticle.asp',
 'http://serenitysydney.com.au/',
 'http://usa.asus.com/products/mb/socket478/p4c800-d/overview.htm',
 'http://www.adg.dk/airport.asp',
 'http://www.candleart.com.au/',
 'http://www.cartexpress.com/',
 'http://www.commerce-cgi.com/download.htm',
 'http://www.compalseast.org.au/',
 'http://www.dmartias.fr/mondial/',
 'http://www.e-directory.org/download/list/modules.html',
 'http://www.elanit.com.au/immediacy/main.asp',
 'http://www.fgdc.gov/clearinghouse/clearinghouse.html',
 'http://www.flightgear.org/~curt/Models/Special/Rascal110_2/',
 'http://www.flightgear.org/~curt/Photos/KMHV/',
 'http://www.flightgear.org/Downloads/scenery-0.9.7.html',
 'http://www.flymig.com/iata/r/Country.Papua_New_Guinea.htm',
 'http://www.frenchlinguistics.com/dictionary/',
 'http://www.friendofflowers.com/images/famphots/famphot.php',
 'http://www.interweb.com.au/',
 'http://www.iridiumsoftsol.com/content.aspx',
 'http://www.jobsearch.gov.au/',
 'http://www.libsdl.org/index.php',
 'http://www.linuxguruz.com/',
 'http://www.megxon.com/products/S302/S302.htm',
 'http://www.microsoft.com/downloads/details.aspx',
 'http://www.microsoft.com/isapi/redir.dll',
 'http://www.navigate.com.au/navigate/index.jsp',
 'http://www.netopia.com/buy/download_promo.jsp',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw/airportdata.html',
 'http://www.ntsb.gov/',
 'http://www.open-bits.org/browse.php',
 'http://www.ossim.org/tiki-read_article.php',
 'http://www.perldoc.com/perl5.8.0/lib.html',
 'http://www.stockill.org/fgfsdb/models.php',
 'http://www.wajb.freeserve.co.uk/codes.htm',
 'http://www.web-developer-india.com/web/jscript/refp_10.html',
 'http://www.worldofmaya.com/t_poly.html',
 'http://www.worldzone.net/games/azrael_dark/PROJECT_ZERO/GMAX.html',
 'http://x-plane.org/home/robinp/AptNavFAQ.htm',
 'https://164.214.2.62/products/digitalaero/index.cfm',
 'https://geoffmclane.com:2083/frontend/x/index.html',
 'https://www.clickstart.com.au/capabiliti/menuscript.asp');
my @oth_broken = (
 'file:///cgi-sys/Count.cgi',
 'file:///cgi-sys/guestbook.cgi',
 'http://&lt;!--',
 'http://docs.rinet.ru:8083/WebPub/ch56.htm',
 'http://emporium.turnpike.net/~viredit/emploi/cv/query.htm',
 'http://ev.free2code.net/plugins/articles/read.php',
 'http://geoffmclane.com/fgfs-003.htm',
 'http://home.netscape.com/assist/net_sites/new_html3_prop.html',
 'http://homepages.wmich.edu/~l0lazaro/perld/fileio.html',
 'http://jobs.iconrec.com.au',
 'http://lib.risk.ee/javanotes/c7/s6.html',
 'http://perl.hamtech.net/prog/ch03_109.htm',
 'http://tidy.sf.net/issue/1365706',
 'http://vadivel.thinkingms.com/PermaLink.aspx',
 'http://www.accuweather.com/adcbin/public/intlocal_index.asp',
 'http://www.ao.net/~juang/IntroJava2/JavaIO/JavaIO.html',
 'http://www.bradchoate.com/weblog/2002/08/12/mtmacro',
 'http://www.cclabs.missouri.edu/things/instruction/perl/perlcourse.html',
 'http://www.cruising.org/cvpc/cruiselines/DisplayShip.cfm',
 'http://www.digistuff.com/story_photos.asp',
 'http://www.flightgear.org/Downloads/scenery-0.9.7.html',
 'http://www.hollandamerica.com/fivestarfleet/rotterdam.htm',
 'http://www.jobsearch.gov.au/',
 'http://www.libsdl.org/cvs.php',
 'http://www.microsoft.com/downloads/details.aspx',
 'http://www.microsoft.com/downloads/details.aspx ',
 'http://www.neosoft.com/neosoft/man/perl.1.html',
 'http://www.netacc.net/~poulsen/moonphase.html',
 'http://www.netscape.com/navigator/',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html',
 'http://www.novell.com/products/netware4/quicklook.html',
 'http://www.opengl.org/resources/libraries/glut.html',
 'http://www.opengl.org/resources/libraries/glut/glut_downloads.html',
 'http://www.reunir.com/fiche.asp',
 'http://www.shfa.nsw.gov.au/content/home.cfm',
 'http://www.stratus.com/products/vos',
 'http://www.tek-tips.com/viewthread.cfm',
 'http://www.x-plane.org/users/robinp',
 'http://www.x-plane.org/users/robinp/',
 'https://ccvs.cvshome.org/',
 'https://www.cvshome.org/');
my $basedir = $ff;
my $blen = length($basedir);
my ($fn,$ffn,$LF,$HF);
my @dirs = ($ff);
my @fils = ();
my @tblist = ();
my @warnings = ();
my $wmsg = '';
my $f_title = '';
my $f_link = '';
my $f_tlink = '';
my $f_data = '';
my ($f_fold, $f_tit);
open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n";
open $HF, ">$htmfil" or die "Can NOT open HTML file $htmfil!\n";
my $fcnt = scalar @fils;
my $dcnt = scalar @dirs;
my $maxwid = length($maxtit);
#print "Found $fcnt files, and $dcnt directories ...\n";
while (scalar @dirs) {
   local @dir2 = @dirs;
   @dirs = ();
   while ($fn = shift @dir2) {
      do_dir($fn);
   }
}
$fcnt = scalar @fils;
$dcnt = scalar @dirs;
prt( "Total: $fcnt URL files ...\n" );
prt( "Maximum line length used = $maxwid ...\n" );
# choosing a DOCTYPE
##my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">';
my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">';
##my $doctyp3 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">';
out_htm_head();
oh( '<table border="1" width="100%" summary="List of favorites - First column is the title, and the 2nd is link">' );
oh( ' <tr>' );
if ($addfold) {
   oh( '  <td><b>Folder</b></td>' );
}
oh( '  <td><b>Title</b></td>' );
oh( '  <td><b>Link</b></td>' );
oh( '  <td><b>Date</b></td>' );
oh( ' </tr>' );
get_table_arr();
my $tcnt = scalar @tblist;
for (my $i = 0; $i < $tcnt; $i++) {
   $f_title = $tblist[$i][0];
   $f_link =  $tblist[$i][1];
   $f_tlink = $tblist[$i][2];
   $f_date =  $tblist[$i][3];
   $f_fold =  $tblist[$i][4];
   $f_tit  =  $tblist[$i][5];
   oh( '   <tr>' );
   if ($addfold) {
      oh( "    <td>$f_fold</td>" );
      oh( "    <td>$f_tit</td>" );
   } else {
      oh( "    <td>$f_title</td>" );
   }
   oh( "    <td><a href=\"$f_link\">$f_tlink</a></td>" );
   oh( "    <td>$f_date</td>" );
   oh( '   </tr>' );
   ###prt( "$i [".$tblist[$i][0].", ".$tblist[$i][1].", ".$tblist[$i][2].", ".$tblist[$i][3]."]\n" );
   ###prt( "$i [$f_title, $f_link, $f_tlink, $f_date, $f_fold, $f_tit]\n" );
}
oh( '</table>' );
out_htm_tail();
if (@warnings) {
   prt( "Repeating WARNINGS issues ...\n" );
   foreach $wmsg (@warnings) {
      prt($wmsg);
   }
}
prt("Loading $htmfil ... may have to be closed to continue...\n");
close( $HF );
close( $LF );
system( $htmfil );
##system( $logfil );
exit(0);
#######################################################################
### just subs below
sub get_table_arr {
   prt( "Getting array of ".scalar @fils." files ...\n" );
   foreach $fn (@fils) {
      # process each file
      local $FH;
      my $sb = stat($fn);
      my $tms = get_YYYYMMDD(scalar localtime $sb->mtime);
      if ( open( $FH, $fn ) ) {
         local @lns = <$FH>; # slurp in the lines
         local $sn = remdir($fn);   # file name is the TITLE of the favorite ...
         ###prt( "Processing " . remdir($fn) . " of " . scalar @lns . " lines ...\n");
         close( $FH );
         local $line;
         my $fnd = 1;
         my $bkn = 0; # assume NOT broken link, per FP
         # get the FOLDER
         my $ind = rindex($sn, "\\");
         my $fold = '.';
         my $tit = $sn;
         if ($ind != -1) {
            $fold = substr($sn, 0, $ind);
            $tit = substr($sn, ($ind + 1));
         }
         foreach $line (@lns) {
            chomp $line;
            if( $line =~ /^URL=/ ) {
               local $u = substr($line,4); ## ~ s/^URL=//;
               if (in_exclude($u)) {
                  $fnd = 0; # avoid a WARNING ...
                  last;
               }
               if (in_fav_broken($u) || in_oth_broken($u)) {
                  $bkn = 1;
               }
               ##prt( "\"$sn\",$u\n" );
               local $mu = max_sub2($u,$maxwid);
               $mu =~ s/&/&amp;/g;
               $sn = max_sub($sn, $maxwid); # wrap text to max width
               $sn =~ s/&/&amp;/mg; # possible MULTIPLE lines
               if (($fold eq 'Links') && (substr($tit,0,4) eq 'FIFA')) {
                  prt( "Exception - changed [$sn] and [$tit] \n" );
                  $tit =~ s/$sch/&#153;/g;
                  $sn =~ s/$sch/&#153;/mg;
                  prt( "Exception - to [$sn] and [$tit] \n" );
               }
               $u  =~ s/&/&amp;/g;
               $tit =~ s/&/&amp;/g;
               if ($bkn) {
                  ### $tms .= '<b>(B)</b>';
                  $sn = '<b>(B)</b> '.$sn;
                  $tit = '<b>(B)</b> '.$tit;
               }
               push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit]);
               ###prt( "push(\@tblist, [$sn, $u, $mu, $tms, $fold])\n" );
               $fnd = 0;
               last;
            }
         }
         if ($fnd) {
            $wmsg = "WARNING: Did NOT find a URL line in [$fn] ...\n"; 
            prt($wmsg);
            push(@warnings,$wmsg);
         }
      } else {
         $wmsg = "WARNING: Unable to open file [$fn] ...\n";
         prt($wmsg);
         push(@warnings,$wmsg);
      }
   }
   prt( "Got array of ".scalar @tblist." items ...\n" );
}
## month to number
sub mth_to_num {
   my ($mth) = shift;
   my $cnt = 0;
   ###prt( "Chk [$mth] " );
   foreach my $m (@mths) {
      $cnt++;
      if ($m eq $mth) {
         ###prt( "Is $m - return $cnt\n" );
         return $cnt;
      }
   }
   mydie( "ERROR: Returning 0!!! for [$mth]\n" );
   return '??';
}
sub get_YYYYMMDD {
   my ($tm) = shift;
   my @arr = split( / /, $tm ); # time of form 'Sat Mar 12 03:11:55 2005'
   my $ac = scalar @arr;
   my $doff = 2;
   my $yoff = 4;
   if ($ac == 5) {
      $doff = 2;
      $yoff = 4;
   } elsif ($ac == 6) {
      $doff = 3;
      $yoff = 5;
   } else {
      mydie( "ERROR: Time ($tm) did NOT split correctly!\n" );
   }
   my $mn = mth_to_num( $arr[1] );
   if ($mn < 10) {
      $mn = '0'.$mn;
   }
   my $dn = $arr[$doff];
   if ($dn < 10) {
      $dn = '0'.$dn;
   }
   my $dtt = $arr[$yoff].'/'.$mn.'/'.$dn; # translated to 2005/03/12
   return $dtt;
}
sub get_lists {
   ###foreach $fn (@files) {
   while ($fn = shift @_) {
      next if ($fn eq '.');
      next if ($fn eq '..');
      $ffn = $ff . '\\' . $fn;
      if( -d $ffn ) {
         push(@dirs, $ffn);
      } else {
         if ($fn =~ /\.url$/i) {
            push(@fils, $ffn);
         } else {
            prt( "Discarding file $ffn ...\n" );
         }
      }
   }
   $fcnt = scalar @fils;
   $dcnt = scalar @dirs;
   prt( "Found $fcnt files, and $dcnt directories ...\n" );
}
sub do_dir {
   local ($dn) = @_;
   print "Processing $dn ...\n";
   opendir(DIRH, $dn);
   local @f = readdir(DIRH);
   closedir(DIRH);
   print "Found " . scalar @f . " entries ...\n";
   $ff = $dn;
   get_lists(@f);
}
sub prt {
   my $msg = shift;
   print $msg;
   print $LF $msg;
}
sub max_sub2 {
   my ($ln, $max) = @_;
   if (length($ln) > ($max+5)) {
      $ln = substr($ln,0,$max) . '...';
   }
   return $ln;
}
sub max_sub {
   my ($ln, $max) = @_;
   my $nln = $ln;
   if (length($ln) > $max) {
      my @arr = split(/ /,$ln);
      $nln = '';
      my $bit = '';
      my $bl = 0;
      my $sl = 0;
      my $sc = 0;
      foreach my $s (@arr) {
         $sl = length($s);
         $bl = length($bit);
         while ($sl > $max) {
            if ($bl) {
               $bit .= ' ';
            }
            $bit .= substr($s, 0, $max - $bl);
            $s = substr($s, $max - $bl);
            if (length($nln)) {
               $nln .= "<br>\n";
            }
            $nln .= $bit;
            $bit = '';
            $sl = length($s);
            $bl = length($bit);
            $sc = 0;
         }
         if ($bl) {
            if (( $bl + $sc + length($s) ) > $max ) {
               if (length($nln)) {
                  $nln .= "<br>\n";
               }
               $nln .= $bit;
               $bit = $s;
               $sc = 0;
            } else {
               $bit .= ' ';
               $sc++;
               $bit .= $s;
            }
         } else {
            $bit = $s;
            $sc = 0;
         }
      }
      if (length($bit)) {
         if (length($nln)) {
            $nln .= "<br>\n";
         }
         $nln .= $bit;
      }
   }
   return $nln;
}
sub remdir {
   local ($f) = @_;
   local $b2 = quotemeta($basedir);
   ###$f =~ s/^$basedir//;
   ###$f = substr( $f, (length($basedir) + 1) );
   ##$f = substr( $f, ($blen + 1), (length($f) - $blen - 5) );
   $f =~ s/^$b2\\//; # remove beginning ...
   $f =~ s/\.url$//; # and remove tail
   return $f;
   ###return (max_sub($f, $maxwid));
}
sub ohl {
   print $HF "\n";
}
sub oh {
   local ($txt) = @_;
   print $HF $txt;
   ohl();
}
sub out_htm_head {
oh( $doctyp4 );
oh( '<html>' );
oh( '<head>' );
oh( "<title>List of Geoff Favorites</title>" );
oh( '<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">' );
oh( '<meta name="Author" content="Geoff Mclane">' );
oh( '<style type="text/css">' );
oh( '<!-- /* Style Definitions */' );
oh( 'body {' );
oh( ' background-image:url("clds3.jpg");' );
oh( ' margin: 0cm 1cm 0cm 1cm;' );
oh( '}' );
oh( 'h1{' );
oh( ' background:#efefef;' );
oh( ' border-style: solid solid solid solid;' );
oh( ' border-color:#d9e2e2;' );
oh( ' border-width:1px;' );
oh( ' padding:2px 2px 2px 2px;' );
oh( ' font-size:200%;' );
oh( ' text-align:center;' );
oh( '}' );
oh( '.ctr { text-align:center; }' );
oh( '.bld { font-weight:bold; }' );
oh( '-->' );
oh( '</style>' );
oh( '<base target="_blank">' ); # set so ALL open in 'New Window'
oh( '</head>' );
oh( '<body>' );
oh( '<h1><a name="top"></a>List of Geoff Favorites</h1>' );
oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' );
oh( '<a target="_self" href="#bottom">bottom</a></p>' );
oh( '<p>This is a simple table, as at $DT, of my ever changing, personal <span class="bld">Favorites</span>. ');
oh( 'It is autogenerated periodically, using a Perl script, in an attempt to keep it up to date ;=)) ' );
oh( 'It does contain some broken links, sites that have disappeared, but most are valid and current. ' );
oh( 'The base target has been set to _blank, so when a link is clicked, it should open in a NEW ');
oh( 'browser page. While the link text is sometimes truncated, the underlying anchor reference ');
oh( 'contains the full link ... Enjoy ...</p>' );
}
sub out_htm_tail {
oh( '<p><a name="bottom"></a>' );
oh( "This table is auto-generated from a Perl script, reading and analysing my 'Favorites' folder, " );
oh( "from the USERPROFILE given in the environment. Those marked with a <b>(B)</b> were <b>BROKEN</b> links ");
oh( "at the last full verification done by FrontPage ... sometimes it is due to the fact that they are " );
oh( "secure sites (https), and sometimes due to the fact that the site, or at least that page, has since been pulled down, " );
oh( "but I have yet to delete this link from my personal 'Favorites' ... and just sometimes FrontPage " );
oh( "makes a mistake in its verification process, and/or the site has a redirection active!</p>" );
oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' );
oh( '<a target="_self" href="#top">top</a></p>' );
   print $HF <<"EOF";
<p><a href="http://validator.w3.org/check?uri=referer">
<img src="images/valid-html401.gif" alt="Valid HTML 4.01 Transitional" height="31" width="88"></a></p>
</body>
$hverss
</html>
EOF
}
sub in_fav_broken {
   my ($h) = shift;
   foreach $l (@fav_broken) {
      if ($l eq $h) {
         return 1;
      }
   }
   return 0;
}
sub in_oth_broken {
   my ($h) = shift;
   foreach $l (@oth_broken) {
      if ($l eq $h) {
         return 1;
      }
   }
   return 0;
}
# my @fav_exclude = (
sub in_exclude {
   my ($h) = shift;
   foreach $l (@fav_exclude) {
      if ($l eq $h) {
         return 1;
      }
   }
   return 0;
}
#eof

index -|- top

checked by tidy  Valid HTML 4.01 Transitional