sitemap02.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:56 2010 from sitemap02.pl 2009/10/16 7.9 KB.

#!\perl -w
#
# sitemap02.pl -- a command-line utility for building an HTML site map
# ===========================================
# Usage:   perl sitemap02.pl [/file/path/to/serverroot/ ["htm, html, xhtml"]]
#          (the --localpath=... / --exts=... Getopt::Long form is currently disabled)
# 2009/10/16 - revisited
# 22/07/2007 - some initial experiments
use strict;
use warnings;
use File::Basename;
use Cwd;
use File::stat;
use File::Find;
use Getopt::Long;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# Debug switches: set a flag to 1 to enable the matching prt() diagnostics.
my $dbg01 = 1;   # main loop: "[dbg01] Processing $directory ..." for each directory
my $dbg2 = 0;   # main loop: "Processing file ..." for each file
my $dbg3 = 0;   # get_title(): input length and the title tag that was found
my $dbg4 = 0;   # main loop: per-file size/line count and each pushed result
my $dbg05 = 0;  # main loop: "[dbg05] Skipping $directory ..." for ignored folders
# Log file setup.
# The log is named "temp.<script name>.txt" (a relative path), so only the
# final path component of $0 may be used.  Strip any leading directory,
# whichever separator ("\" or "/") was used and with or without a drive
# letter; otherwise the separators end up inside the log file name.
my ($LF);
my $pgmname = $0;
$pgmname =~ s{\A.*[\\/]}{}s;
my $outfile = "temp.$pgmname.txt";
# NOTE(review): open_log() is not defined in this file; presumably it is
# supplied by logfile.pl -- confirm.
open_log($outfile);
###prt( "$0 ... Hello, World ...\n" );
# Output features
my $use_table = 1;  # output HTML results in a TABLE form (otherwise an ordered list)
my $use_fn_only = 1;   # table form only: show just the file name as the link text
my $out_file = 'tempsm.htm';   # name of the generated HTML site map
# HTML stuff: the doctype declaration written at the top of the generated page
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">';
# File-level working variables shared by the main loop and the subroutines.
my (%file_list, $local_path, $file_exts, $fn, @lines, $line, $max, $title);
my @results = ();   # one [relative file name, title] pair per page that was read
# Directory names (matched case-insensitively by is_fp_folder) that are skipped.
my @fpfolders = qw( _vti_cnf _vti_pvt _private _derived );
# parse the command line options:
##GetOptions( "localpath=s" => \$local_path,
##            "exts=s"      => \$file_exts);
# or JUST A SIMPLE SHIFT
# First positional argument: root directory to scan (with a built-in default).
$local_path = shift || 'C:/homepage/GA';
# An 'ok' directory is skipped as well.
push(@fpfolders, 'ok');
#$local_path = shift || 'C:/homepage/GeoffAir';
#$file_exts = shift || 'htm, html, xhtml, php';
# Second positional argument: comma separated list of file extensions.
$file_exts = shift || 'htm, html, xhtml';
#mydie( "Usage: perl --localpath=/file/path/to/root/  --exts='xhtml, html' sitemap.pl\n" ) 
# NOTE(review): both values fall back to non-empty defaults above, so this
# usage check cannot trigger as written.
mydie( "Usage: perl sitemap02.pl /file/path/to/root/  ['xhtml, html']\n" ) 
     unless $local_path and $file_exts;
# Drop one trailing "/" so relative names can be cut out with length($local_path)+1.
$local_path =~ s/\/$//;  
# NOTE(review): \s? removes at most one whitespace character on each side of a comma.
my @file_exts = split (/\s?,\s?/, $file_exts);
# Name tables indexed by the month and weekday numbers returned by localtime().
my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
# NOTE(review): the eighth entry (second 'Sun') is never reached, since localtime() weekdays run 0..6.
my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat Sun);
# Return the current local time as a string "HH:MM:SS, Www Mmm D, YYYY".
# Takes no arguments.  Weekday and month names come from the file-level
# @weekDays and @months tables.
# Hour, minute and second are zero-padded to two digits so the stamp reads
# as a normal clock time ("09:05:03" rather than "9:5:3").
sub get_date_time {
   my ($second, $minute, $hour, $day_of_month, $month, $year_offset, $day_of_week) = localtime();
   my $year = 1900 + $year_offset;   # localtime() reports years since 1900
   my $the_time = sprintf('%02d:%02d:%02d', $hour, $minute, $second);
   my $the_date = "$weekDays[$day_of_week] $months[$month] $day_of_month, $year";
   return "$the_time, $the_date";
}
# Collect every file with a wanted extension below $local_path, grouped by
# the directory that contains it.
%file_list = find_local_files(\@file_exts, [$local_path]);
##
# now, to the hunt...
#
# Visit the directories in sorted order, skip the ignored folders, and record
# a [relative name, title] pair in @results for every file that can be read.
DIR: foreach my $directory ( sort (keys (%file_list))) {
   # Only the last path component is compared against the ignore list.
   my @arr = split('/', $directory);
   if (is_fp_folder( $arr[-1] ) ) {
      prt( "[dbg05] Skipping $directory ...\n" ) if ($dbg05);
      next DIR;
   }
   prt( "[dbg01] Processing $directory ...\n" ) if ($dbg01);
   FILE: foreach my $file (sort (@{$file_list{$directory}})) {
      # Name relative to the scanned root: drop "$local_path/" from the front.
      $fn = substr($file, (length($local_path)+1));
      prt( "Processing file $fn [$file] ...\n" ) if ($dbg2);
      # Three-argument open with a lexical handle: the file name is never
      # parsed for mode characters, and the handle cannot clash with a
      # bareword handle used elsewhere.
      if (open my $inf, '<', $file) {
         @lines = <$inf>;
         close $inf;
         $line = join('',@lines);
         $max = length($line);
         prt( "Processing $fn, $max characters, in ".scalar @lines." lines ...\n" ) if ($dbg4);
         $title = get_title($line);
         push(@results, [$fn, $title]);
         prt( "push(\@results, [$fn, $title])\n") if ($dbg4);
      } else {
         prt( "WARNING: Unable to open file [$file] ... $! ... \n" );
      }
   }
}
# Write the collected results as an HTML page and then launch that page.
#   $out   - name of the HTML file to create
#   $rresa - reference to an array of [file name, title] pairs
# Nothing is written when the result list is empty.  A failure to create the
# file is reported through prt() and is not fatal.
# The ($$) prototype is kept so existing call sites parse exactly as before.
sub write_a_html_file($$) {
    my ( $out, $rresa ) = @_;
    my $mcnt = scalar @{$rresa};
    prt( "Got $mcnt results ...\n" );
    return unless $mcnt;

    my $OF;
    # Three-argument open: $out is taken literally, never parsed for a mode.
    if (!open $OF, '>', $out) {
        prt( "WARNING: Unable to create file [$out] ... $! ... \n" );
        return;
    }

    write_html_head($OF);
    # Report the number of results, not the file-level $max (which holds the
    # character count of the last file that was read).
    print $OF "<p>List of $mcnt files found ...</p>\n";
    if ($use_table) {
        print $OF "<table border=\"0\" cellpadding=\"2\" cellspacing=\"2\" summary=\"List of site pages\" align=\"center\">\n";
        print $OF " <tr>\n";
        print $OF "  <th>Num</th><th>Link</th><th>Title</th>\n";
        print $OF " </tr>\n";
        my $cnt = 0;
        foreach my $res (@{$rresa}) {
            my ($fnm, $tit) = @{$res};
            $cnt++;
            # Link text: the bare file name when $use_fn_only is set,
            # otherwise the relative path; the href is always the path.
            my $dnm = $use_fn_only ? (fileparse($fnm))[0] : $fnm;
            print $OF " <tr>\n";
            print $OF "  <td align=\"right\">$cnt</td><td><a href=\"$fnm\">$dnm</a></td><td><b>$tit</b></td>\n";
            print $OF " </tr>\n";
        }
        print $OF "</table>\n";
    } else {
        print $OF "<ol>\n";
        foreach my $res (@{$rresa}) {
            my ($fnm, $tit) = @{$res};
            print $OF "<li><a href=\"$fnm\">$fnm</a> - <b>$tit</b></li>\n";
        }
        print $OF "</ol>\n";
    }
    # Use the file that was actually written ($out), not the global default.
    write_html_tail($OF, $out);
    close $OF;
    # Hand the finished file name to the system shell.
    # NOTE(review): presumably this opens the page in the default viewer on
    # Windows -- confirm this is wanted on other platforms.
    system($out);
    return;
}
# Write the site map for everything collected in @results, then shut down.
write_a_html_file( $out_file, \@results );
# Placeholder closing message: nothing meaningful is reported here yet.
my $msg = "TBD";
prt( "$msg\n" );
# NOTE(review): close_log() is not defined in this file (presumably it comes
# from logfile.pl); the meaning of its second argument is not visible here.
close_log($outfile,1);
exit(0);
# utility subroutines
# Extract the text of the first <title> element of an HTML document.
#   $txt - the complete document text
# Returns the title text, passed through trim_all(); an empty string is
# handed to trim_all() when no title tag is found.
# The opening tag is matched case-insensitively and may carry whitespace or
# attributes (<title lang="en">), which a literal "<title>" test would miss.
# The title runs up to the next "<" (or the end of the text) and may span
# several lines.
sub get_title {
   my ($txt) = shift;
   my $len = length($txt);
   my $tit = '';
   prt( "Get title from $len characters ...\n" ) if ($dbg3);
   # (?:\s[^>]*)? allows attributes but still rejects names such as <titlebar>.
   if ($txt =~ m{(<title(?:\s[^>]*)?>)([^<]*)}i) {
      my ($tag, $found) = ($1, $2);
      prt( "Got $tag ...\n" ) if ($dbg3);
      $tit = $found;
   }
   # NOTE(review): trim_all() is not defined in this file (presumably it
   # comes from logfile.pl) -- confirm what it strips.
   return trim_all($tit);
}
# Recursively collect the files whose names end in one of the given
# extensions.
#   $extensions  - reference to an array of extensions (without the dot)
#   $directories - reference to an array of directories to search
# Returns a hash (as a list) mapping each directory that holds at least one
# match to a reference to an array of the full names of its matching files.
# Directories and symbolic links are never reported.
sub find_local_files {
    my ($extensions, $directories) = @_;
    my $extension_re = '(' . join('|', @{$extensions}) . ')';
    prt( "Finding files with extensions [$extension_re]...\n" );
    my $name_matches = qr/\.$extension_re$/;
    my %found = ();
    # File::Find callback: $_ is the current entry, $File::Find::name its
    # full name and $File::Find::dir the directory being visited.
    my $collect = sub {
        return if -d $_ or -l $_;
        return unless $File::Find::name =~ $name_matches;
        push @{ $found{$File::Find::dir} }, $File::Find::name;
    };
    File::Find::find($collect, @{$directories});
    my $cnt = scalar keys(%found);
    prt( "Done... returning file list of $cnt items...\n" );
    return %found;
}
# Replace the first occurrence of $old_path inside $file with $new_path.
#   $old_path - path fragment to look for (taken literally)
#   $new_path - text to put in its place
#   $file     - the path to translate
# Returns the translated path; $file is returned unchanged when $old_path
# does not occur in it or is empty/undefined.
sub translate_path {
    my ($old_path, $new_path, $file) = @_;
    # An empty pattern would make s/// reuse the last successful pattern.
    return $file unless defined $old_path && length $old_path;
    # \Q..\E: characters such as ".", "+" or "(" in a path are matched
    # literally instead of being treated as regex metacharacters.
    $file =~ s|\Q$old_path\E|$new_path|;
    return $file;
}
################################################
# ignore FRONTPAGE folders
################################################
# Return 1 when the given name equals, ignoring case, one of the entries of
# the file-level @fpfolders list; return 0 otherwise.
sub is_fp_folder {
   my ($inf) = shift;
   my $hits = grep { lc($inf) eq lc($_) } @fpfolders;
   return $hits ? 1 : 0;
}
# Write the opening part of the page to the given output handle: the doctype
# held in $m_doctype, the <head> section, the opening <body> tag and the
# "Site Index" heading.  The <body> and <html> elements are left open.
sub write_html_head { # ($OF)
   my ($fh) = @_;
   print {$fh} $m_doctype . "\n";
   print {$fh} <<"EOF";
<html>
<head>
<title>Site Index</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
</head>
<body>
<h1 align="center"><a name="bm_top"
   id="bm_top"></a>Site Index</h1>
EOF
}
# Write the closing part of the page to the given output handle.
#   $f  - output file handle
#   $of - name of the file being written, shown in the "EOF" footer line
# Emits the footer paragraph with the bm_end anchor, two HTML comments
# (generator name and time stamp), then closes <body> and <html>.
sub write_html_tail {   # ($OF, filename);
   my ($f, $of) = @_;
   my ($msg);
   # The footer interpolates the $of parameter and closes the <a> element
   # it opens.
   print $f <<"EOF";
<p><a name="bm_end"
   id="bm_end">EOF - ${of}</a>
</p>
EOF
   $msg = "<!-- generated by $pgmname -->\n";
   $msg .= "<!-- ";
   $msg .= get_date_time();
   $msg .= " -->\n";
   print $f $msg;
   print $f "</body>\n";
   print $f "</html>\n";
}
# eof - sitemap02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional