chklink.pl to HTML.

index -|- end

Generated: Tue Jun 8 17:26:32 2010 from chklink.pl 2010/04/19 3.9 KB.

#!/perl -w
# NAME: chklink.pl
# AIM: Develop a method to CHECK a URL link ...
# 2010/04/19 - some minor tidying only...
# 23/08/2007 geoff mclane geoffair.net/mperl
use strict;
use warnings;
use Socket;
use LWP::Simple;
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $perl_root = 'C:/GTools/perl';
my $outfile = $perl_root."\\temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Check links...\n" );

# features
my $do_data_fetch = 0;

### debug
my $debug_on = 1;
my $dbg3 = 1;   # show resolved IP addresses

my @urls_org = qw( http://babelfish.altavista.com/ http://www.geoffmclane.com/fg/
http://www.colorcombo.com/array.html http://help.godaddy.com/index.php?
http://www.microsoft.com/isapi/redir.dll?prd=ie&pver=6ar=IStart );

# note: seems http://sourceforge.net/projects/giflib/files/ took over from 
# http://prdownloads.sourceforge.net/libungif/libungif-4.1.3.tar.gz below.
my @urls = qw( http://prdownloads.sourceforge.net/freetype/ft219.zip
ftp://ftp.remotesensing.org/pub/gdal/gdal126.zip
http://www.xmission.com/~nate/glut/glut-3.7.6-src.zip
ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/jpegsr6.zip
http://prdownloads.sourceforge.net/libungif/libungif-4.1.3.tar.gz
ftp://swrinde.nde.swri.edu/pub/png/src/lpng128.zip
ftp://ftp.remotesensing.org/pub/proj/proj-4.4.9.zip
ftp://ftp.remotesensing.org/pub/libtiff/tiff-3.7.2.zip
http://www.zlib.net/zlib122.zip );

###my $url = 'http://babelfish.altavista.com/';
###my $url = 'http://www.geoffmclane.com/fg/';
###my $url = 'http://www.colorcombo.com/array.html';
###my $url = 'http://help.godaddy.com/index.php?';
my $url = 'http://www.microsoft.com/isapi/redir.dll?prd=ie&pver=6ar=IStart';

sub Get_Host_Name($) {
   my ($nm) = shift;
   if ($nm =~ /^http:\/\/(.*)/i) {
      $nm = $1;
   } elsif ($nm =~ /^https:\/\/(.*)/i) {
      $nm = $1;
   } elsif ($nm =~ /^ftp:\/\/(.*)/i) {
      $nm = $1;
   }
    #elsif ($nm =~ /^(\w+):\/\/(.*)$/)
    #    $nm = $2;
    #}
   my @arr = split('/', $nm);
   $nm = $arr[0];
   return $nm;
}

sub Get_Domain_Name($) {
   my ($nm) = shift;
   $nm = Get_Host_Name($nm);
   if ($nm =~ /^www\.(.*)/) {
      $nm = substr($nm,4);
   }
   return $nm;
}

sub Get_URL_Text_Count($) {
   my ($url) = shift;
   my $txt = get( $url );
   my $tcnt = 0;
   $tcnt =   length($txt) if ($txt);
   return $tcnt;
}

############################################################
# Show IP Address
# uses sockets, gethostbyname
# Return 0, if can NOT be resolved.
# else the number of IP addresses resolved.
############################################################
sub showIPAddress($) {
   my ($nm) = shift;
   my $hnm = Get_Host_Name($nm);
   my @addr = gethostbyname($hnm);
   my $cnt = 0;
   if( !@addr ) {
      prt( "Can't resolve [$nm]($hnm): error: $!\n" );
      return 0;
   }
   @addr = map { inet_ntoa($_) } @addr[4 .. $#addr];
   foreach my $k (@addr) {
      $cnt++;
      prt( "$cnt: [$nm] (domain=[$hnm]) resolves to IP [$k]\n" ) if ($dbg3);
   }
   return $cnt;
}



sub process_url_list($) {
    my ($ra) = @_;
    my $max = scalar @{$ra};
    prt("Got $max URLS to process...\n");
    my $cnt = 0;
    foreach my $u (@{$ra}) {
        $cnt++;
        prt("\n$cnt of $max: [$u]\n");
        showIPAddress( $u );
        if ($do_data_fetch) {
            my $tc = Get_URL_Text_Count($u);
            if ($tc) {
                prt( "Got $tc chars from [$u] ...\n" );
            } else {
                prt( "FAILED **** get on [$u] ...\n" );
            }
        }
   }
}

#showIPAddress( $url );
#my $tc = Get_URL_Text_Count($url);
#if ($tc) {
#   prt( "Got $tc chars from [$url] ...\n" );
#} else {
#   prt( "FAILED get on [$url] ...\n" );
#}

process_url_list( \@urls );

close_log($outfile,1);
exit(0);

#################################


# eof - chklink.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional