autoexch03.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:22 2010 from autoexch03.pl 2006/08/26 8.3 KB.

#!/Perl
# AIM: Download the $URL, and find the currency
# in a <PRE...> tag, between
# MONETARY, and
# MEMO: each line looking like -
# COUNTRY         UNIT       Aug. 21      Aug. 22      Aug. 23      Aug. 24      Aug. 25
# *AUSTRALIA      DOLLAR         0.7640       0.7625       0.7628       0.7632       0.7568
# BRAZIL         REAL           2.1364       2.1303       2.1346       2.1633       2.1593
#
use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";
require "currencyISO.pl" or die "Missing currencyISO.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp'.$0.'.txt';
my $outfil1 = 'tempcurr03.htm';
my $outfil2 = 'tempcurr03.csv';
# program variables
my $URL = 'http://www.federalreserve.gov/releases/h10/Update/';
my @hrefs = ();
my @imgs = ();
my @currency = ();
my $m_date1 = '';
my $m_date2 = '';
my $etext = '';
open_log($outfile);
prt( "$0 ... Hello, World...\n" );
prt("Fetching text from $URL ...\n");
my $text = get("$URL");
my $tcnt = length($text);
prt( "Got $tcnt characters from URL $URL ...\n");
$etext = htmlexpand($text);
my $ctext = htmlclean03($etext);
write2file($ctext,$outfil1);
###prt("[$ctext]\n");
prt( "len=".length($ctext)."\n"); # "[$txt2]\n");
my $ccnt = 0;
my $ccnt = extractcurrencies03($ctext);
###my $ccnt = scalar @currency;
prt( "Got $ccnt currencies ...\n" );
my $msg = '';
my $txt = '';
if ($ccnt) {
   open OF, ">$outfil2" or mydie("ERROR: Unable to open CSV out file!\n");
   prt("Writing CSV file $outfil2 ...\n");
   $msg = "Country,Currency,ISO,Rate $m_date1,Rate $m_date2"; 
   prt("$msg\n");
   print OF $msg."\n";
   for (my $i = 0; $i < $ccnt; $i++) {
      ###$msg = "".($currency[$i][0]).",".($currency[$i][1]).",".($currency[$i][2]).",".($currency[$i][3]).",".($currency[$i][4]);
      $msg = '';
      for (my $i2 = 0; $i2 < 5; $i2++) {
         $txt = "".($currency[$i][$i2]);
         if ($txt =~ /,/) {
            $txt = '"'.$txt.'"';
         }
         if (length($msg)) {
            $msg .= ",";
         }
         $msg .= $txt;
      }
      prt( "$msg\n" );
      print OF $msg."\n";
   }
   $msg = "From [$URL] on ".(scalar localtime);
   prt("$msg\n");
   print OF $msg."\n";
   close(OF);
} else {
   prt( "WARNING: FAILED TO EXTRACT ANY CURRENCIES!!!\n" );
}
prt( "All done ...\n" );
prt("[$ctext]\n");
close_log($outfile,1);
###system( $outfil1 );
###system( $outfil2 );
exit(0);
sub htmlclean03 {
   my ($rtxt) = shift;
prt( "len=".length($rtxt)." Add PRE TAG to new line ...\n");
$rtxt = tag2newline($rtxt,'pre');
prt( "len=".length($rtxt)." Drop comments <!--...--> ...\n");
$rtxt = dropcomments($rtxt);
prt( "len=".length($rtxt)." Strip <HEAD>...</HEAD> tag ...\n");
$rtxt = striptag($rtxt, 'HEAD');
prt( "len=".length($rtxt)." Strip <script>...</script> tag ...\n");
$rtxt = striptag($rtxt,'script');
prt( "len=".length($rtxt)." Strip <noscript>...</noscript> tag ...\n");
$rtxt = striptag($rtxt,'noscript');
prt( "len=".length($rtxt)." Strip <SELECT>...</SELECT> tag ...\n");
$rtxt = striptag($rtxt,'select');
prt( "len=".length($rtxt)." Remove <font ...> tags ...\n");
$rtxt = removefont($rtxt);
prt( "len=".length($rtxt)." Remove <b> tags ...\n");
$rtxt = removetag($rtxt,'b');
prt( "len=".length($rtxt)." Remove <nobr> tags ...\n");
$rtxt = removetag($rtxt,'nobr');
prt( "len=".length($rtxt)." Remove th attributes ...\n");
$rtxt = removetagattrib($rtxt,'th');
prt( "len=".length($rtxt)." Remove tr attributes ...\n");
$rtxt = removetrattrib($rtxt);
prt( "len=".length($rtxt)." Remove td attributes ...\n");
$rtxt = removetdattrib($rtxt);
prt( "len=".length($rtxt)." Delete <a...> & </a>\n");
$rtxt = collecthrefs($rtxt,1);
prt( "len=".length($rtxt)." Delete <img...>\n");
$rtxt = collectimgs($rtxt,1);
prt( "len=".length($rtxt)." Do substitutions ...\n");
$rtxt = substitutions($rtxt);
prt( "len=".length($rtxt)." Trim blank lines ...\n");
$rtxt = trimblanklines($rtxt);
prt( "len=".length($rtxt)." Trim inline td ...\n");
$rtxt = triminlinetd($rtxt);
   return $rtxt;
}
sub extractcurrencies03 {
   my ($txt) = shift;
   my @arr = split( /\n/, $txt );
   my $lcnt = scalar @arr;
   my $ccnt = 0;
   my $inpre = 0;
   my $bgn = 0;
   my $ctry = '';   # country
   my $curr = '';   # currency
   my @nums = ();
   my $itm = '';
   my $vlast = ''; # last quoted rate
   my $v2last = ''; # last quoted rate
   my $bcnt = 0;
   my %hiso = ();
   my $kcnt = 0;
   my $mcnt = 0;
   my $srch = '';
   my $iso = '';
   my $tms = get_YYYYMMDD(scalar localtime);
   my $cyear = substr($tms,0,4);
   prt("Processing $lcnt lines ...\n");
   foreach my $line (@arr) {
      chomp $line;
      if (substr($line, length($line)-1) eq "\r") {
         $line = substr($line,0,length($line)-1);
      }
      if ($inpre) {
         if ($bgn) {
            if ($line =~ /MEMO:/) {
               prt("End currency ...[$line]\n");
               $bgn = 0;
            } else {
               if (substr($line,0,1) eq '*') {
                  $line = substr($line,1);
               }
               my @lnarr = split(/\s+/, $line);
               if ($bgn == 1) {
                  # get the last two dates
                  prt("Header=[$line]".scalar @lnarr."\n");
                  # COUNTRY UNIT Aug. 21 Aug. 22 Aug. 23 Aug. 24 Aug. 25
                  $m_date1 = $lnarr[-4].' '.$lnarr[-3].' '.$cyear;
                  $m_date2 = $lnarr[-2].' '.$lnarr[-1].' '.$cyear;
               } else {
                  $curr = '';
                  $ctry = '';
                  @nums = ();
                  $bcnt = 0;
                  $v2last = '';
                  $vlast = '';
                  foreach $itm (@lnarr) {
                     $bcnt++;
                     if ($itm =~ /^\d+/) {
                        push(@nums, $itm);
                        $v2last = $vlast; # keep 2nd last RATE
                        $vlast = $itm; # keep last a CURRENT RATE
                     } else {
                        # if the NEXT is a NUMBER
                        if ($lnarr[$bcnt] =~ /^\d+/) {
                           $curr = $itm;
                        } else {
                           if (length($ctry)) {
                              $ctry .= " ";
                           }
                           $ctry .= $itm;
                        }
                     }
                  }
                  %hiso = Currency2ISO($ctry);
                  ###prt("Line=[$line]".scalar @lnarr."\n");
                  prt("[$ctry] [$curr] ");
                  $kcnt = keys %hiso;
                  if ($kcnt == 0) {
                     @lnarr = split( /(,|\s)/, $ctry);
                     $mcnt = scalar @lnarr;
                     $bcnt = 0;
                     while (($kcnt == 0)&&($bcnt < $mcnt)) {
                        $srch = trimbothends($lnarr[$bcnt]);
                        if (length($srch)) {
                           prt("\n*SEARCH ON [$srch] * ");
                           %hiso = Currency2ISO( $srch );
                           $kcnt = keys %hiso;
                        }
                        $bcnt++;                  
                     }
                     if ($kcnt == 0) {
                        # if STILL zero
                        if ($lnarr[0] =~ /EMU/i) {
                           prt("\n*SEARCH ON Euro* ");
                           %hiso = Currency2ISO( "Euro" );
                           $kcnt = keys %hiso;
                        }
                     }
                  }
                  if ($kcnt == 0) {
                     prt( "\n*NO ISO FOR $ctry* " );
                  } elsif ($kcnt > 1) {
                     prt( "\n*MULT ISO FOR $ctry* ");
                  }
                  foreach my $k (keys %hiso) {
                     prt("$k ");
                     $iso = $k;
                  }
                  foreach $itm (@nums) {
                     prt("$itm ");
                  }
                  prt("\n");
                  ###$msg = "Country,Currency,ISO,Rate $m_date1,Rate $m_date2"; 
                  push(@currency, [$ctry, $curr, $iso, $v2last, $vlast]);
                  $ccnt++;
               }
               $bgn++; # move off first line
            }
         } else {
            if ($line =~ /MONETARY/) {
               prt("Start currency ...[$line]\n");
               $bgn = 1;
            }
         }
         if ($line =~ /<\/pre.*>/i) {
            $inpre = 0;
            prt("Exit PRE tag ...[$line]\n");
         }
      } elsif ($line =~ /<pre.*>/i) {
         prt("Entering PRE tag ...[$line]\n");
         $inpre = 1;
      }
   }
   return $ccnt;
}
## month to number
sub mth_to_num {
   my ($mth) = shift;
   my $cnt = 0;
   my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
   ###prt( "Chk [$mth] " );
   foreach my $m (@mths) {
      $cnt++;
      if ($m eq $mth) {
         ###prt( "Is $m - return $cnt\n" );
         return $cnt;
      }
   }
   mydie( "ERROR: Returning 0!!! for [$mth]\n" );
   return '??';
}
# translated to YYYY/MM/DD, like 2005/03/12
sub get_YYYYMMDD {
   my ($tm) = shift;
   my @arr = split( / /, $tm ); # time of form 'Sat Mar 12 03:11:55 2005'
   my $ac = scalar @arr;
   my $doff = 2;
   my $yoff = 4;
   if ($ac == 5) {
      $doff = 2;
      $yoff = 4;
   } elsif ($ac == 6) {
      $doff = 3;
      $yoff = 5;
   } else {
      mydie( "ERROR: Time ($tm) did NOT split correctly!\n" );
   }
   my $mn = mth_to_num( $arr[1] );
   if ($mn < 10) {
      $mn = '0'.$mn;
   }
   my $dn = $arr[$doff];
   if ($dn < 10) {
      $dn = '0'.$dn;
   }
   my $dtt = $arr[$yoff].'/'.$mn.'/'.$dn; # translated to 2005/03/12
   return $dtt;
}
# eof = autoexch03.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional