Generated: Tue Feb 2 17:54:22 2010 from autoexch03.pl 2006/08/26 8.3 KB.
#!/Perl # AIM: Download the $URL, and find the currency # in a <PRE...> tag, between # MONETARY, and # MEMO: each line looking like - # COUNTRY UNIT Aug. 21 Aug. 22 Aug. 23 Aug. 24 Aug. 25 # *AUSTRALIA DOLLAR 0.7640 0.7625 0.7628 0.7632 0.7568 # BRAZIL REAL 2.1364 2.1303 2.1346 2.1633 2.1593 # use LWP::Simple; require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs require "htmltools.pl" or die "Missing htmltools.pl ...\n"; require "currencyISO.pl" or die "Missing currencyISO.pl ...\n"; # log file stuff my ($LF); my $outfile = 'temp'.$0.'.txt'; my $outfil1 = 'tempcurr03.htm'; my $outfil2 = 'tempcurr03.csv'; # program variables my $URL = 'http://www.federalreserve.gov/releases/h10/Update/'; my @hrefs = (); my @imgs = (); my @currency = (); my $m_date1 = ''; my $m_date2 = ''; my $etext = ''; open_log($outfile); prt( "$0 ... Hello, World...\n" ); prt("Fetching text from $URL ...\n"); my $text = get("$URL"); my $tcnt = length($text); prt( "Got $tcnt characters from URL $URL ...\n"); $etext = htmlexpand($text); my $ctext = htmlclean03($etext); write2file($ctext,$outfil1); ###prt("[$ctext]\n"); prt( "len=".length($ctext)."\n"); # "[$txt2]\n"); my $ccnt = 0; my $ccnt = extractcurrencies03($ctext); ###my $ccnt = scalar @currency; prt( "Got $ccnt currencies ...\n" ); my $msg = ''; my $txt = ''; if ($ccnt) { open OF, ">$outfil2" or mydie("ERROR: Unable to open CSV out file!\n"); prt("Writing CSV file $outfil2 ...\n"); $msg = "Country,Currency,ISO,Rate $m_date1,Rate $m_date2"; prt("$msg\n"); print OF $msg."\n"; for (my $i = 0; $i < $ccnt; $i++) { ###$msg = "".($currency[$i][0]).",".($currency[$i][1]).",".($currency[$i][2]).",".($currency[$i][3]).",".($currency[$i][4]); $msg = ''; for (my $i2 = 0; $i2 < 5; $i2++) { $txt = "".($currency[$i][$i2]); if ($txt =~ /,/) { $txt = '"'.$txt.'"'; } if (length($msg)) { $msg .= ","; } $msg .= $txt; } prt( "$msg\n" ); print OF $msg."\n"; } $msg = "From [$URL] on ".(scalar localtime); prt("$msg\n"); print OF $msg."\n"; close(OF); } else { prt( "WARNING: FAILED TO EXTRACT ANY CURRENCIES!!!\n" ); } prt( "All done ...\n" ); prt("[$ctext]\n"); close_log($outfile,1); ###system( $outfil1 ); ###system( $outfil2 ); exit(0); sub htmlclean03 { my ($rtxt) = shift; prt( "len=".length($rtxt)." Add PRE TAG to new line ...\n"); $rtxt = tag2newline($rtxt,'pre'); prt( "len=".length($rtxt)." Drop comments <!--...--> ...\n"); $rtxt = dropcomments($rtxt); prt( "len=".length($rtxt)." Strip <HEAD>...</HEAD> tag ...\n"); $rtxt = striptag($rtxt, 'HEAD'); prt( "len=".length($rtxt)." Strip <script>...</script> tag ...\n"); $rtxt = striptag($rtxt,'script'); prt( "len=".length($rtxt)." Strip <noscript>...</noscript> tag ...\n"); $rtxt = striptag($rtxt,'noscript'); prt( "len=".length($rtxt)." Strip <SELECT>...</SELECT> tag ...\n"); $rtxt = striptag($rtxt,'select'); prt( "len=".length($rtxt)." Remove <font ...> tags ...\n"); $rtxt = removefont($rtxt); prt( "len=".length($rtxt)." Remove <b> tags ...\n"); $rtxt = removetag($rtxt,'b'); prt( "len=".length($rtxt)." Remove <nobr> tags ...\n"); $rtxt = removetag($rtxt,'nobr'); prt( "len=".length($rtxt)." Remove th attributes ...\n"); $rtxt = removetagattrib($rtxt,'th'); prt( "len=".length($rtxt)." Remove tr attributes ...\n"); $rtxt = removetrattrib($rtxt); prt( "len=".length($rtxt)." Remove td attributes ...\n"); $rtxt = removetdattrib($rtxt); prt( "len=".length($rtxt)." Delete <a...> & </a>\n"); $rtxt = collecthrefs($rtxt,1); prt( "len=".length($rtxt)." Delete <img...>\n"); $rtxt = collectimgs($rtxt,1); prt( "len=".length($rtxt)." Do substitutions ...\n"); $rtxt = substitutions($rtxt); prt( "len=".length($rtxt)." Trim blank lines ...\n"); $rtxt = trimblanklines($rtxt); prt( "len=".length($rtxt)." Trim inline td ...\n"); $rtxt = triminlinetd($rtxt); return $rtxt; } sub extractcurrencies03 { my ($txt) = shift; my @arr = split( /\n/, $txt ); my $lcnt = scalar @arr; my $ccnt = 0; my $inpre = 0; my $bgn = 0; my $ctry = ''; # country my $curr = ''; # currency my @nums = (); my $itm = ''; my $vlast = ''; # last quoted rate my $v2last = ''; # last quoted rate my $bcnt = 0; my %hiso = (); my $kcnt = 0; my $mcnt = 0; my $srch = ''; my $iso = ''; my $tms = get_YYYYMMDD(scalar localtime); my $cyear = substr($tms,0,4); prt("Processing $lcnt lines ...\n"); foreach my $line (@arr) { chomp $line; if (substr($line, length($line)-1) eq "\r") { $line = substr($line,0,length($line)-1); } if ($inpre) { if ($bgn) { if ($line =~ /MEMO:/) { prt("End currency ...[$line]\n"); $bgn = 0; } else { if (substr($line,0,1) eq '*') { $line = substr($line,1); } my @lnarr = split(/\s+/, $line); if ($bgn == 1) { # get the last two dates prt("Header=[$line]".scalar @lnarr."\n"); # COUNTRY UNIT Aug. 21 Aug. 22 Aug. 23 Aug. 24 Aug. 25 $m_date1 = $lnarr[-4].' '.$lnarr[-3].' '.$cyear; $m_date2 = $lnarr[-2].' '.$lnarr[-1].' '.$cyear; } else { $curr = ''; $ctry = ''; @nums = (); $bcnt = 0; $v2last = ''; $vlast = ''; foreach $itm (@lnarr) { $bcnt++; if ($itm =~ /^\d+/) { push(@nums, $itm); $v2last = $vlast; # keep 2nd last RATE $vlast = $itm; # keep last a CURRENT RATE } else { # if the NEXT is a NUMBER if ($lnarr[$bcnt] =~ /^\d+/) { $curr = $itm; } else { if (length($ctry)) { $ctry .= " "; } $ctry .= $itm; } } } %hiso = Currency2ISO($ctry); ###prt("Line=[$line]".scalar @lnarr."\n"); prt("[$ctry] [$curr] "); $kcnt = keys %hiso; if ($kcnt == 0) { @lnarr = split( /(,|\s)/, $ctry); $mcnt = scalar @lnarr; $bcnt = 0; while (($kcnt == 0)&&($bcnt < $mcnt)) { $srch = trimbothends($lnarr[$bcnt]); if (length($srch)) { prt("\n*SEARCH ON [$srch] * "); %hiso = Currency2ISO( $srch ); $kcnt = keys %hiso; } $bcnt++; } if ($kcnt == 0) { # if STILL zero if ($lnarr[0] =~ /EMU/i) { prt("\n*SEARCH ON Euro* "); %hiso = Currency2ISO( "Euro" ); $kcnt = keys %hiso; } } } if ($kcnt == 0) { prt( "\n*NO ISO FOR $ctry* " ); } elsif ($kcnt > 1) { prt( "\n*MULT ISO FOR $ctry* "); } foreach my $k (keys %hiso) { prt("$k "); $iso = $k; } foreach $itm (@nums) { prt("$itm "); } prt("\n"); ###$msg = "Country,Currency,ISO,Rate $m_date1,Rate $m_date2"; push(@currency, [$ctry, $curr, $iso, $v2last, $vlast]); $ccnt++; } $bgn++; # move off first line } } else { if ($line =~ /MONETARY/) { prt("Start currency ...[$line]\n"); $bgn = 1; } } if ($line =~ /<\/pre.*>/i) { $inpre = 0; prt("Exit PRE tag ...[$line]\n"); } } elsif ($line =~ /<pre.*>/i) { prt("Entering PRE tag ...[$line]\n"); $inpre = 1; } } return $ccnt; } ## month to number sub mth_to_num { my ($mth) = shift; my $cnt = 0; my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); ###prt( "Chk [$mth] " ); foreach my $m (@mths) { $cnt++; if ($m eq $mth) { ###prt( "Is $m - return $cnt\n" ); return $cnt; } } mydie( "ERROR: Returning 0!!! for [$mth]\n" ); return '??'; } # translated to YYYY/MM/DD, like 2005/03/12 sub get_YYYYMMDD { my ($tm) = shift; my @arr = split( / /, $tm ); # time of form 'Sat Mar 12 03:11:55 2005' my $ac = scalar @arr; my $doff = 2; my $yoff = 4; if ($ac == 5) { $doff = 2; $yoff = 4; } elsif ($ac == 6) { $doff = 3; $yoff = 5; } else { mydie( "ERROR: Time ($tm) did NOT split correctly!\n" ); } my $mn = mth_to_num( $arr[1] ); if ($mn < 10) { $mn = '0'.$mn; } my $dn = $arr[$doff]; if ($dn < 10) { $dn = '0'.$dn; } my $dtt = $arr[$yoff].'/'.$mn.'/'.$dn; # translated to 2005/03/12 return $dtt; } # eof = autoexch03.pl