autoexch01.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:22 2010 from autoexch01.pl 2006/08/27 4.9 KB.

#!/Perl
use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";
require "currencyISO.pl" or die "Missing currencyISO.pl ...\n";
# Main script: fetch the x-rates USD table page, reduce the HTML to simple
# markup, extract the currency rows and write them to the log in CSV form.
# prt(), open_log() and close_log() come from logfile.pl; htmlexpand() is
# expected to come from one of the required helper files.

# log file stuff
my ($LF);
my $outfile = 'temp'.$0.'.txt';
# program variables
my $URL = 'http://www.x-rates.com/d/USD/table.html';
# NOTE(review): @hrefs and @imgs are not referenced by any code visible in
# this file - confirm before removing.
my @hrefs = ();
my @imgs = ();
# Filled by extractcurrency01(): one [name, value, value] array ref per row.
my @currency = ();
open_log($outfile);
prt( "$0 ... Hello, World...\n" );
prt("Fetching text from $URL ...\n");
my $text = get($URL);
if (!defined $text) {
   # LWP::Simple::get() returns undef when the fetch fails; stop here rather
   # than run the clean-up passes on nothing and report a bogus success.
   prt( "ERROR: Failed to fetch URL $URL ...\n" );
   close_log($outfile,1);
   exit(1);
}
my $tcnt = length($text);
prt( "Got $tcnt characters from URL $URL ...\n");
###prt("[$text]\n");
$text = htmlexpand($text);
my $txt2 = htmlclean01($text);
###prt( "len=".length($txt2)." [$txt2]\n");
# extractcurrency01() pushes its rows onto @currency and returns the row count.
my $ccnt = extractcurrency01($txt2);
prt( "Got $ccnt currencies ...\n" );
if ($ccnt) {
   prt( "And finally, in CSV form ...\n" );
   prt( "Currency,1 USD,2 USD\n" );
   foreach my $row (@currency) {
      prt( join(',', $row->[0], $row->[1], $row->[2])."\n" );
   }
   prt("From [$URL] on ".(scalar localtime)."\n");
}
prt( "All done ...\n" );
close_log($outfile,1);
exit(0);
# Run the fixed sequence of HTML clean-up passes over a page and return the
# resulting text.
#   $rtxt   - the HTML text to clean
#   returns - the text after the last pass
# Before each pass the current text length and a short description of the
# pass are written with prt(). The pass routines themselves (dropcomments,
# striptag, removefont, ...) are defined outside this file.
sub htmlclean01 {
   my ($rtxt) = shift;
   # Each entry pairs the log description with the pass to apply; the passes
   # run in exactly the order listed here.
   my @passes = (
      [ 'Drop comments <!--...--> ...',             sub { dropcomments($_[0]) } ],
      [ 'Strip <HEAD>...</HEAD> tag ...',           sub { striptag($_[0], 'HEAD') } ],
      [ 'Strip <script>...</script> tag ...',       sub { striptag($_[0],'script') } ],
      [ 'Strip <noscript>...</noscript> tag ...',   sub { striptag($_[0],'noscript') } ],
      [ 'Remove <font ...> tags ...',               sub { removefont($_[0]) } ],
      [ 'Remove <b> tags ...',                      sub { removetag($_[0],'b') } ],
      [ 'Remove tr attributes ...',                 sub { removetrattrib($_[0]) } ],
      [ 'Remove td attributes ...',                 sub { removetdattrib($_[0]) } ],
      [ 'Delete <a...> & </a>',                     sub { collecthrefs($_[0],1) } ],
      [ 'Delete <img...>',                          sub { collectimgs($_[0],1) } ],
      [ 'Do substitutions ...',                     sub { substitutions($_[0]) } ],
      [ 'Trim blank lines ...',                     sub { trimblanklines($_[0]) } ],
      [ 'Trim inline td ...',                       sub { triminlinetd($_[0]) } ],
   );
   foreach my $pass (@passes) {
      my ($what, $apply) = @{$pass};
      prt( "len=".length($rtxt)." $what\n");
      $rtxt = $apply->($rtxt);
   }
   return $rtxt;
}
# Scan cleaned HTML text, line by line, for the currency table and collect
# its rows.
#   $txt    - the cleaned page text (one table cell per line is expected)
#   returns - the number of complete rows found
# Side effect: each complete row is pushed onto the file-level @currency as
# an array ref of three cell texts, in the order the cells appeared.
#
# Only lines holding an inline <td ...>text</td> cell whose trimmed text is
# non-empty take part. A small state machine locates the table:
#   mode 0 - looking for the cell 'click on values to see graphs'
#   mode 1 - the next cell must be '1 USD', else back to mode 0
#   mode 2 - the next cell must be 'in USD', else back to mode 0
#   mode 3 - collecting cells, three per row, until a </table> line
# trimbothends() is defined outside this file (presumably it strips leading
# and trailing white space - verify against its definition).
sub extractcurrency01 {
   my ($txt) = shift;
   return 0 unless defined $txt;   # nothing fetched / nothing to scan
   my $mode = 0;
   my @row = ();    # cells gathered so far for the current row
   my $rcnt = 0;
   # split() also yields a final line that lacks a trailing newline, which a
   # scan that only acts on "\n" would silently skip.
   foreach my $ln (split(/\n/, $txt)) {
      if ($ln =~ /.*<td.*>(.*)<\/td>/i) {
         my $lt = $1;   # get text between <td>...</td>
         # do not remove ALL spaces - names like 'Hong Kong Dollar' need them
         my $nlt = trimbothends($lt);
         next unless length($nlt);
         if ($mode == 3) {
            push(@row, $nlt);
            if (scalar(@row) == 3) {
               push(@currency, [@row]);
               @row = ();
               $rcnt++;
            }
         } elsif ($mode == 0) {
            if ($nlt eq 'click on values to see graphs') {
               $mode = 1;
            }
         } elsif ($mode == 1) {
            $mode = ($nlt eq '1 USD') ? 2 : 0;
         } elsif ($mode == 2) {
            $mode = ($nlt eq 'in USD') ? 3 : 0;
         }
      } elsif ($ln =~ /<\/table>/i) {
         # Close of TABLE: stop collecting and discard any partial row so it
         # cannot shift the columns of a later table.
         $mode = 0;
         @row = ();
      }
   }
   return $rcnt;
}
# sample CSV file
#Currency,1 USD,2 USD
#Australian Dollar,1.32135,0.756802
#Brazilian Real,2.1593,0.463113
#British Pound,0.529549,1.8884
#Canadian Dollar,1.1078,0.90269
#Chinese Yuan,7.969,0.125486
#Danish Krone,5.8435,0.17113
#Euro,0.783269,1.2767
#Hong Kong Dollar,7.7796,0.128541
#Indian Rupee,46.41,0.0215471
#Japanese Yen,117.31,0.00852442
#Malaysian Ringgit,3.6785,0.27185
#Mexican Peso,10.964,0.0912076
#New Zealand Dollar,1.57282,0.635801
#Norwegian Kroner,6.302,0.15868
#Singapore Dollar,1.5802,0.632831
#South African Rand,7.1915,0.139053
#South Korean Won,961.8,0.00103972
#Sri Lanka Rupee,103.27,0.00968335
#Swedish Krona,7.2448,0.13803
#Swiss Franc,1.2375,0.808081
#Taiwan Dollar,32.85,0.0304414
#Thai Baht,37.67,0.0265463
#Venezuelan Bolivar,2144.6,0.000466287
#From [http://www.x-rates.com/d/USD/table.html] on Sun Aug 27 11:34:18 2006
# try to fit to
# Country, Country, ISO, Rate
# eof - autoexch01.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional