xml2h01.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:55:01 2010 from xml2h01.pl 2007/11/10 5.7 KB.

#!/perl -w
# NAME: xml2h01.pl
# AIM: Color code XML file data
# 10/11/2007 - geoff mclane - http://geoffair.net/mperl
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );
my $in_file = "temp1.xml";
my @newlines = ();
my $tab_space = '   ';   # convert tabs
# colors
# c bgn/end tag & = - blue - #0000ff
# a attributes - red - #ff0000
#tags - greenish - #808000
#attr value - purple - #ff00ff
my $a_class = 'a';   # read
my $b_class = 'b';   # 
my $c_class = 'c';   # blue
my $d_class = 'd';   # tag
# debug
my $dbg3 = 0;
my $dbg4 = 0;
process_file( $in_file );
prt( "<pre class=\"cd\">" );
foreach my $ln (@newlines) {
   prt( "$ln\n" );
}
prt("</pre>\n");
close_log($outfile,1);
exit(0);
##############################
sub add_red {
   my ($t) = shift;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}
sub add_value {
   my ($t) = shift;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}
sub add_blue {
   my ($t) = shift;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}
sub add_tag {
   my ($t) = shift;
   return ('<span class="'.$d_class.'">'.$t.'</span>');
}
sub process_file {
   my ($fil) = shift;
   if (open INF, "<$fil") {
      my @lines = <INF>;
      close INF;
      my $lc = scalar @lines;
      prt( "Processing $lc line from $fil ...\n" );
      my ($i, $ch, $line, $nline, $ll, $intag, $tag, $hadsp, $attr, $hadeq, $value, $i2);
      $intag = 0;
      foreach $line (@lines) {
         chomp $line;
         $nline = '';
         $tag = '';
         $hadsp = 0;
         $hadeq = 0;
         $attr = '';
         $value = '';
         $ll = length($line);
         for ($i = 0; $i < $ll; $i++) {
            $ch = substr($line,$i,1);
            if ($intag) {
               if ($ch eq '>') {
                  if ($hadsp) {
                     $nline .= add_red(html_line($attr)) if (length($attr));
                     $nline .= add_blue(html_line($ch));
                     $attr = '';
                  } else {
                     $nline .= add_tag(html_line($tag)) if (length($tag));
                     $nline .= add_blue(html_line($ch));
                     $tag = '';
                  }
                  $intag = 0;
               } else {
                  if ($hadsp) {
                     if ($ch eq '=') {
                        $nline .= add_red(html_line($attr)) if (length($attr));
                        $nline .= add_blue(html_line($ch));
                        $i++;
                        $attr = '';
                        $value = '';
                        for (; $i < $ll; $i++) {
                           $ch = substr($line,$i,1);
                           if (($ch =~ /\s/)||($ch eq '>')||($ch eq '?')) {
                              $i--;
                              last;
                           }
                           $value .= $ch;
                        }
                        $nline .= add_value(html_line($value)) if (length($value));
                        $value = '';
                     } else {
                        $attr .= $ch;
                     }
                  } else {
                     if ($ch =~ /\s/) {
                        $hadsp = 1;
                        $nline .= add_tag(html_line($tag)) if (length($tag));
                        $nline .= $ch;
                        $tag = '';
                     } else {
                        $tag .= $ch;
                     }
                  }
               }
            } else {
               if ($ch eq '<') {
                  $intag = 1;
                  $nline .= add_blue(html_line($ch));
                  $i2 = $i + 1;
                  if (($i2 < $ll)&&(substr($line,$i2,1) eq '?')) {
                     $i++;
                     $nline .= add_red('?');
                  }
                  $tag = '';
                  $attr = '';
                  $value = '';
               } else {
                  $nline .= html_line($ch);
               }
            }
         }
         push(@newlines, $nline);
      }
   } else {
      prt( "ERROR: FAILED to open file $fil ... $! ...\n" );
   }
}
######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $dbg4;
   return $nt;
}
###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' and '>' to '&gt;', to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/>/&gt;/g; # make sure all '>' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if ($t =~ /\s\s/) { # if any two consecutive white space
      return conv_spaces($t);
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if $dbg3;
   return $t;
}
# eof - xml2h01.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional