v15, i03: Listing 1 dwreplace.pl

Listing 1
mar2006.tar
Listing 1 dwreplace.pl

#!/usr/bin/perl
# This program is licensed under the terms of the GNU 
# General Public License (GPL). 
# This program is a copyrighted by Neptune Web, Inc.
# Please send improvements and changes to 
# charles.dalsass@neptuneweb.com

use Getopt::Long;

my $replacementfile;
my $searchonly;
my ($showexample);
my $verbose;
my $casesensitive;
Getopt::Long::GetOptions("rfile=s" => \$replacementfile, "s+" => \
             \$searchonly, "e+" => \$showexample, "v+" => \$verbose, 
             "ni+" => \$casesensitive);

if (!$replacementfile && !$showexample) {
    print <<'END_OF_MARKER';

usage: dwreplace (--s) --rfile=rfile file1 <file2 ... file(n)>\n\n

Flags:
--rfile=<file> => where file is the rfile to use (this is a mandatory argument)
--s            => search only, do not replace
--e            => show an example rfile
--v            => verbose mode, shows detailed matching info and \
                  highlighted text for changes
--ni           => case sensitive mode (not insensitive)

END_OF_MARKER

exit(1);

} elsif ($showexample) {
    
#################################### SAMPLE RFILE #########################
print <<'END_OF_MARKER';

# Example rfile:

$OLD[0] = <<'END_OF_TEXT';
Sample Love Letter:
END_OF_TEXT

$NEW[0] = <<'END_OF_TEXT';
<font color="blue">Sample Love Letter:</font>
END_OF_TEXT

# Reversing the file to match a <table (unkown) ..(unkown) known text

$OPTIONS[1] = [ isregexp , reversefile,  casesensitive ];

$OLD[1] = 'ot ereh kcilC.*?elbat<';

$NEW[1] = '<p><center>{$sendbutton}</center></p>';

## if you want to interpolate from a regexp, those things found using
## brace notation.  use in conjunction with
## "escape_perl_regular_expression" program if your regular expression
## contains alot of whitespace and HTML

# Another words -- you can't put a HERE document for the right side (new 
# text) if you are doing interpolation! There should be a way to make this 
# work, however.

$OPTIONS[2] = [ isregexp , interpolator ];

$OLD[2] = '<SPAN\s*CLASS=\"body\">(.*?)<a\s*href=\"#\"\s*onclick=\"backToTop\ \
  (0\)\">Back\s*To\s*Top<\/a>';

$NEW[2] = '<editable>$1</editable><a href="#" onclick="backToTop(0)"> \
  Back To Top</a>';

# Example 4 most advanced. Pull out all absolute URLS and replace with an 
# interstitial page. 

sub urlEncode {
    my ($toencode) = @_;
    return undef unless defined($toencode);
    $toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg;
    return $toencode;
}

$OLD[3] = '(href|src)="?(http:\/\/.*?)[">\s]';
$NEW[3] = '"speedbump.html?page=$1=\"" . urlEncode($2) . "\""';

$OPTIONS[3] = [ isregexp , interpolator , eflag  ]; # use the eflag to 
                                                    # evaluate the right 
                                                    # hand side.

#################################### SAMPLE RFILE #########################
END_OF_MARKER

    exit();
} else {
    if (!open(F,"$replacementfile")) {
        print STDERR "Your rfile $replacementfile could not be opened.\n";
        exit(-599);
    }
    my $f; # read in the file.
    while (<F>) {
        $f .= $_;
    }
    close (F);
    eval($f);
    if ($@) {
    if ($@ =~ /did not return a true value/i) {
        print STDERR "Your rfile ($replacementfile) must return a true \
          value... \nYou may need to put a '1;' at the bottom of the rfile. \
          run with --e for an example rfile\n"; print STDERR "Error: $@\n";
    } else {
        print STDERR "Your rfile ($replacementfile) did not compile \
          properly\n"; print STDERR "Error: $@\n"; 
    }
    exit()
    };
}

if ($#ARGV == -1) {
    print "no files specified\n";
    exit();
}

if ($#NEW == -1 || $#OLD == -1) {
    print "nothing to do (OLD OR NEW EMPTY)\n";
    exit();
}

if (!($#NEW == $#OLD)) {
    print "NEW, OLD, AND OPTIONS arrays are of differing size\n";
    exit();
}

if ($#OPTIONS > $#NEW) {
    print "options array larger than newtext array.\n";
    exit();
}
if ($#OPTIONS > $#OLD) {
    print "options array larger than newtext array.\n";
    exit();
}


my $j;
foreach ($j = 0; $j <  $#NEW; $j++) {
    if (!defined($OLD[$j]) || !defined($NEW[$j])) {
    print "NEW[$j], OLD[$j], AND OPTIONS[$j] are empty.\n";
    exit();
    }
}

my $filename;
# loop through each file provided on the command prompt
foreach $filename (@ARGV) {
    open G, "<$filename" or die "Cannot open file $filename for reading";
    undef $/; 
    my $file = <G>; $/ = "\n";

    my $index;
    my $matchcount = 0;
    my %failedMatches;

    my %mperstr;         # match per str
    my $mperstrcnt = 0;  #  "     "   "  count
    my $hlitfile = $file;    # file with parts to be changed highlighted

    for ($index = 0; $index <= $#NEW; $index++) {
    my $regexp;
    my $endingModifiers;
    if ($OPTIONS[$index] && grep(/^isregexp$/,@{$OPTIONS[$index]})) {
        $regexp = $OLD[$index];
    } else {
        $regexp = removeWhiteSpaceAndEscapeRegexp($OLD[$index]);
    }
    # the main 'workhorse' regular expression
    my $finalRegexp;
    my $reversedRightHandSide; # only used if reversing, so as not to 
                               # override the actual global variables.
    
    if ($OPTIONS[$index] && grep(/^isregexp$/,@{$OPTIONS[$index]}) && \
      grep(/^interpolator$/,@{$OPTIONS[$index]})) {
        my $rightSideExp = $NEW[$index];
        # it seems that they never want / in the right hand side if they 
        # are doing interpolation on right.
        $rightSideExp  =~ s/([\/])/\\$1/g;
        if (!$searchonly) {
        $finalRegexp = '$file =~ ' .  "s/\$regexp/" . $rightSideExp. "/";
        } else {
        $finalRegexp = '$file =~ ' .  "/\$regexp/";
        }
    } else {
        
        if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]}) ) {
        # reverse the right hand side for convenience ....
        $reversedRightHandSide = scalar reverse $NEW[$index];
        if (!$searchonly) {
            $finalRegexp = '$file =~ ' .  "s/\$regexp/\$reversedRightHandSide/";
        } else {
            $finalRegexp = '$file =~ ' .  "/\$regexp/";
        }
        } else {
        if (!$searchonly) {
            $finalRegexp = '$file =~ ' .  "s/\$regexp/\$NEW[\$index]/";
        } else {
            $finalRegexp = '$file =~ ' .  "/\$regexp/";
        }
        }
    }
    if (!$searchonly && $OPTIONS[$index] && \
      grep(/^nonglobalreplace$/,@{$OPTIONS[$index]}) ) {
        $endingModifiers = "is";
        if ($casesensitive ||grep(/^casesensitive$/,@{$OPTIONS[$index]})) {
        $endingModifiers = "s";
        }
    } elsif (!$searchonly) {
        $endingModifiers = "isg";
        if ($casesensitive || grep(/^casesensitive$/,@{$OPTIONS[$index]})) {
        $endingModifiers = "sg";
        }
    } else {  # search only
        $endingModifiers = "isg";
        if ($casesensitive || grep(/^casesensitive$/,@{$OPTIONS[$index]})) {
        $endingModifiers = "sg";
        }
    }
    # sometimes you'll need to reverse the contents of the file.
    if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]})) {
        $file = scalar reverse $file;
    }

    #print "\n\n------\n\n$regexp\n\n$finalRegexp\n\n$file\n\n-----\n\n";

    # count how many times a particular match occurs
    my $rr = "\$file =~ " . "/$regexp/" . $endingModifiers;
    while (eval($rr)){
        die "Error ---: $@\n Code:\n$rr\n" if ($@);
        $mperstrcnt++; 
    }
    $mperstr{$index} = $mperstrcnt;
    $mperstrcnt = 0;

    # if you want to see what you are replacing highlighted within the 
    # file, only in verbose mode
    if ($verbose) {
        $hlitfile = highlightFile($regexp, $endingModifiers, $hlitfile);
        }
    $finalRegexp .= $endingModifiers;

    # use the e flag when we want to evaluate the right side of the reg 
    # exp. during replace
    if (!$searchonly && grep(/^eflag$/,@{$OPTIONS[$index]})) {
        $finalRegexp .= "e";
    }

    if (eval($finalRegexp)) {
        die "Error ---: $@\n Code:\n$finalRegexp\n"  if ($@);  
        $matchcount++;
    } else {
        # count your failed matches...
        $failedMatches{$index} = 1;
    };
# sometimes you'll need to reverse the contents of the file, but don't 
# forget to put it back!!
    if ($OPTIONS[$index] && grep(/^reversefile$/,@{$OPTIONS[$index]})) {
        $file = scalar reverse $file;
    }
    }
    if (!$searchonly) {
    open G, ">$filename" or die "Cannot open file $filename for writing";
    print G "$file";
    close G;
    }
    print "$filename ($matchcount matches found)\n";

    if($verbose) {    # sometimes you may want more information on 
                      # matching, triggers verbose mode
    my $ii = 1;
    my $value;    # print out how many matches for each match type 

        print $hlitfile . "\n";

        print "Times each match is found data below: \n";
        foreach $value (keys %mperstr) {
        # ii --> match index,   hash value --> how many of that type of 
        # match was made
        print "\t$ii -- $mperstr{$value} times\n"; 
        $ii++;
    }
    }
}

sub removeWhiteSpaceAndEscapeRegexp() {
    my ($ins) = @_;
# remove all regexp meaningful characters.
    $ins =~ s/([\@\$\|\*\?\]\[\^\/\+\.\"\(\)])/\\$1/isg;
    # add whitespace independence, but only after weird 
    # (regular expression) chars have been removed.
    $ins =~ s/\s+/\\s*/isg;
    return $ins;
}

sub highlightFile() {
    my ($r, $endMods, $f) = @_;

    my $hfile = $f;

    require POSIX;       
    use Term::Cap;  # use the terminal feature to create highlighted text 
                    # on the screen
        
    my $term = $ENV{TERM} || 'vt100';
    my $terminal;
        
    my $termios = POSIX::Termios->new();
    $termios->getattr;
    my $ospeed = $termios->getospeed;
        
    $terminal =  Term::Cap->Tgetent( { TERM=>undef, OSPEED=>$ospeed } );
        
    print "Error from eval: $@" if ($@);
        
    my ($SO, $SE) = ($terminal->Tputs('so'), $terminal->Tputs('se'));

    my $exp = '$hfile =~ ' . "s/($r)/\${SO}\$1\${SE}/" . $endMods;

    eval($exp);

    die "Error ---: $@\n Code:\n$exp\n" if ($@);
   
    return $hfile;
}