Article Figure 1 Figure 2 Figure 3 Figure 4
Figure 5 Listing 1 Listing 2 Listing 3 Listing 4 aug2004.tar

Listing 1 compare_table_CRC.pl: Tells if two tables in two remote databases are different

#!/usr/bin/perl -w
# compare_table_CRC
# (C) Giuseppe Maxia 2003
#
# Compares two tables in two remote databases
# and checks for any difference, by comparing a global CRC
# of all the records.
# Does not return individual different records. Use the 
# find_differences script for such purposes
use strict;
use DBI;

# This script is for educational purposes only.
# Parameters are embedded in the script itself.
# For production, it woyld be better to use either
# a config file or any Getopt::XX modules
my $host1 = "localhost";
my $host2 = "192.168.59.9";
my $db1 = "xcompany";
my $db2 = "xcompany";
my $tablename = "employees";
my ($user1, $password1) = ('myuser', 'secret');
my ($user2, $password2) = ('myuser', 'secret');

my $dbh1 =  DBI->connect("DBI:mysql:$db1;"
    . "host=$host1;port=3306",
    $user1, 
    $password1, 
    {RaiseError => 1, PrintError =>0}) 
    or die "can't connect : $DBI::errstr";

my $dbh2 =  DBI->connect("DBI:mysql:$db2;"
    . "host=$host2;port=3306",
    $user2, 
    $password2, 
    {RaiseError => 1, PrintError =>0}) 
    or die "can't connect : $DBI::errstr";

unless (grep {$_ eq $tablename} $dbh1->tables()) {
    die "$tablename not found in $db1\n"
}

unless (grep {$_ eq $tablename} $dbh2->tables()) {
    die "$tablename not found in $db2\n"
}

my $fields = get_fields($dbh1, $tablename);

my $check_table = qq{
   SELECT 
   COUNT(*) AS cnt, 
   CONCAT(SUM(CONV(SUBSTRING(\@CRC:=MD5(CONCAT_WS('/##/',$fields)),1,8),16,10)),
    SUM(CONV(SUBSTRING(\@CRC, 9,8),16,10)),
    SUM(CONV(SUBSTRING(\@CRC,17,8),16,10)),
    SUM(CONV(SUBSTRING(\@CRC,25,8),16,10))
    ) AS sig 
    FROM $tablename 
    };

print $check_table,$/;

my $probe1 = $dbh1->selectall_arrayref($check_table);
my $probe2 = $dbh2->selectall_arrayref($check_table);

print "$db1\t $probe1->[0]->[0]\t $probe1->[0]->[1]\n";
print "$db2\t $probe2->[0]->[0]\t $probe2->[0]->[1]\n";

my $result = (($probe2->[0][0] ne $probe1->[0][0]) 
    or ($probe2->[0][1] ne $probe1->[0][1])) ?
        "DIFFERENCE FOUND" :  "NO DIFFERENCE" ; 
 
print "$result\n";

# returns the list of fields, ready to insert into the CRC query
sub get_fields {
    my ($dbh, $tablename) = @_;
    my $sth = $dbh->prepare(qq{describe $tablename});
    $sth->execute();
    my @fields=();
    while (my $row = $sth->fetchrow_hashref()) {
        # discards TIMESTAMP fields from the comparison
        next if lc $row->{Type} eq 'timestamp';
        my $field ="`$row->{Field}`";
        # if the field is nullable,
        # then a COALESCE function is used
        # to prevent the whole CONCAT from becoming NULL
        if (lc $row->{Null} eq 'yes') {
            $field = qq[coalesce($field,"#NULL#")];
        }
        push @fields, $field;
    }
    return join ",", @fields; 
}