Listing 4 compare_counts.pl: Finding missing records
from remote tables
#!/usr/bin/perl -w
# compare_counts
# (C) Giuseppe Maxia 2003
# Compare two tables in two remote databases, looking for missing records.
# If the global count is different, compares the tables group by group
# and returns the missing records.
# It does not find records that are missing in the same number, in both
# tables and in the same group. For such cases, the find_differences script
# must be used.
use strict;
use DBI;
# This script is for educational purposes only.
# Parameters are embedded in the script itself.
# For production, it woyld be better to use either
# a config file or any Getopt::XX modules
my $host1 = "localhost";
my $host2 = "192.168.59.9";
my $db1 = "xcompany";
my $db2 = "xcompany";
my $tablename = "employees";
# primary key of the table
my $PK = "employee_code";
# Grouping column, with eventually a formula to obtain it
# (Used in the field list of a SELECT statement)
my $grouping_col = qq{FLOOR(employee_code / 1204)};
# alias for grouping column, to be used in the GROUP BY clause
my $grouping_alias = "empcode";
my ( $user1, $password1 ) = ( 'myuser', 'secret' );
my ( $user2, $password2 ) = ( 'myuser', 'secret' );
my $dbh1 =
DBI->connect( "DBI:mysql:$db1;" . "host=$host1", $user1, $password1,
{ RaiseError => 1, PrintError => 0 } )
or die "can't connect : $DBI::errstr";
my $dbh2 =
DBI->connect( "DBI:mysql:$db2;" . "host=$host2", $user2, $password2,
{ RaiseError => 1, PrintError => 0 } )
or die "can't connect : $DBI::errstr";
unless ( grep { $_ eq $tablename } $dbh1->tables() ) {
die "$tablename not found in $db1\n";
}
unless ( grep { $_ eq $tablename } $dbh2->tables() ) {
die "$tablename not found in $db2\n";
}
compare_counts();
$dbh1->disconnect();
$dbh2->disconnect();
sub compare_counts {
my $count_query = qq{SELECT COUNT(*) FROM $tablename};
my ($count1) = @{ $dbh1->selectcol_arrayref($count_query) };
my ($count2) = @{ $dbh2->selectcol_arrayref($count_query) };
if ( $count2 == $count1 ) {
print "Table $tablename in $db1 and in $db2 ",
"have the same number of records\n";
return;
}
my $get_counts_query =
qq{SELECT $grouping_col AS $grouping_alias, COUNT(*) FROM $tablename
GROUP BY $grouping_alias };
my $get_PK_query = qq{SELECT $PK FROM $tablename WHERE $grouping_col = ? };
# gets the counts for each range
my $set1 = get_counts( $dbh1, $get_counts_query );
my $set2 = get_counts( $dbh2, $get_counts_query );
my %differences = ();
# finds the groups in server2 missing from server 1
for ( keys %$set2 ) {
if ( not exists $set1->{$_} ) {
my $missing_columns =
get_columns( $dbh2, $get_PK_query, $set2->{$_}->[0] );
print "missing from $db1 (whole group $set2->{$_}->[0] ):",
" @$missing_columns\n";
}
}
# finds the groups in server1 missing from server2
for ( keys %$set1 ) {
my ( $dbh, $db, $columns ) = ( undef, undef, undef );
if ( not exists $set2->{$_} ) {
my $missing_columns =
get_columns( $dbh1, $get_PK_query, $set1->{$_}->[0] );
print "missing from $db2 (whole group $set1->{$_}->[0] ):",
" @$missing_columns\n";
}
# establishes which set is smaller and gets the primary key from
# the lesser one.
elsif ( $set1->{$_}[1] > $set2->{$_}[1] ) {
$columns = get_columns( $dbh2, $get_PK_query, $set2->{$_}->[0] );
$db = $db1;
$dbh = $dbh1;
}
elsif ( $set2->{$_}[1] > $set1->{$_}[1] ) {
$columns = get_columns( $dbh1, $get_PK_query, $set1->{$_}->[0] );
$db = $db2;
$dbh = $dbh2;
}
# if the count is the same for this group, do nothing
else {
next;
}
# gets the missing record from the server with the most
# records in this group by comparing with the list taken
# from the lesser server
my $group = $set1->{$_}->[0];
my $get_missing_PK_query =
qq{SELECT $PK FROM $tablename WHERE $grouping_col = $group
AND $PK NOT IN (}
. join ( ",", map { $dbh->quote($_) } @$columns ) . ")";
my $missing_cols = $dbh->selectcol_arrayref($get_missing_PK_query);
print "missing from ", ( ( $db eq $db1 ) ? $db2 : $db1 ),
" (missing lines): @$missing_cols\n";
}
}
# returns the first column of one query with a specific parameter
sub get_columns {
my ( $dbh, $query, $param ) = @_;
my $sth = $dbh->prepare($query);
$sth->execute($param);
my @columns = map { $_->[0] } @{ $sth->fetchall_arrayref() };
return \@columns;
}
# returns the set of counts converted into a hash
sub get_counts {
my ( $dbh, $query ) = @_;
my $sth = $dbh->prepare($query);
$sth->execute();
my %set = ();
while ( my $row = $sth->fetchrow_arrayref() ) {
$set{ $row->[0] } = [ $row->[0], $row->[1] ];
}
return \%set;
} |