#!/usr/bin/perl
use strict;
use File::Spec;
use File::Compare;
use File::Copy;
use File::Basename;
use Cwd;
our $DEBUG=0;
if ($DEBUG){
use Data::Dump qw(dump);
}
our $TMP="DUPFILES";
our $order=0;
our %files=();
our %dirs=();
sub find_dir {
my ($a, $level) = @_;
opendir(DIR, $a) or warn "Can't open directory '$a'";
my @dl=readdir(DIR);
closedir(DIR);
foreach my $d (@dl){
next if $d=~m/^\./o;
next if $d eq $TMP;
my $canon=File::Spec->canonpath(
Cwd::realpath(
File::Spec->catfile($a,$d)
)
);
return if $dirs{$canon}++;
if (-f $canon){
unless (defined($files{$canon})){
$files{$canon}=$level<<8 | $order++;
}
} elsif (-d $canon){
find_dir($canon, $level+1);
}
}
}
foreach my $a (@ARGV){
find_dir($a,0);
}
my @flist=sort {$files{$a} <=> $files{$b}} keys %files;
my %S;
foreach my $f (@flist){
push @{$S{(stat($f))[7]}}, $f;
}
dump(%S) if $DEBUG;
while ( my ($size, $refnames) = each %S ){
my @n=@{$refnames};
my $ns=@n;
for (my $j=0; $j < $ns-1; $j++){
next unless $n[$j];
for (my $i=1+$j; $i < $ns; $i++){
next unless $n[$i];
print "$n[0] : $n[$i]\n" if $DEBUG;;
if (!File::Compare::compare($n[0],$n[$i])){
print "dup: $n[$i] == $n[0]\n";
$n[$i]=undef if myunlink($n[$i]);
}
}
}
}
sub myunlink {
my ($fn, $dir, $suff) = File::Basename::fileparse($_[0]);
my $tmpdir="$dir$TMP";
if (-d $tmpdir || mkdir($tmpdir)){
return File::Copy::move($_[0], $tmpdir);
} else {
warn "mkdir($tmpdir): $!";
}
return 0;
}
__END__
=head1 NAME
dedup - Remove duplicate files from directories list
=head1 SYNOPSIS
dedup.pl DIR1 DIR2 ..
=head1 DESCRIPTION
This utility recursively passes through directories list hierarchy
DIR1, DIR2, .. and leaves only one copy from identical files was found.
Duplicate files are being moved to special directories
DUPFILES for further revision and easy manual removing from
=1= |