PROXY  WHOIS  RQUOTE  TEXTS  SOFT  FOREX  BBOARD
 Music  Philosophy  Code  Literature  Russian

= ROOT|Technical|Code_Examples|Perl|dedup.pl =

page 1 of 2



#!/usr/bin/perl

use strict;
use File::Spec;
use File::Compare;
use File::Copy;
use File::Basename;
use Cwd;

our $DEBUG=0;
if ($DEBUG){
 use Data::Dump qw(dump);
}

our $TMP="DUPFILES";
our $order=0;
our %files=();
our %dirs=();

sub find_dir {
 my ($a, $level) = @_;
 opendir(DIR, $a) or warn "Can't open directory '$a'";
 my @dl=readdir(DIR);
 closedir(DIR);
 foreach my $d (@dl){
  next if $d=~m/^\./o;
  next if $d eq $TMP;
  my $canon=File::Spec->canonpath(
   Cwd::realpath(
    File::Spec->catfile($a,$d)
   )
  );
  return if $dirs{$canon}++;
  if (-f $canon){
   unless (defined($files{$canon})){
    $files{$canon}=$level<<8 | $order++;
   }
  } elsif (-d $canon){
   find_dir($canon, $level+1);
  }
 }
}


foreach my $a (@ARGV){
 find_dir($a,0);
}

my @flist=sort {$files{$a} <=> $files{$b}} keys %files;
my %S;

foreach my $f (@flist){
 push @{$S{(stat($f))[7]}}, $f;
}

dump(%S) if $DEBUG;

while ( my ($size, $refnames) = each %S ){
 my @n=@{$refnames};
 my $ns=@n;
 for (my $j=0; $j < $ns-1; $j++){
  next unless $n[$j];
  for (my $i=1+$j; $i < $ns; $i++){
   next unless $n[$i];
   print "$n[0] : $n[$i]\n" if $DEBUG;;
   if (!File::Compare::compare($n[0],$n[$i])){
    print "dup: $n[$i] == $n[0]\n";
    $n[$i]=undef if myunlink($n[$i]);
   }
  }
 }
}

sub myunlink {
 my ($fn, $dir, $suff) = File::Basename::fileparse($_[0]);
 my $tmpdir="$dir$TMP";
 if (-d $tmpdir || mkdir($tmpdir)){
  return File::Copy::move($_[0], $tmpdir);
 } else {
  warn "mkdir($tmpdir): $!";
 }
 return 0;
}

__END__

=head1 NAME

dedup - Remove duplicate files from directories list

=head1 SYNOPSIS

    dedup.pl DIR1 DIR2 ..

=head1 DESCRIPTION

  This utility recursively passes through directories list hierarchy
  DIR1, DIR2, .. and leaves only one copy from identical files was found.
  Duplicate files are being moved to special directories
  DUPFILES for further revision and easy manual removing from
=1=

= PAGE 1 = NEXT > |2

UP TO ROOT | UP TO DIR

Google
 


E-mail Facebook Google Digg del.icio.us BlinkList Fark Furl Ma.gnolia Netscape NewsVine Reddit Slashdot Spurl StumbleUpon Technorati YahooMyWeb LiveJournal Blogmarks TwitThis Live News2.ru BobrDobr.ru Memori.ru MoeMesto.ru

0.0197241 wallclock secs ( 0.01 usr + 0.00 sys = 0.01 CPU)