Search a directory tree for all files with the same content


 import hashlib
 import os
 import pprint

 # Group every file under the current directory by the MD5 digest of its
 # bytes; files sharing a digest have the same content (barring collisions).
 hashmap = {}  # content signature -> list of matching filenames
 for path, dirs, files in os.walk('.'):  # walk current directory
     for filename in files:
         fullname = os.path.join(path, filename)
         digest = hashlib.md5()
         # Binary mode: the hash must cover the raw bytes, and hashlib only
         # accepts bytes. `with` guarantees the handle is closed.
         with open(fullname, 'rb') as stream:
             # Feed the hash in fixed-size chunks so large files are never
             # held in memory all at once.
             for chunk in iter(lambda: stream.read(65536), b''):
                 digest.update(chunk)
         hashmap.setdefault(digest.hexdigest(), []).append(fullname)
 pprint.pprint(hashmap)

Σχόλια

Δημοφιλείς αναρτήσεις