woxinfeixiang2015

啥也不说了,直接上源码(Python 2 脚本;其中 os.remove 一行默认被注释掉,因此运行后只会列出重复文件,不会真正删除):

#! /usr/bin/env python
#coding=utf-8
import hashlib
import md5
import os
import time

def getmd5( filename ):
  '''
  Return the hexadecimal MD5 digest of a file's entire contents.

  The file is read in fixed-size chunks so a large file never has to fit in
  memory, and every byte contributes to the digest. Hashing only a leading
  prefix would make two files that differ further on look identical.

  Args:
    filename: path of the file to hash; it is opened in binary mode.

  Returns:
    The 32-character hexadecimal MD5 digest string.

  Raises:
    IOError / OSError: if the file cannot be opened or read.
  '''
  chunk_size = 1024*1024
  digest = hashlib.md5()
  with open( filename, 'rb' ) as stream:
    # iter() with a sentinel keeps calling read() until it returns b'' (EOF).
    for chunk in iter( lambda: stream.read( chunk_size ), b'' ):
      digest.update( chunk )
  return digest.hexdigest()

def delfile(flist_temp):
  '''
  Pick the redundant copies out of a list of paths.

  Two files count as duplicates when they have the same size and getmd5()
  returns the same digest for both. Within each set of duplicates the entry
  that appears last in flist_temp is kept; every earlier entry is returned.

  Args:
    flist_temp: list of paths; entries that are not regular files are ignored.

  Returns:
    List of paths to delete, in the same relative order as in flist_temp.
  '''
  flist = [ f for f in flist_temp if os.path.isfile( f ) ]
  fsize = [ os.stat( f ).st_size for f in flist ]

  # A file whose size is unique cannot have a duplicate, so count the sizes
  # first and only hash files that share their size with another file.
  size_count = {}
  for size in fsize:
    size_count[size] = size_count.get( size, 0 ) + 1

  # Hash every candidate exactly once; None marks "cannot be a duplicate".
  keys = []
  for f, size in zip( flist, fsize ):
    if size_count[size] > 1:
      keys.append( (size, getmd5( f )) )
    else:
      keys.append( None )

  # Later entries overwrite earlier ones, so this ends up holding the index
  # of the last file seen for each (size, digest) pair -- the one to keep.
  last_index = {}
  for index, key in enumerate( keys ):
    if key is not None:
      last_index[key] = index

  return [ f for index, (f, key) in enumerate( zip( flist, keys ) )
           if key is not None and last_index[key] != index ]

def main():
  '''
  Report the duplicate files under the current working directory.

  Walks the current directory recursively, asks delfile() which files are
  redundant copies, and prints each of them followed by summary counts and
  the elapsed wall-clock time. The os.remove() call is commented out, so as
  written nothing is actually deleted. The function then sleeps for 30
  seconds before returning (presumably to keep a console window open).

  Returns:
    0, always.
  '''
  # Single-argument print(...) calls produce the same output whether print
  # is the Python 2 statement or the Python 3 function.
  print( '删除当前目录下的重复文件(包括子文件夹下的重复文件)\n' )
  # time.time() measures elapsed wall-clock time; time.clock() reported CPU
  # time on Unix and no longer exists in Python 3.8+.
  start = time.time()
  path = os.getcwd()
  list_fn = []
  for dirpath, _dirnames, filenames in os.walk( path ):
    for fn in filenames:
      list_fn.append( os.path.join( dirpath, fn ) )

  # delfile() keeps the last entry of each duplicate set, so reversing the
  # walk order makes the copy that os.walk() reached first the survivor.
  list_fn.reverse()
  print( '文件总数:\t%d \n' % len( list_fn ) )
  delf = delfile( list_fn )
  for f in delf:
    print( '删除\t%s' % f )
    # Deletion is disabled; uncomment to really remove the duplicates.
    #os.remove( f )
  end = time.time()
  print( '\n文件总数:\t%d \n' % len( list_fn ) )
  print( '删除文件:\t%d \n' % len( delf ) )
  print( '总共用时:\t%s \n' % ( end - start ) )
  # Disabled self-cleanup: would remove a file named delReFile.py.
  #os.remove('delReFile.py')
  time.sleep(30)
  return 0


# Run the duplicate scan only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
View Code

 

结果:

收好不谢

参考:《删除目录下相同文件 -> 逐级优化(Python 实现)》 http://www.cnblogs.com/ma6174/archive/2012/05/05/2484415.html

分类:

技术点:

相关文章: