这里的正则表达式是错误的工具我没有你的文件,所以我创建了随机演示数据:
import random
import string
random.seed(42) # make random repeatable
def generateFileNames(amount):
"""Generate 2*amount of names XX_XXXX with X in [a-zA-T0-9] with duplicates in it"""
def rndName():
"""generate one random name XX_XXXX with X in [a-zA-T0-9]"""
characters = string.ascii_lowercase + string.digits
return random.choices(characters,k=2)+['_']+random.choices(characters,k=4)
for _ in range(amount): # create 2*amount names, some duplicates
name = rndName()
yield ''.join(name) # yield name once
if random.randint(1,10) > 3: # more likely to get same names twice
yield ''.join(name) # same name twice
else:
yield ''.join(rndName()) # different 2nd name
def generateNumberParts(amount):
"""Generate 2*amount of 6-digit-strings, some with 17+18 as starting numbers"""
def rndNums(nr):
"""Generate nr digits as string list"""
return random.choices(string.digits,k=nr)
for _ in range(amount):
choi = rndNums(4)
# i am yielding 18 first to demonstrate that sorting later works
yield ''.join(['18']+choi) # 18xxxx numbers
if random.randint(1,10) > 5:
yield ''.join(['17']+choi) # 17xxxx
else:
yield ''.join(rndNums(6)) # make it something other
# half the amount of files generated
m = 10
# generate filenames
filenames = [''.join(x)+'.xml' for x in zip(generateFileNames(m),
generateNumberParts(m)]
现在我有我的名字作为列表,可以开始找出哪些是具有较新时间戳的骗子:
# make a dict out of your filenames, use first 7 as key
# with list of values of files starting with this key a values:
fileDict={}
for names in filenames:
fileDict.setdefault(names[0:7],[]).append(names) # create key=[] or/and append names
for k,v in fileDict.items():
print (k, " " , v)
# get files to delete (all the lower nr of the value-list if multiple in it)
filesToDelete = []
for k,v in fileDict.items():
if len(v) == 1: # nothing to do, its only 1 file
continue
print(v, " to ", end = "" ) # debugging output
v.sort(key = lambda x: int(x[7:9])) # sort by a lambda that integerfies 17/18
print (v) # debugging output
filesToDelete.extend(v[:-1]) # add all but the last file to the delete list
print("")
print(filesToDelete)
输出:
# the created filenames in your dict by "key [values]"
xa_ji0y ['xa_ji0y188040.xml', 'xa_ji0y501652.xml']
v3_a3zm ['v3_a3zm181930.xml']
mm_jbqe ['mm_jbqe171930.xml']
ck_w5ng ['ck_w5ng180679.xml', 'ck_w5ng348136.xml']
zy_cwti ['zy_cwti184296.xml', 'zy_cwti174296.xml']
41_iblj ['41_iblj182983.xml', '41_iblj172983.xml']
5x_ff0t ['5x_ff0t187453.xml']
sd_bdw2 ['sd_bdw2177453.xml']
vn_vqjt ['vn_vqjt189618.xml', 'vn_vqjt179618.xml']
ep_q85j ['ep_q85j185198.xml', 'ep_q85j175198.xml']
vf_1t2t ['vf_1t2t180309.xml', 'vf_1t2t089040.xml']
11_ertj ['11_ertj188425.xml', '11_ertj363842.xml']
# sorting the names by its integer at 8/9 position of name
['xa_ji0y188040.xml','xa_ji0y501652.xml'] to ['xa_ji0y188040.xml','xa_ji0y501652.xml']
['ck_w5ng180679.xml','ck_w5ng348136.xml'] to ['ck_w5ng180679.xml','ck_w5ng348136.xml']
['zy_cwti184296.xml','zy_cwti174296.xml'] to ['zy_cwti174296.xml','zy_cwti184296.xml']
['41_iblj182983.xml','41_iblj172983.xml'] to ['41_iblj172983.xml','41_iblj182983.xml']
['vn_vqjt189618.xml','vn_vqjt179618.xml'] to ['vn_vqjt179618.xml','vn_vqjt189618.xml']
['ep_q85j185198.xml','ep_q85j175198.xml'] to ['ep_q85j175198.xml','ep_q85j185198.xml']
['vf_1t2t180309.xml','vf_1t2t089040.xml'] to ['vf_1t2t089040.xml','vf_1t2t180309.xml']
['11_ertj188425.xml','11_ertj363842.xml'] to ['11_ertj188425.xml','11_ertj363842.xml']
# list of files to delete
['xa_ji0y188040.xml', 'ck_w5ng180679.xml', 'zy_cwti174296.xml', '41_iblj172983.xml',
'vn_vqjt179618.xml', 'ep_q85j175198.xml', 'vf_1t2t089040.xml', '11_ertj188425.xml']