#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Pick out approved host-account request forms that touch selected hosts.

Every file under the work directory is parsed as HTML.  A file is copied to
the target directory when all of the following hold:

* its ``资源类型`` (resource type) input has the value ``主机`` (host);
* a four-cell table row labelled ``帐号管理人员处理`` (handled by the account
  administrator) carries the opinion ``同意`` (agree);
* at least one host named in either host-list input is in ``target_hosts``.
"""

import operator
import os
import re
import shutil

from bs4 import BeautifulSoup

# Host names of interest.  Kept at module level (under its original name) so
# the IsIntersect* predicates also work when this file is imported rather than
# run as a script.
target_hosts = {
    'cmszsoaa', 'cmszsoab', 'cmszdcss', 'cmszicss', 'cmsznpsa', 'cmsznpsb',
    'cmszinta', 'cmszintb', 'cmszdpsa', 'cmszdpsb', 'mcbsoaa', 'mcbsoab',
    'mcbinta', 'mcbintb', 'mcbdpsa', 'mcbdpsb', 'mcbnpsa', 'mcbnpsb',
    'mcbdcss', 'mcbicss', 'newdcss', 'newicss',
}

# Characters that separate entries inside a host-list field.
_HOST_SEPARATORS = re.compile('[、:\n]')
# An entry only counts as a host name if it contains a lowercase ASCII letter.
_LOWERCASE_LETTER = re.compile('[a-z]')


def processhtml(item):
    """Parse the HTML file at path *item* and return the BeautifulSoup tree.

    The file is opened in binary mode so that BeautifulSoup performs the
    encoding detection itself instead of relying on the locale's default
    text encoding.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(item, 'rb') as fp:
        return BeautifulSoup(fp, "html.parser")


def IsComputer(soup_arg):
    """Return True when the form's ``资源类型`` input has the value ``主机``.

    Prints the detected resource category.  Returns False, printing nothing,
    when the document has no such input.
    """
    field = soup_arg.find('input', {'name': '资源类型'})
    if field is None:
        return False

    value = field.get('value')
    if value == '主机':
        print('资源类型:主机')
        return True
    if value == '数据库':
        print('资源类型:数据库')
    else:
        print('资源类型:其他')
    return False


def IsAgree(soup_arg):
    """Return True when the account administrator's opinion is ``同意``.

    Looks at every ``<tr>`` that has exactly four ``<td>`` cells, whose second
    cell contains a ``<font>`` element and whose first cell's text is
    ``帐号管理人员处理``.  The opinion found is printed for each such row.
    """
    wanted_label = ['帐号管理人员处理']
    result = False
    for row in soup_arg.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 4:
            continue
        opinion = cells[1].find('font')
        if opinion is None:
            continue
        label = cells[0].string
        # .string is None when the cell holds nested markup rather than a
        # single text node; such a row cannot be the one we are looking for.
        if label is None or label.split() != wanted_label:
            continue

        print(opinion.string)
        if opinion.string == '同意':
            print("满足条件为:%s && 审批意见(同意)" % label.split()[0])
            result = True
    return result


def _field_hits_target_hosts(soup, field_name, echo_hosts=False):
    """Report whether the input named *field_name* lists a host of interest.

    The input's value is split on ``_HOST_SEPARATORS``; pieces containing a
    lowercase letter are treated as host names (and printed when *echo_hosts*
    is true).  Returns False, printing nothing, when the input or its value
    is missing.
    """
    field = soup.find('input', {'name': field_name})
    value = field.get('value') if field is not None else None
    if value is None:
        return False

    hosts = [part for part in _HOST_SEPARATORS.split(value)
             if _LOWERCASE_LETTER.search(part)]
    if echo_hosts:
        for host in hosts:
            print(host)

    if target_hosts.isdisjoint(hosts):
        print("空,无交集")
        return False
    print('非空,有交集')
    return True


def IsIntersect(soup_arg):
    """Return True when the ``239385_资源名称`` input names a target host.

    Each candidate host name is printed before the verdict.
    """
    return _field_hits_target_hosts(soup_arg, '239385_资源名称',
                                    echo_hosts=True)


def IsIntersect2(soup_arg):
    """Return True when the ``所在的硬件设备/软件平台`` input names a target host."""
    return _field_hits_target_hosts(soup_arg, '所在的硬件设备/软件平台')


def main(work_dir='/root/XmlOut/', target_dir='/root/AccountOut/'):
    """Copy every matching form found under *work_dir* into *target_dir*."""
    # shutil.copy needs an existing directory as its destination.
    os.makedirs(target_dir, exist_ok=True)

    for parent, _dirnames, filenames in os.walk(work_dir, followlinks=True):
        for filename in filenames:
            file_path = os.path.join(parent, filename)
            print("filename with full path: %s" % file_path)
            soup = processhtml(file_path)
            # Evaluate every check up front (no short-circuiting) so each one
            # always reports its finding for every file.
            is_host = IsComputer(soup)
            approved = IsAgree(soup)
            named_hit = IsIntersect(soup)
            platform_hit = IsIntersect2(soup)
            if is_host and approved and (named_hit or platform_hit):
                print('%s, ok----' % (file_path))
                shutil.copy(file_path, target_dir)


if __name__ == '__main__':
    main()
# Related articles: (相关文章:)