donggongdechen
  1 #!/usr/bin/env python3
  2 
  3 # -*- coding: UTF-8 -*-
  4 
  5 from bs4 import BeautifulSoup
  6 import operator
  7 import os,shutil
  8 import re
  9 
 10 def processhtml(item):
 11   html_path = item
 12   with open(html_path) as fp:
 13     soup = BeautifulSoup(fp, "html.parser")
 14   return soup
 15 
 16 def IsComputer(soup_arg):
 17   soup = soup_arg
 18   result = False
 19   try:
 20     value = soup.find(\'input\', {\'name\':\'资源类型\'}).get(\'value\')
 21     if value == \'主机\':
 22       print(\'资源类型:主机\')
 23       result = True
 24     elif value == \'数据库\':
 25       print(\'资源类型:数据库\')
 26     else:
 27       print(\'资源类型:其他\')
 28   except:
 29     pass
 30   return result
 31 
 32 def IsAgree(soup_arg):
 33   soup = soup_arg
 34   result = False
 35   try:
 36     for row in soup.findAll(\'tr\'):
 37       cells = row.findAll(\'td\')
 38       if len(cells) == 4:
 39         if cells[1].findChild("font") != None:
 40           nStr = ""
 41           nStr = nStr.join(cells[0].string)
 42           target = [\'帐号管理人员处理\']
 43           if (operator.eq(nStr.split(), target)):
 44             print(cells[1].font.string)
 45           if (operator.eq(nStr.split(), target) and cells[1].font.string == \'同意\'):
 46             print("满足条件为:%s && 审批意见(同意)" % nStr.split()[0])
 47             result = True
 48   except IndexError as e:
 49     pass
 50   return result
 51 
 52 def IsIntersect(soup_arg):
 53   soup = soup_arg
 54   result = False
 55   try:
 56     value = soup.find(\'input\', {\'name\':\'239385_资源名称\'}).get(\'value\')
 57     temp_list = re.split(\'[、:\n]\', value)
 58     hosts_list = []
 59     hosts_list.clear()
 60     for hostlist in temp_list:
 61       if re.search(\'[a-z]\', hostlist):
 62         print(hostlist)
 63         hosts_list.append(hostlist)
 64     hosts_set = set(hosts_list)
 65     if target_hosts.intersection(hosts_set):
 66       print(\'非空,有交集\')
 67       result = True
 68     else:
 69       print("空,无交集")
 70   except:
 71     pass
 72   return result
 73 
 74 def IsIntersect2(soup_arg):
 75   soup = soup_arg
 76   result = False
 77   try:
 78     value = soup.find(\'input\', {\'name\':\'所在的硬件设备/软件平台\'}).get(\'value\')
 79     temp_list = re.split(\'[、:\n]\', value)
 80     hosts_list = []
 81     hosts_list.clear()
 82     for hostlist in temp_list:
 83       if re.search(\'[a-z]\', hostlist):
 84         hosts_list.append(hostlist)
 85     hosts_set = set(hosts_list)
 86     if target_hosts.intersection(hosts_set):
 87       print(\'非空,有交集\')
 88       result = True
 89     else:
 90       print("空,无交集")
 91   except:
 92     pass
 93   return result
 94 
 95 if __name__ == \'__main__\':
 96   target_hosts = {\'cmszsoaa\', \'cmszsoab\', \'cmszdcss\', \'cmszicss\', \'cmsznpsa\', \'cmsznpsb\', \'cmszinta\', \'cmszintb\',
 97           \'cmszdpsa\', \'cmszdpsb\', \'mcbsoaa\', \'mcbsoab\', \'mcbinta\', \'mcbintb\', \'mcbdpsa\', \'mcbdpsb\',
 98           \'mcbnpsa\', \'mcbnpsb\', \'mcbdcss\', \'mcbicss\', \'newdcss\', \'newicss\'}
 99 
100   work_dir = \'/root/XmlOut/\'
101   target_dir = \'/root/AccountOut/\'
102 
103   for parent, dirnames, filenames in os.walk(work_dir, followlinks=True):
104     for filename in filenames:
105       file_path = os.path.join(parent, filename)
106       print("filename with full path: %s" % file_path)
107       soup = processhtml(file_path)
108       flag1 = IsComputer(soup)
109       flag2 = IsAgree(soup)
110       flag3 = IsIntersect(soup)
111       flag4 = IsIntersect2(soup)
112       if (flag1 and flag2 and (flag3 or flag4)):
113         print(\'%s, ok----\' % (file_path))
114         shutil.copy(file_path, target_dir)

分类:

技术点:

相关文章: