# ---------------------------------------------------------------------------
# DISABLED SCRIPT (every statement below is commented out; nothing executes).
#
# What the commented-out code does, read top to bottom:
#   1. Builds an AipOcr client from hard-coded APP_ID / API_KEY / SECRET_KEY.
#   2. Downloads one ziroom.com page and reads the `style` attributes selected
#      by a fixed XPath.
#   3. From each style string, extracts the background-position offset (px)
#      and the background-image URL with a regex.
#   4. Sends the image URL to client.basicGeneralUrl() for text recognition.
#   5. Maps each offset to an index (offset = index * 31.24) and uses those
#      indices to pick characters out of the recognized text.
#
# NOTE(review): the credentials below are stored in plain text; if they are
#   real they should be rotated and loaded from configuration instead.
# NOTE(review): the original text was collapsed onto a single line, so the
#   indentation (loop-body extents) below is reconstructed -- confirm it.
# NOTE(review): quote characters appeared as \' in the collapsed text; they
#   are shown here as plain quotes on the assumption that the backslashes were
#   an escaping artifact -- confirm.
# ---------------------------------------------------------------------------
#
# from aip import AipOcr
# import requests
# import re
# import os
# from decimal import Decimal  # floating-point precision arithmetic
# from lxml import html  # importing etree directly raised an error (version issue), so it is imported this way
# etree = html.etree  # bring etree into scope
# from string import punctuation
#
# """ Your APPID / AK / SK """
# APP_ID = '23597797'
# API_KEY = 'Va3onwymweV9htshK13GiNUs'
# SECRET_KEY = 'FiAd8gWb489uDD2yUI7Y1iKaxQUOwqwM'
# client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
#
# headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
# source= requests.get('http://tj.ziroom.com/x/777120723.html',headers=headers).text
# # print(source)
# base=etree.HTML(source).xpath('/html/body/section/aside/div[1]/i/@style')
# px=[]
# urls1=''
# for i in base:
#     a='background-position:-(.*?)px;background-image: url\((.*?)\);'
#     demo = re.compile(a)  # compile the regex pattern string
#     lists = demo.findall(i)
#     px.append(lists[0][0])
#     urls1='http:'+lists[0][1]  # URL of the verification image
# print(px,urls1)
# # Run text recognition on the image behind the extracted link
# url = urls1
# """ Call general text recognition; the image argument is a remote URL """
# client.basicGeneralUrl(url);  # NOTE(review): this result is discarded; the call is repeated with options below
# """ If there are optional parameters """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
# """ Call general text recognition with options; the image argument is a remote URL """
# base=client.basicGeneralUrl(url, options)
# print(base)
# dicts={}
# n=0
# b=31.24
# for i in range(10):  # build key/value pairs mapping each image position offset to its digit index
#     dicts[str(n)]=i
#     n=round(n+b,2)  # keep the float to two decimal places
# print(dicts)
# index=[dicts[x] for x in px]  # look up the index that corresponds to each extracted position
# print(index)
# yzm=base['words_result'][0]['words']  # the recognized text
# jg=[yzm[x] for x in index ]  # use the indices to pick the matching characters from the recognized text
# jg=''.join(jg)  # join into a single string
# print(yzm)
# print(jg)