JS:修改 iframe(ueditor_0 编辑器)内 body 的 HTML 内容
it=document.getElementById(\'ueditor_0\').contentWindow.document.getElementsByTagName("body")[0];
it.innerHTML=\'<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>\'
from selenium import webdriver from time import sleep import time from selenium.webdriver.common.keys import Keys import os import requests import time import threading import logging import random start_time = time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())) os_sep = os.sep this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[ -1] logf = this_file_name + \'.log\' try: logging.basicConfig(level=logging.INFO, format=\'%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]\', datefmt=\'%a, %d %b %Y %H:%M:%S\', filename=logf, filemode=\'a\') except Exception as e: s = \'%s%s%s\' % (\'logging.basicConfig EXCEPTION \', time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), e) with open(logf, \'a\') as fo: fo.write(s) print(s) os._exit(4002) logging.info(\'START\') img_url = \'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png\' img_dir = \'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\\' def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default=\'default.DONOT_REMOVE.png\'): r = \'%s%s\' % (img_dir, local_default) try: bytes = requests.get(img_url)._content r = \'%s%s%s%s%s\' % ( img_dir, time.strftime(\'%Y%m%d%H%M%S\', time.localtime(time.time())), str(threading.get_ident()), img_url.replace(\'/\', \'_xl_\').replace(\':\', \'_fxl_\').replace(\'?\', \'_fxlquestion_\').replace(\'=\', \'_fxlequal_\').replace( \'&\', \'_fxland_\'), \'.png\') if bytes != 0: with open(r, \'wb\')as f: f.write(bytes) except Exception as e: print(e) return r import pymysql h, pt, u, p, db = \'192.168.2.210\', 3306, \'root\', \'joke_\', \'star_media_helper\' def mysql_fetch(sql, res_type=\'tuple\'): global h, pt, u, p, db try: conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset=\'utf8\') except Exception as e: print(e) return () if res_type == 
\'dic\': cursor = conn.cursor(pymysql.cursors.DictCursor) else: cursor = conn.cursor() cursor.execute(sql) conn.commit() cursor.close() conn.close() return cursor.fetchall() def mysql_write(sql): global h, pt, u, p, db try: conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset=\'utf8\') except Exception as e: print(e) return 1 cursor = conn.cursor() cursor.execute(sql) conn.commit() cursor.close() conn.close() return 0 import random while True: logging.info(\'LOOP----\') sql = \'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )\' sql = \'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )\' res = mysql_fetch(sql) ac_l = [{\'u\': i[0], \'p\': i[1], \'toutiao_uid\': i[2]} for i in res] for ac in ac_l: myid, mypwd, toutiao_uid = ac[\'u\'], ac[\'p\'], ac[\'toutiao_uid\'] # 发布限制条件逻辑 sql = "SELECT * FROM joke__star_helper_relation_wukong_question WHERE INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={} ORDER BY id DESC; ".format( toutiao_uid, int(time.time())); sql = "SELECT * FROM joke__helper_article_publish WHERE INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={} ORDER BY id DESC; ".format( toutiao_uid, int(time.time())); print(sql) logging.info(sql) res_content = mysql_fetch(sql, \'dic\') if len(res_content) == 0: continue id_article_list = [i[\'id_article_list\'] for i in res_content] sql = \'SELECT * FROM joke__helper_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke__helper_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; \'.format( \',\'.join([i[\'id_article_list\'] for i in res_content]), toutiao_uid) # sql = \'SELECT * FROM joke__star_helper_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM 
joke__star_helper_toutiaouser_wukong_question) LIMIT 1\' logging.info(sql) res_content = mysql_fetch(sql, \'dic\') if len(res_content) == 0: continue browser = webdriver.Chrome() f_url_l = [\'https://www.toutiao.com/group/1589657566362638/\', \'https://www.wukong.com/question/6388670742287876353/\', \'https://www.wukong.com/tag/6215497898671475202/\'] f_url_l += [\'https://www.wukong.com/question/6512777037948649741/\', \'https://www.wukong.com/question/6469247721038414093/\', \'https://www.wukong.com/question/6481502080249889037/\'] # f_url_l = [] f_url_l = [\'https://www.toutiao.com/a6514526304476332552/\', \'https://www.toutiao.com/a6514661446876398088/\', \'https://www.toutiao.com/a6514778729951003150/\'] f_url_l += [\'https://www.toutiao.com/a6514216125151052291/\', \'https://www.toutiao.com/a6512315164463727111/\', \'https://www.toutiao.com/a6513334304318161411/\'] f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)] # browser.get(random.choice(f_url_l)) browser.get(f_url_l_a) time.sleep(random.randint(10, 20)) js = \'window.location.href="https://sso.toutiao.com/login/";\' js = \'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";\' browser.execute_script(js) time.sleep(random.randint(10, 20)) # js = \'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";\' browser.execute_script(js) ac_type = \'qq\' if ac_type == \'qq\': myid, mypwd = ac[\'u\'], ac[\'p\'] xp = \'/html/body/div/div/div[2]/div/div/div/ul/li[3]\' browser.find_element_by_xpath(xp).click() time.sleep(10) js = \'%s%s%s\' % (\'document.getElementById("u").value="\', myid, \'"\') browser.execute_script(js) js = \'%s%s%s\' % (\'document.getElementById("p").value="\', mypwd, \'"\') browser.execute_script(js) time.sleep(random.randint(5, 15)) xp_newpage = \'//*[@id="go"]\' browser.find_element_by_xpath(xp_newpage).click() time.sleep(random.randint(10, 20)) elif ac_type == 
\'mail_qq\': continue time.sleep(5) browser.refresh() js = \'window.location.href="https://www.toutiao.com/";\' browser.execute_script(js) browser.refresh() time.sleep(6) js = \'window.location.href="https://www.wukong.com/";\' js = \'window.location.href="https://mp.toutiao.com/profile_v2/publish/";\' js = \'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";\' browser.execute_script(js) time.sleep(6) # title js = \'%s%s%s\' % (\'document.getElementById("title").value="\', \'林志玲捐款记录被翻出 单笔高达千万\', \'"\') js = \'document.getElementById("title").value="{}"\'.format(\'林志玲捐款记录被翻出 单笔高达千万\') browser.execute_script(js) time.sleep(2) fhtml, dbhtml_str = \'toutaio.db.html\', \'\' with open(fhtml, \'r\', encoding=\'utf-8\') as fr: for hi in fr: dbhtml_str = \'{}{}\'.format(dbhtml_str, hi.replace(\'\n\', \'\')) db_html = dbhtml_str # db_html = \'<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>\' js = \'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"\'.format( db_html) browser.execute_script(js) time.sleep(2) xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\' xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\' browser.find_element_by_xpath(xp).click() dd = 9 # js = \'document.getElementsByClassName("ask")[0].click();\' # browser.execute_script(js) # time.sleep(12) # time.sleep(random.randint(10, 20)) # # 需要键盘事件 反爬虫 # tmp_target = browser.find_element_by_class_name(\'input-box\').find_element_by_tag_name(\'input\') # tmp_target.send_keys(Keys.SPACE) # tmp_target.send_keys(Keys.CONTROL, \'a\') # tmp_target.send_keys(Keys.CONTROL, \'x\') # tmp_target.send_keys(Keys.CONTROL, \'v\') # tmp_target.send_keys(Keys.BACK_SPACE) # time.sleep(random.randint(10, 20)) # res_content = [] for i in res_content[0:1]: dbid, content, img_list = i[\'id\'], i[\'content\'], 
i[\'img_list\'] tmp_l = [\'口红\', \'指甲油\', \'护发素\', \'沐浴露\', \'洗手液\', \'洗发水\', \'牙膏\'] tmp_l_1 = [\'老人\', \'小孩\', \'白领\', \'前台妹子\', \'行政妹子\', \'大学生\', \'高中生\'] tmp_l_2 = [\'类型\', \'特质\', \'种类\', \'价位\', \'原材料\', \'主要成分\', \'价格\'] s = \'{}{}{}{}{}{}{}\'.format(str(random.randint(1, 12)), \'月份,\', random.choice(tmp_l_1), \'适合使用什么\', random.choice(tmp_l_2), \'的\', random.choice(tmp_l)) js = \'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";\'.format(s) browser.execute_script(js) time.sleep(12) # # tmp_target.send_keys(Keys.SPACE) js = \'document.getElementsByClassName("step-btn next")[0].click();\' browser.execute_script(js) # step-btn submit js = \'document.getElementsByClassName("step-btn submit")[0].click();\' browser.execute_script(js) time.sleep(12) # js = \'window.location.href="https://www.wukong.com/user/?uid={}&type=1";\'.format(toutiao_uid) browser.execute_script(js) time.sleep(12) res_url = browser.find_element_by_class_name(\'question-title\').find_elements_by_tag_name(\'a\')[ 0].get_attribute(\'href\') # print(i) # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea\' # try: # browser.find_element_by_xpath(xp_newpage) # except Exception as e: # print(e) # break # browser.find_element_by_xpath(xp_newpage).click() # words = content # # Message: SyntaxError: unterminated string literal # mytxt = words.replace(\'\n\', \' \').replace(\'\r\', \' \').replace(\'\\br\', \' \').replace(\'"\', \'“\').replace("\'", \'‘\') # # Message: SyntaxError: missing ; before statement # mytxt = mytxt.replace("\'", \'‘\') # # 2000 头条 # mytxt = mytxt[0:2000] # mytxt = \'好消息\' if len(mytxt.replace(\' \', \'\')) == 0 else mytxt # # # 需要键盘事件 反爬虫 # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE) # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'a\') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'x\') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 
\'v\') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE) # time.sleep(random.randint(2, 5)) # # try: # # js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', \'\', \'"\') # # browser.execute_script(js) # js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', mytxt, \'"\') # browser.execute_script(js) # time.sleep(3) # except Exception as jse: # print(\'.getElementsByTagName("textarea")--log-\', jse) # continue # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE) # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span\' # browser.find_element_by_xpath(xp_newpage).click() # time.sleep(3) # try: # upload = browser.find_element_by_id(\'fileElem\') # # logs_img = \'\' # img_url_list = img_list.split(\',\') # # for imgid in img_url_list: # img_url = \'http://192.168.2.212:83/file/get?type=star_helper&id=199\'.replace(\'199\', str(imgid)) # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url, # local_default=\'default.DONOT_REMOVE.png\') # print(local_img_path) # time.sleep(random.randint(2, 4)) # logs_img += img_url # logs_img += local_img_path # upload.send_keys(local_img_path) # time.sleep(random.randint(3, 7)) # except Exception as ee: # img_url_default = \'\' # img_url = img_url_default # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url, # local_default=\'default.DONOT_REMOVE.png\') # sleep(2) # logs_img += img_url # logs_img += local_img_path # # upload.send_keys(local_img_path) # logging.exception(ee) # # try: # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/ul\' # browser.find_element_by_xpath(xp_newpage).click() # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a\' # browser.find_element_by_xpath(xp_newpage).click() # # time.sleep(random.randint(8, 20)) # js = 
\'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"\' # browser.execute_script(js) # # time.sleep(random.randint(2, 5)) # xp_newpage = \'/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a\' # browser.find_element_by_xpath(xp_newpage).click() # time.sleep(random.randint(3, 6)) # url_curr = browser.current_url # # with open(\'toutiao_success.log\', \'a\', encoding=\'utf-8\') as f: # logs = \'%s%s%s%s%s\n\' % ( # time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), ac_type, myid[0:4], mytxt, # logs_img) # print(logs) # f.write(logs) sql = \'INSERT INTO joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");\' % ( dbid, res_url, int(time.time()), toutiao_uid) mysql_write(sql) print(sql) time.sleep(random.randint(20, 30)) js = \'window.location.href="https://www.wukong.com/"\' js = \'window.location.href="https://www.toutiao.com/"\' browser.execute_script(js) # except Exception as e_url_jump: # print(\'e_url_jump\', e_url_jump) try: browser.quit() except Exception as e1: print(e1) logging.exception(e1) time.sleep(random.randint(120, 300))
xp_newpage = \'//*[@id="title"]\'
mytxt = \'林志玲捐款记录被翻出 单笔高达千万\'
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
<div class=\'article\' id=\'artibody\'>
<div class=\'img_wrapper\'>
<img alt=\'林志玲\' src=\'http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg\'>
<span class=\'img_descr\'>林志玲</span>
</div>
<div class=\'img_wrapper\'>
<img alt=\'林志玲捐款记录\' src=\'http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg\'>
<span class=\'img_descr\'>林志玲捐款记录</span>
</div>
<div class=\'img_wrapper\'>
<img alt=\'林志玲捐款记录\' src=\'http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg\'>
<span class=\'img_descr\'>林志玲捐款记录</span>
</div> <!--video-list-->
<div class=\'video-2017\' id=\'videoList0\'></div>
<!--/video-list-->
<p> 新浪娱乐讯 据台湾媒体报道,林志玲
自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。
</p>
<div id=\'ad_44124\' class=\'otherContent_01\'
style=\'display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;\'>
</div>
<p>
林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p>
<p>
43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p>
<p>
林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p>
<p class=\'article-editor\'>(责编:kita)</p>
<div style=\'font-size: 0px; height: 0px; clear: both;\'></div>
</div>
<!-- 非定向300*250按钮 end -->
</div>
from selenium import webdriver
from time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os
import requests
import time
import threading
import logging
import random
start_time = time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time()))
os_sep = os.sep
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[
-1]
logf = this_file_name + \'.log\'
try:
logging.basicConfig(level=logging.INFO,
format=\'%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]\',
datefmt=\'%a, %d %b %Y %H:%M:%S\',
filename=logf,
filemode=\'a\')
except Exception as e:
s = \'%s%s%s\' % (\'logging.basicConfig EXCEPTION \', time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), e)
with open(logf, \'a\') as fo:
fo.write(s)
print(s)
os._exit(4002)
logging.info(\'START\')
img_url = \'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png\'
img_dir = \'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\\'
def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download ``img_url`` into ``img_dir`` and return the local file path.

    The saved file name is built from the current timestamp, the calling
    thread's identifier and a sanitised form of the URL, with a ``.png``
    suffix appended.

    Args:
        img_dir: Directory prefix, concatenated as-is with the file name
            (so it must already end with a path separator).
        img_url: URL of the image to download.
        local_default: File name (inside ``img_dir``) returned when the
            download or the write fails.

    Returns:
        The path of the freshly written image on success, otherwise
        ``img_dir + local_default``.
    """
    fallback_path = '%s%s' % (img_dir, local_default)
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(img_url, timeout=30)
        # Do not save an HTTP error page as if it were an image.
        response.raise_for_status()
        payload = response.content
        if not payload:
            return fallback_path
        safe_name = (img_url.replace('/', '_xl_')
                     .replace(':', '_fxl_')
                     .replace('?', '_fxlquestion_')
                     .replace('=', '_fxlequal_')
                     .replace('&', '_fxland_'))
        target_path = '%s%s%s%s%s' % (
            img_dir,
            time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())),
            str(threading.get_ident()),
            safe_name,
            '.png')
        with open(target_path, 'wb') as f:
            f.write(payload)
    except Exception as e:
        # Best-effort download: report the problem and fall back to the
        # default image instead of returning a path that was never written.
        print(e)
        return fallback_path
    return target_path
import pymysql
h, pt, u, p, db = \'192.168.2.210\', 3306, \'root\', \'joke\', \'star_media_joke\'
def mysql_fetch(sql, res_type='tuple', args=None):
    """Run ``sql`` against the configured MySQL server and return all rows.

    Connection settings are read from the module-level names ``h``, ``pt``,
    ``u``, ``p`` and ``db``.

    Args:
        sql: SQL statement to execute.
        res_type: ``'dic'`` to get rows from a ``DictCursor``; any other
            value uses the connection's default cursor.
        args: Optional parameters passed to ``cursor.execute`` so callers can
            use placeholders instead of formatting values into ``sql``.

    Returns:
        The result of ``cursor.fetchall()``, or an empty tuple when the
        connection cannot be established.

    Raises:
        Whatever ``cursor.execute`` / ``fetchall`` / ``commit`` raise; the
        cursor and connection are closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor is still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()
    return rows
def mysql_write(sql, args=None):
    """Execute a write statement against the configured MySQL server.

    Connection settings are read from the module-level names ``h``, ``pt``,
    ``u``, ``p`` and ``db``.

    Args:
        sql: SQL statement to execute and commit.
        args: Optional parameters passed to ``cursor.execute`` so callers can
            use placeholders instead of formatting values into ``sql``.

    Returns:
        ``0`` after a successful execute and commit, ``1`` when the
        connection cannot be established.

    Raises:
        Whatever ``cursor.execute`` / ``commit`` raise; the cursor and
        connection are closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0
import random
while True:
logging.info(\'LOOP----\')
sql = \'SELECT username,password,toutiaoid FROM joke_star_joke_joke_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )\'
sql = \'SELECT username,password,toutiaoid FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )\'
res = mysql_fetch(sql)
ac_l = [{\'u\': i[0], \'p\': i[1], \'toutiao_uid\': i[2]} for i in res]
for ac in ac_l:
myid, mypwd, toutiao_uid = ac[\'u\'], ac[\'p\'], ac[\'toutiao_uid\']
# 发布限制条件逻辑
sql = "SELECT * FROM joke_star_joke_relation_wukong_question WHERE INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={} ORDER BY id DESC; ".format(
toutiao_uid, int(time.time()));
sql = "SELECT * FROM joke_joke_article_publish WHERE INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={} ORDER BY id DESC; ".format(
toutiao_uid, int(time.time()));
print(sql)
logging.info(sql)
res_content = mysql_fetch(sql, \'dic\')
if len(res_content) == 0:
continue
id_article_list = [i[\'id_article_list\'] for i in res_content]
sql = \'SELECT * FROM joke_joke_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke_joke_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; \'.format(
\',\'.join([i[\'id_article_list\'] for i in res_content]), toutiao_uid)
# sql = \'SELECT * FROM joke_star_joke_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM joke_star_joke_toutiaouser_wukong_question) LIMIT 1\'
logging.info(sql)
res_content = mysql_fetch(sql, \'dic\')
if len(res_content) == 0:
continue
browser = webdriver.Chrome()
f_url_l = [\'https://www.toutiao.com/group/1589657566362638/\',
\'https://www.wukong.com/question/6388670742287876353/\',
\'https://www.wukong.com/tag/6215497898671475202/\']
f_url_l += [\'https://www.wukong.com/question/6512777037948649741/\',
\'https://www.wukong.com/question/6469247721038414093/\',
\'https://www.wukong.com/question/6481502080249889037/\']
# f_url_l = []
f_url_l = [\'https://www.toutiao.com/a6514526304476332552/\', \'https://www.toutiao.com/a6514778729951003150/\']
f_url_l += [\'https://www.toutiao.com/a6514216125151052291/\', \'https://www.toutiao.com/a6512315164463727111/\',
\'https://www.toutiao.com/a6513334304318161411/\']
f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
# browser.get(random.choice(f_url_l))
browser.get(f_url_l_a)
time.sleep(random.randint(10, 20))
js = \'window.location.href="https://sso.toutiao.com/login/";\'
js = \'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";\'
browser.execute_script(js)
time.sleep(random.randint(10, 20))
# js = \'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";\'
browser.execute_script(js)
ac_type = \'qq\'
if ac_type == \'qq\':
myid, mypwd = ac[\'u\'], ac[\'p\']
xp = \'/html/body/div/div/div[2]/div/div/div/ul/li[3]\'
browser.find_element_by_xpath(xp).click()
time.sleep(10)
js = \'%s%s%s\' % (\'document.getElementById("u").value="\', myid, \'"\')
browser.execute_script(js)
js = \'%s%s%s\' % (\'document.getElementById("p").value="\', mypwd, \'"\')
browser.execute_script(js)
time.sleep(random.randint(5, 15))
xp_newpage = \'//*[@id="go"]\'
browser.find_element_by_xpath(xp_newpage).click()
time.sleep(random.randint(10, 20))
elif ac_type == \'mail_qq\':
continue
time.sleep(5)
browser.refresh()
js = \'window.location.href="https://www.toutiao.com/";\'
browser.execute_script(js)
browser.refresh()
time.sleep(6)
js = \'window.location.href="https://www.wukong.com/";\'
js = \'window.location.href="https://mp.toutiao.com/profile_v2/publish/";\'
js = \'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";\'
browser.execute_script(js)
time.sleep(6)
# title
# js = \'%s%s%s\' % (\'document.getElementById("title").value="\', \'林志玲捐款记录被翻出 单笔高达千万\', \'"\')
# js = \'document.getElementById("title").value="{}"\'.format(\'林志玲捐款记录被翻出 单笔高达千万\')
# browser.execute_script(js)
xp_newpage = \'//*[@id="title"]\'
mytxt = \'林志玲捐款记录被翻出 单笔高达千万\'
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
time.sleep(2)
fhtml, dbhtml_str = \'toutaio.db.html\', \'\'
with open(fhtml, \'r\', encoding=\'utf-8\') as fr:
for hi in fr:
dbhtml_str = \'{}{}\'.format(dbhtml_str, hi.replace(\'\n\', \'\'))
xp = \'//*[@id="edui18_body"]/div[1]\'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
# //*[@id="images"]/div[1]/div
xp = \'//*[@id="images"]/div[1]/div\'
xp = \'//*[@id="images"]/div[1]/div/span\'
browser.find_element_by_xpath(xp).click()
time.sleep(1)
db_html = dbhtml_str
# db_html = \'<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>\'
js = \'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"\'.format(
db_html)
browser.execute_script(js)
xp = \'//*[@id="edui18_body"]/div[1]\'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
# //*[@id="images"]/div[1]/div
xp = \'//*[@id="images"]/div[1]/div\'
xp = \'//*[@id="images"]/div[1]/div/span\'
browser.find_element_by_xpath(xp).click()
time.sleep(1)
# xp=\'//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[2]/div[1]/div[2]/i\'
# browser.find_element_by_xpath(xp)
# xp=\'//*[@id="pgc-text-img"]/div/div[1]/div[1]\'
# browser.find_element_by_xpath(xp)
xp = \'//*[@id="pgc-text-img"]/div/div[2]/div/button[1]\'
browser.find_element_by_xpath(xp)
xp = \'//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input\'
browser.find_element_by_xpath(xp)
time.sleep(2)
time.sleep(2)
xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\'
xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\'
browser.find_element_by_xpath(xp).click()
dd = 9
# js = \'document.getElementsByClassName("ask")[0].click();\'
# browser.execute_script(js)
# time.sleep(12)
# time.sleep(random.randint(10, 20))
# # 需要键盘事件 反爬虫
# tmp_target = browser.find_element_by_class_name(\'input-box\').find_element_by_tag_name(\'input\')
# tmp_target.send_keys(Keys.SPACE)
# tmp_target.send_keys(Keys.CONTROL, \'a\')
# tmp_target.send_keys(Keys.CONTROL, \'x\')
# tmp_target.send_keys(Keys.CONTROL, \'v\')
# tmp_target.send_keys(Keys.BACK_SPACE)
# time.sleep(random.randint(10, 20))
# res_content = []
for i in res_content[0:1]:
dbid, content, img_list = i[\'id\'], i[\'content\'], i[\'img_list\']
tmp_l = [\'口红\', \'指甲油\', \'护发素\', \'沐浴露\', \'洗手液\', \'洗发水\', \'牙膏\']
tmp_l_1 = [\'老人\', \'小孩\', \'白领\', \'前台妹子\', \'行政妹子\', \'大学生\', \'高中生\']
tmp_l_2 = [\'类型\', \'特质\', \'种类\', \'价位\', \'原材料\', \'主要成分\', \'价格\']
s = \'{}{}{}{}{}{}{}\'.format(str(random.randint(1, 12)), \'月份,\', random.choice(tmp_l_1), \'适合使用什么\',
random.choice(tmp_l_2), \'的\', random.choice(tmp_l))
js = \'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";\'.format(s)
browser.execute_script(js)
time.sleep(12)
#
# tmp_target.send_keys(Keys.SPACE)
js = \'document.getElementsByClassName("step-btn next")[0].click();\'
browser.execute_script(js)
# step-btn submit
js = \'document.getElementsByClassName("step-btn submit")[0].click();\'
browser.execute_script(js)
time.sleep(12)
#
js = \'window.location.href="https://www.wukong.com/user/?uid={}&type=1";\'.format(toutiao_uid)
browser.execute_script(js)
time.sleep(12)
res_url = browser.find_element_by_class_name(\'question-title\').find_elements_by_tag_name(\'a\')[
0].get_attribute(\'href\')
# print(i)
# xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea\'
# try:
# browser.find_element_by_xpath(xp_newpage)
# except Exception as e:
# print(e)
# break
# browser.find_element_by_xpath(xp_newpage).click()
# words = content
# # Message: SyntaxError: unterminated string literal
# mytxt = words.replace(\'\n\', \' \').replace(\'\r\', \' \').replace(\'\\br\', \' \').replace(\'"\', \'“\').replace("\'", \'‘\')
# # Message: SyntaxError: missing ; before statement
# mytxt = mytxt.replace("\'", \'‘\')
# # 2000 头条
# mytxt = mytxt[0:2000]
# mytxt = \'好消息\' if len(mytxt.replace(\' \', \'\')) == 0 else mytxt
#
# # 需要键盘事件 反爬虫
# browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'a\')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'x\')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'v\')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
# time.sleep(random.randint(2, 5))
#
# try:
# # js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', \'\', \'"\')
# # browser.execute_script(js)
# js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', mytxt, \'"\')
# browser.execute_script(js)
# time.sleep(3)
# except Exception as jse:
# print(\'.getElementsByTagName("textarea")--log-\', jse)
# continue
#
# browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
# xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span\'
# browser.find_element_by_xpath(xp_newpage).click()
# time.sleep(3)
# try:
# upload = browser.find_element_by_id(\'fileElem\')
#
# logs_img = \'\'
# img_url_list = img_list.split(\',\')
#
# for imgid in img_url_list:
# img_url = \'http://192.168.2.212:83/file/get?type=star_joke&id=199\'.replace(\'199\', str(imgid))
# local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
# local_default=\'default.DONOT_REMOVE.png\')
# print(local_img_path)
# time.sleep(random.randint(2, 4))
# logs_img += img_url
# logs_img += local_img_path
# upload.send_keys(local_img_path)
# time.sleep(random.randint(3, 7))
# except Exception as ee:
# img_url_default = \'\'
# img_url = img_url_default
# local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
# local_default=\'default.DONOT_REMOVE.png\')
# sleep(2)
# logs_img += img_url
# logs_img += local_img_path
# # upload.send_keys(local_img_path)
# logging.exception(ee)
#
# try:
# xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/ul\'
# browser.find_element_by_xpath(xp_newpage).click()
# xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a\'
# browser.find_element_by_xpath(xp_newpage).click()
#
# time.sleep(random.randint(8, 20))
# js = \'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"\'
# browser.execute_script(js)
#
# time.sleep(random.randint(2, 5))
# xp_newpage = \'/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a\'
# browser.find_element_by_xpath(xp_newpage).click()
# time.sleep(random.randint(3, 6))
# url_curr = browser.current_url
#
# with open(\'toutiao_success.log\', \'a\', encoding=\'utf-8\') as f:
# logs = \'%s%s%s%s%s\n\' % (
# time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
# logs_img)
# print(logs)
# f.write(logs)
sql = \'INSERT INTO joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");\' % (
dbid, res_url, int(time.time()), toutiao_uid)
mysql_write(sql)
print(sql)
time.sleep(random.randint(20, 30))
js = \'window.location.href="https://www.wukong.com/"\'
js = \'window.location.href="https://www.toutiao.com/"\'
browser.execute_script(js)
# except Exception as e_url_jump:
# print(\'e_url_jump\', e_url_jump)
try:
browser.quit()
except Exception as e1:
print(e1)
logging.exception(e1)
time.sleep(random.randint(120, 300))
<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">
\'\'\'
<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true">
\'\'\'
dbhtml_str_ = dbhtml_str
img_n = dbhtml_str_.count(\'<img\')
s = \'<img onload="editor.fireEvent(\\'contentchange\\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">\'
s = \'<img onload="editor.fireEvent(\\'contentchange\\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">\'
# s = "<img onload=\'editor.fireEvent(\\'contentchange\\')\' src=\'https://p1.pstatp.com/large/pgc-image/TTimgCode\' _src=\'https://p1.pstatp.com/large/pgc-image/TTimgCode\' alt=\'pgc-image/TTimgCode\' buttonadded=\'true\'>"
ss = \'\'
l = pgc_img_url_l_toutiao
for i in range(img_n):
if i == 0:
p1 = dbhtml_str.index(\'<img \', 0)
else:
p1 = dbhtml_str.index(\'<img \', p1 + 3)
tmp = \'{}{}\'.format(dbhtml_str[0:p1].replace(\'>\', \'X\'), dbhtml_str[p1:])
p2 = tmp.index(\'>\')
ss = s.replace(\'TTimgCode\', l[0].split(\'/\')[-1])
dbhtml_str = \'{}{}{}\'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
del l[0]
print(\'-----------------\')
print(dbhtml_str)
time.sleep(2)
js = \'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";\'
browser.execute_script(js)
time.sleep(6)
xp_newpage = \'//*[@id="title"]\'
mytxt = d[\'title\']
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
time.sleep(2)
# SAVE NOT DEL
"""
xp = \'//*[@id="edui18_body"]/div[1]\'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
xp = \'//*[@id="images"]/div[1]/div/span\'
browser.find_element_by_xpath(xp).click()
time.sleep(3)
\'\'\'
\'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"\'.format(dbhtml_str.replace(\'onload="editor.fireEvent(\\'contentchange\\')"\',\'\').replace(\'"\',"\'").replace(\'\n\',\'\'))
\'\'\'
# 结合浏览器控制台,拼接符合语法的js字符串
r_d = {\'onload="editor.fireEvent(\\'contentchange\\')"\': \'\', \'"\': "\'", \'\n\': \'\'}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
dbhtml_str_py_js = dbhtml_str_py_js.replace(\'nbsp;\', \' \')
js = \'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"\'.format(
dbhtml_str_py_js)
browser.execute_script(js)
"""
# 激活编辑区
browser.find_element_by_class_name(\'ql-container\').click()
# 进行上传图片圆圈操作
browser.find_element_by_class_name(\'icon-pic_tool\').click()
# 激活目标上传口
browser.find_element_by_class_name(\'tui-tab-list\').find_elements_by_class_name(\'tui-tab\')[-1].click()
# 关闭上传口
browser.find_element_by_class_name(\'tui-tab-panel-active\').find_elements_by_class_name(\'tui-btn\')[
-1].click()
# 结合浏览器控制台,拼接符合语法的js字符串
r_d = {\'onload="editor.fireEvent(\\'contentchange\\')"\': \'\', \'"\': "\'", \'\n\': \'\'}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
dbhtml_str_py_js = dbhtml_str_py_js.replace(\'nbsp;\', \' \')
#传入键盘化的html
# document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = "44"
js = \'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML ="{}"\'.format(
dbhtml_str_py_js)
browser.execute_script(js)