rsapaper

 

 

 

 

 

 

 

js  修改  iframe

 

 

 

// Look up the iframe with id "ueditor_0", take the <body> element of the document inside it,
// and replace that body's markup with a fixed sample list (image, name, date, counter).
it=document.getElementById(\'ueditor_0\').contentWindow.document.getElementsByTagName("body")[0];

it.innerHTML=\'<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>\'

 

 

 

from selenium import webdriver
from  time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os

import requests
import time
import threading
import logging
import random

# Script start-up: derive the log file name from this script and configure logging.

# Timestamp taken once at start-up, formatted as YYYYmmdd_HHMMSS.
start_time = time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time()))
os_sep = os.sep
# this_file_abspath is the directory holding this script; this_file_name is the
# last path component of the script path.
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[
    -1]
# Relative name, so the log file is created in the current working directory.
logf = this_file_name + \'.log\'
try:
    # Append INFO-and-above records to the log file, tagged with source line, thread and process id.
    logging.basicConfig(level=logging.INFO,
                        format=\'%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]\',
                        datefmt=\'%a, %d %b %Y %H:%M:%S\',
                        filename=logf,
                        filemode=\'a\')
except Exception as e:
    # Logging could not be configured: record the failure by hand (no trailing
    # newline is written), echo it, and terminate the process immediately.
    s = \'%s%s%s\' % (\'logging.basicConfig EXCEPTION \', time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), e)
    with open(logf, \'a\') as fo:
        fo.write(s)
        print(s)
        os._exit(4002)

logging.info(\'START\')

# Module-level image URL and download directory prefix (the directory ends with a path separator).
# NOTE(review): in the visible part of the file these are only referenced from commented-out code.
img_url = \'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png\'
img_dir = \'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default="default.DONOT_REMOVE.png", timeout=30):
    """Download an image and return the path of the local copy.

    Args:
        img_dir: Directory prefix for the saved file. It is concatenated as is,
            so it must already end with a path separator.
        img_url: URL of the image to download.
        local_default: File name inside img_dir that is returned when the
            download cannot be completed.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the freshly written file, or img_dir + local_default when
        the request fails, returns an HTTP error, returns an empty body, or the
        file cannot be written. The returned path therefore never points at a
        file this function failed to create.
    """
    fallback_path = "%s%s" % (img_dir, local_default)
    # Characters that are unsafe in file names are replaced by readable markers.
    # The markers contain none of the replaced characters, so order is irrelevant.
    markers = (
        ("/", "_xl_"),
        (":", "_fxl_"),
        ("?", "_fxlquestion_"),
        ("=", "_fxlequal_"),
        ("&", "_fxland_"),
    )
    try:
        response = requests.get(img_url, timeout=timeout)
        # An HTTP error page must not be stored as if it were the image.
        response.raise_for_status()
        payload = response.content
        if not payload:
            return fallback_path
        safe_name = img_url
        for char, marker in markers:
            safe_name = safe_name.replace(char, marker)
        # Timestamp plus thread id keeps names unique across concurrent downloads.
        target_path = "%s%s%s%s%s" % (
            img_dir,
            time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())),
            str(threading.get_ident()),
            safe_name,
            ".png",
        )
        with open(target_path, "wb") as f:
            f.write(payload)
    except Exception as e:
        # Best effort by design: any failure falls back to the default image.
        print(e)
        return fallback_path
    return target_path


import pymysql

# MySQL connection settings read by mysql_fetch and mysql_write:
# host, port, user, password, database name (in that order).
# NOTE(review): the root account and its password are hard-coded in source;
# consider loading them from the environment or a config file kept out of version control.
h, pt, u, p, db = \'192.168.2.210\', 3306, \'root\', \'joke_\', \'star_media_helper\'


def mysql_fetch(sql, res_type="tuple"):
    """Run a query on a fresh MySQL connection and return every row.

    Args:
        sql: Complete SQL statement. It is executed verbatim, so the caller is
            responsible for escaping any values embedded in it.
        res_type: "dic" to get each row as a dict keyed by column name; any
            other value yields plain tuples.

    Returns:
        All rows of the result set, or an empty tuple when the connection
        cannot be opened.

    Raises:
        Whatever the driver raises while executing the statement. The cursor
        and the connection are closed before the error propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset="utf8")
    except Exception as e:
        # Connection problems are reported and turned into an empty result.
        print(e)
        return ()
    try:
        if res_type == "dic":
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql)
            # Read the rows while the cursor is still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return rows


def mysql_write(sql):
    """Execute a data-modifying statement on a fresh MySQL connection and commit it.

    Args:
        sql: Complete SQL statement. It is executed verbatim, so the caller is
            responsible for escaping any values embedded in it.

    Returns:
        0 after the statement was executed and committed, 1 when the
        connection cannot be opened.

    Raises:
        Whatever the driver raises while executing or committing. The cursor
        and the connection are closed before the error propagates, and nothing
        is committed in that case.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset="utf8")
    except Exception as e:
        # Connection problems are reported through the return code.
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return 0


import random

def _publish_with_account(browser, ac, articles):
    """Log one account in through QQ and run the publish flow in the given browser.

    Args:
        browser: A started Selenium WebDriver. The caller owns it and closes it.
        ac: Account dict with the keys "u" (login name), "p" (password) and
            "toutiao_uid".
        articles: Article rows (dicts with at least the key "id") that are still
            unpublished for this account. Only the first one is processed.

    Selenium errors are not caught here; they propagate to the caller.
    """
    myid, mypwd, toutiao_uid = ac["u"], ac["p"], ac["toutiao_uid"]

    # Open an ordinary article page first; the page is chosen by the current second.
    warmup_urls = [
        "https://www.toutiao.com/a6514526304476332552/",
        "https://www.toutiao.com/a6514661446876398088/",
        "https://www.toutiao.com/a6514778729951003150/",
        "https://www.toutiao.com/a6514216125151052291/",
        "https://www.toutiao.com/a6512315164463727111/",
        "https://www.toutiao.com/a6513334304318161411/",
    ]
    browser.get(warmup_urls[int(time.time()) % len(warmup_urls)])
    time.sleep(random.randint(10, 20))

    login_js = "window.location.href=\"https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/\";"
    browser.execute_script(login_js)
    time.sleep(random.randint(10, 20))
    # The same navigation is issued a second time, exactly as the script always did.
    browser.execute_script(login_js)

    ac_type = "qq"
    if ac_type == "qq":
        # Third entry of the login-provider list, then fill in the QQ form.
        browser.find_element_by_xpath("/html/body/div/div/div[2]/div/div/div/ul/li[3]").click()
        time.sleep(10)
        # Values are passed as script arguments so quotes or backslashes in a
        # login name or password cannot break the JavaScript.
        browser.execute_script("document.getElementById(\"u\").value=arguments[0]", myid)
        browser.execute_script("document.getElementById(\"p\").value=arguments[0]", mypwd)
        time.sleep(random.randint(5, 15))
        browser.find_element_by_xpath("//*[@id=\"go\"]").click()
        time.sleep(random.randint(10, 20))
    elif ac_type == "mail_qq":
        # Mail-based QQ login is not implemented; skip this account.
        return

    time.sleep(5)

    browser.refresh()
    browser.execute_script("window.location.href=\"https://www.toutiao.com/\";")
    browser.refresh()

    time.sleep(6)

    browser.execute_script("window.location.href=\"https://mp.toutiao.com/profile_v3/graphic/publish\";")

    time.sleep(6)

    # Title of the article (fixed text).
    browser.execute_script(
        "document.getElementById(\"title\").value=\"{}\"".format("林志玲捐款记录被翻出 单笔高达千万"))
    time.sleep(2)

    # The article body is read from a local HTML file and flattened to one line.
    with open("toutaio.db.html", "r", encoding="utf-8") as fr:
        db_html = "".join(line.replace("\n", "") for line in fr)

    # The markup is handed over as a script argument, so it may contain any quote character.
    browser.execute_script(
        "document.getElementById(\"ueditor_0\").contentWindow.document.getElementsByTagName(\"body\")[0].innerHTML=arguments[0];",
        db_html)
    time.sleep(2)

    browser.find_element_by_xpath("//*[@id=\"graphic\"]/div/div/div[2]/div[3]/div[2]/div[1]").click()

    tmp_l = ["口红", "指甲油", "护发素", "沐浴露", "洗手液", "洗发水", "牙膏"]
    tmp_l_1 = ["老人", "小孩", "白领", "前台妹子", "行政妹子", "大学生", "高中生"]
    tmp_l_2 = ["类型", "特质", "种类", "价位", "原材料", "主要成分", "价格"]

    for article in articles[0:1]:
        dbid = article["id"]

        # Randomly assembled question text typed into the input box.
        s = "{}{}{}{}{}{}{}".format(str(random.randint(1, 12)), "月份,", random.choice(tmp_l_1), "适合使用什么",
                                    random.choice(tmp_l_2), "的", random.choice(tmp_l))
        browser.execute_script(
            "document.getElementsByClassName(\"input-box\")[0].childNodes[0].value=\"{}\";".format(s))
        time.sleep(12)

        browser.execute_script("document.getElementsByClassName(\"step-btn next\")[0].click();")
        browser.execute_script("document.getElementsByClassName(\"step-btn submit\")[0].click();")
        time.sleep(12)

        # Open the profile page of the account and take the first listed link as the result URL.
        browser.execute_script(
            "window.location.href=\"https://www.wukong.com/user/?uid={}&type=1\";".format(toutiao_uid))
        time.sleep(12)
        res_url = browser.find_element_by_class_name("question-title").find_elements_by_tag_name("a")[
            0].get_attribute("href")

        # NOTE(review): values are formatted straight into the statement; mysql_write
        # accepts only a finished SQL string, so escaping has to happen before this point.
        sql = "INSERT INTO  joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE(\"%s\",\"%s\",\"%s\",\"%s\");" % (
            dbid, res_url, int(time.time()), toutiao_uid)
        mysql_write(sql)
        print(sql)
        time.sleep(random.randint(20, 30))
        browser.execute_script("window.location.href=\"https://www.toutiao.com/\"")


# Main polling loop: pick the accounts, find what each one still has to publish,
# run the browser flow, then pause before the next round.
while True:
    logging.info("LOOP----")
    sql = "SELECT username,password,toutiaoid  FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT  (toutiaoid IS NULL OR toutiaoid=\"\" )"
    ac_l = [{"u": row[0], "p": row[1], "toutiao_uid": row[2]} for row in mysql_fetch(sql)]
    for ac in ac_l:
        toutiao_uid = ac["toutiao_uid"]
        # Publish rules: rows whose comma-separated uid list contains this account
        # and whose effective time has been reached.
        sql = "SELECT * FROM joke__helper_article_publish  WHERE  INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()))
        print(sql)
        logging.info(sql)
        res_content = mysql_fetch(sql, "dic")
        if not res_content:
            continue

        # Articles named by those rules that have no publish result for this account yet.
        sql = "SELECT * FROM joke__helper_article WHERE id IN ({}) AND id  NOT IN (SELECT article_id FROM  joke__helper_article_publish_result WHERE 1 AND toutiao_uid=\"{}\" ) LIMIT 2; ".format(
            ",".join([row["id_article_list"] for row in res_content]), toutiao_uid)
        logging.info(sql)
        res_content = mysql_fetch(sql, "dic")
        if not res_content:
            continue

        browser = webdriver.Chrome()
        try:
            _publish_with_account(browser, ac, res_content)
        finally:
            # Every browser that was started is closed, also when the flow fails.
            try:
                browser.quit()
            except Exception as e1:
                print(e1)
                logging.exception(e1)

    # Pause between rounds so the database and the site are not polled in a tight loop.
    time.sleep(random.randint(120, 300))

 

 

 

        # Fill the title field through key events: locate the element with id "title"
        # by XPath, send a single space first, then send the title text.
        xp_newpage = \'//*[@id="title"]\'
        mytxt = \'林志玲捐款记录被翻出 单笔高达千万\'
        browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
        browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)

  

 

 

 

 

<div class=\'article\' id=\'artibody\'>
    <div class=\'img_wrapper\'>
        <img alt=\'林志玲\' src=\'http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg\'>
        <span class=\'img_descr\'>林志玲</span>
    </div>
    <div class=\'img_wrapper\'>
        <img alt=\'林志玲捐款记录\' src=\'http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg\'>
        <span class=\'img_descr\'>林志玲捐款记录</span>
    </div>
    <div class=\'img_wrapper\'>
        <img alt=\'林志玲捐款记录\' src=\'http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg\'>
        <span class=\'img_descr\'>林志玲捐款记录</span>
    </div>            <!--video-list-->
    <div class=\'video-2017\' id=\'videoList0\'></div>

    <!--/video-list-->
    <p>  新浪娱乐讯 据台湾媒体报道,林志玲
        自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。
    </p>
    <div id=\'ad_44124\' class=\'otherContent_01\'
         style=\'display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;\'>

    </div>
    <p>
          林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p>
    <p>
          43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p>
    <p>
          林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p>
    <p class=\'article-editor\'>(责编:kita)</p>
    <div style=\'font-size: 0px; height: 0px; clear: both;\'></div>

</div>
<!-- 非定向300*250按钮  end -->
</div>

  

 

from selenium import webdriver
from  time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os

import requests
import time
import threading
import logging
import random

# Script start-up: derive the log file name from this script and configure logging.

# Timestamp taken once at start-up, formatted as YYYYmmdd_HHMMSS.
start_time = time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time()))
os_sep = os.sep
# this_file_abspath is the directory holding this script; this_file_name is the
# last path component of the script path.
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[
    -1]
# Relative name, so the log file is created in the current working directory.
logf = this_file_name + \'.log\'
try:
    # Append INFO-and-above records to the log file, tagged with source line, thread and process id.
    logging.basicConfig(level=logging.INFO,
                        format=\'%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]\',
                        datefmt=\'%a, %d %b %Y %H:%M:%S\',
                        filename=logf,
                        filemode=\'a\')
except Exception as e:
    # Logging could not be configured: record the failure by hand (no trailing
    # newline is written), echo it, and terminate the process immediately.
    s = \'%s%s%s\' % (\'logging.basicConfig EXCEPTION \', time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), e)
    with open(logf, \'a\') as fo:
        fo.write(s)
        print(s)
        os._exit(4002)

logging.info(\'START\')

# Module-level image URL and download directory prefix (the directory ends with a path separator).
# NOTE(review): in the visible part of the file these are only referenced from commented-out code.
img_url = \'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png\'
img_dir = \'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default="default.DONOT_REMOVE.png", timeout=30):
    """Download an image and return the path of the local copy.

    Args:
        img_dir: Directory prefix for the saved file. It is concatenated as is,
            so it must already end with a path separator.
        img_url: URL of the image to download.
        local_default: File name inside img_dir that is returned when the
            download cannot be completed.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the freshly written file, or img_dir + local_default when
        the request fails, returns an HTTP error, returns an empty body, or the
        file cannot be written. The returned path therefore never points at a
        file this function failed to create.
    """
    fallback_path = "%s%s" % (img_dir, local_default)
    # Characters that are unsafe in file names are replaced by readable markers.
    # The markers contain none of the replaced characters, so order is irrelevant.
    markers = (
        ("/", "_xl_"),
        (":", "_fxl_"),
        ("?", "_fxlquestion_"),
        ("=", "_fxlequal_"),
        ("&", "_fxland_"),
    )
    try:
        response = requests.get(img_url, timeout=timeout)
        # An HTTP error page must not be stored as if it were the image.
        response.raise_for_status()
        payload = response.content
        if not payload:
            return fallback_path
        safe_name = img_url
        for char, marker in markers:
            safe_name = safe_name.replace(char, marker)
        # Timestamp plus thread id keeps names unique across concurrent downloads.
        target_path = "%s%s%s%s%s" % (
            img_dir,
            time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())),
            str(threading.get_ident()),
            safe_name,
            ".png",
        )
        with open(target_path, "wb") as f:
            f.write(payload)
    except Exception as e:
        # Best effort by design: any failure falls back to the default image.
        print(e)
        return fallback_path
    return target_path


import pymysql

# MySQL connection settings read by mysql_fetch and mysql_write:
# host, port, user, password, database name (in that order).
# NOTE(review): the root account and its password are hard-coded in source;
# consider loading them from the environment or a config file kept out of version control.
h, pt, u, p, db = \'192.168.2.210\', 3306, \'root\', \'joke\', \'star_media_joke\'


def mysql_fetch(sql, res_type="tuple"):
    """Run a query on a fresh MySQL connection and return every row.

    Args:
        sql: Complete SQL statement. It is executed verbatim, so the caller is
            responsible for escaping any values embedded in it.
        res_type: "dic" to get each row as a dict keyed by column name; any
            other value yields plain tuples.

    Returns:
        All rows of the result set, or an empty tuple when the connection
        cannot be opened.

    Raises:
        Whatever the driver raises while executing the statement. The cursor
        and the connection are closed before the error propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset="utf8")
    except Exception as e:
        # Connection problems are reported and turned into an empty result.
        print(e)
        return ()
    try:
        if res_type == "dic":
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql)
            # Read the rows while the cursor is still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return rows


def mysql_write(sql):
    """Execute a data-modifying statement on a fresh MySQL connection and commit it.

    Args:
        sql: Complete SQL statement. It is executed verbatim, so the caller is
            responsible for escaping any values embedded in it.

    Returns:
        0 after the statement was executed and committed, 1 when the
        connection cannot be opened.

    Raises:
        Whatever the driver raises while executing or committing. The cursor
        and the connection are closed before the error propagates, and nothing
        is committed in that case.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset="utf8")
    except Exception as e:
        # Connection problems are reported through the return code.
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return 0


import random

while True:
    logging.info(\'LOOP----\')
    sql = \'SELECT username,password,toutiaoid  FROM joke_star_joke_joke_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )\'
    sql = \'SELECT username,password,toutiaoid  FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )\'
    res = mysql_fetch(sql)
    ac_l = [{\'u\': i[0], \'p\': i[1], \'toutiao_uid\': i[2]} for i in res]
    for ac in ac_l:
        myid, mypwd, toutiao_uid = ac[\'u\'], ac[\'p\'], ac[\'toutiao_uid\']
        # 发布限制条件逻辑
        sql = "SELECT * FROM joke_star_joke_relation_wukong_question  WHERE  INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        sql = "SELECT * FROM joke_joke_article_publish  WHERE  INSTR(CONCAT(\',\',id_toutiao_uid_list,\',\'),CONCAT(\',\',\'{}\',\',\')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        print(sql)
        logging.info(sql)
        res_content = mysql_fetch(sql, \'dic\')
        if len(res_content) == 0:
            continue
        id_article_list = [i[\'id_article_list\'] for i in res_content]

        sql = \'SELECT * FROM joke_joke_article WHERE id IN ({}) AND id  NOT IN (SELECT article_id FROM  joke_joke_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; \'.format(
            \',\'.join([i[\'id_article_list\'] for i in res_content]), toutiao_uid)
        # sql = \'SELECT * FROM joke_star_joke_wukong_question WHERE id  NOT IN (SELECT toutiao_uid FROM  joke_star_joke_toutiaouser_wukong_question) LIMIT 1\'
        logging.info(sql)
        res_content = mysql_fetch(sql, \'dic\')
        if len(res_content) == 0:
            continue

        browser = webdriver.Chrome()
        f_url_l = [\'https://www.toutiao.com/group/1589657566362638/\',
                   \'https://www.wukong.com/question/6388670742287876353/\',
                   \'https://www.wukong.com/tag/6215497898671475202/\']
        f_url_l += [\'https://www.wukong.com/question/6512777037948649741/\',
                    \'https://www.wukong.com/question/6469247721038414093/\',
                    \'https://www.wukong.com/question/6481502080249889037/\']
        # f_url_l = []
        f_url_l = [\'https://www.toutiao.com/a6514526304476332552/\', \'https://www.toutiao.com/a6514778729951003150/\']
        f_url_l += [\'https://www.toutiao.com/a6514216125151052291/\', \'https://www.toutiao.com/a6512315164463727111/\',
                    \'https://www.toutiao.com/a6513334304318161411/\']
        f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
        # browser.get(random.choice(f_url_l))
        browser.get(f_url_l_a)
        time.sleep(random.randint(10, 20))

        js = \'window.location.href="https://sso.toutiao.com/login/";\'
        js = \'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";\'
        browser.execute_script(js)
        time.sleep(random.randint(10, 20))

        #  js = \'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";\'
        browser.execute_script(js)

        ac_type = \'qq\'
        if ac_type == \'qq\':
            myid, mypwd = ac[\'u\'], ac[\'p\']
            xp = \'/html/body/div/div/div[2]/div/div/div/ul/li[3]\'
            browser.find_element_by_xpath(xp).click()
            time.sleep(10)
            js = \'%s%s%s\' % (\'document.getElementById("u").value="\', myid, \'"\')
            browser.execute_script(js)
            js = \'%s%s%s\' % (\'document.getElementById("p").value="\', mypwd, \'"\')
            browser.execute_script(js)
            time.sleep(random.randint(5, 15))
            xp_newpage = \'//*[@id="go"]\'
            browser.find_element_by_xpath(xp_newpage).click()
            time.sleep(random.randint(10, 20))
        elif ac_type == \'mail_qq\':
            continue

        time.sleep(5)

        browser.refresh()
        js = \'window.location.href="https://www.toutiao.com/";\'
        browser.execute_script(js)
        browser.refresh()

        time.sleep(6)

        js = \'window.location.href="https://www.wukong.com/";\'
        js = \'window.location.href="https://mp.toutiao.com/profile_v2/publish/";\'
        js = \'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";\'
        browser.execute_script(js)

        time.sleep(6)

        # title
        # js = \'%s%s%s\' % (\'document.getElementById("title").value="\', \'林志玲捐款记录被翻出 单笔高达千万\', \'"\')
        # js = \'document.getElementById("title").value="{}"\'.format(\'林志玲捐款记录被翻出 单笔高达千万\')
        # browser.execute_script(js)

        xp_newpage = \'//*[@id="title"]\'
        mytxt = \'林志玲捐款记录被翻出 单笔高达千万\'
        browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
        browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)

        time.sleep(2)

        fhtml, dbhtml_str = \'toutaio.db.html\', \'\'
        with open(fhtml, \'r\', encoding=\'utf-8\') as fr:
            for hi in fr:
                dbhtml_str = \'{}{}\'.format(dbhtml_str, hi.replace(\'\n\', \'\'))

        xp = \'//*[@id="edui18_body"]/div[1]\'
        # //*[@id="edui18_body"]/div[1]
        browser.find_element_by_xpath(xp).click()
        time.sleep(2)
        # //*[@id="images"]/div[1]/div
        xp = \'//*[@id="images"]/div[1]/div\'
        xp = \'//*[@id="images"]/div[1]/div/span\'
        browser.find_element_by_xpath(xp).click()
        time.sleep(1)

        db_html = dbhtml_str
        #  db_html = \'<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>\'
        js = \'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"\'.format(
            db_html)
        browser.execute_script(js)
        xp = \'//*[@id="edui18_body"]/div[1]\'
        # //*[@id="edui18_body"]/div[1]
        browser.find_element_by_xpath(xp).click()
        time.sleep(2)
        # //*[@id="images"]/div[1]/div
        xp = \'//*[@id="images"]/div[1]/div\'
        xp = \'//*[@id="images"]/div[1]/div/span\'
        browser.find_element_by_xpath(xp).click()
        time.sleep(1)
        # xp=\'//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[2]/div[1]/div[2]/i\'
        # browser.find_element_by_xpath(xp)
        # xp=\'//*[@id="pgc-text-img"]/div/div[1]/div[1]\'
        # browser.find_element_by_xpath(xp)
        xp = \'//*[@id="pgc-text-img"]/div/div[2]/div/button[1]\'
        browser.find_element_by_xpath(xp)

        xp = \'//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input\'
        browser.find_element_by_xpath(xp)

        time.sleep(2)
        time.sleep(2)
        xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\'
        xp = \'//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]\'
        browser.find_element_by_xpath(xp).click()
        dd = 9
        # js = \'document.getElementsByClassName("ask")[0].click();\'
        # browser.execute_script(js)
        # time.sleep(12)

        # time.sleep(random.randint(10, 20))
        # # 需要键盘事件 反爬虫
        # tmp_target = browser.find_element_by_class_name(\'input-box\').find_element_by_tag_name(\'input\')
        # tmp_target.send_keys(Keys.SPACE)
        # tmp_target.send_keys(Keys.CONTROL, \'a\')
        # tmp_target.send_keys(Keys.CONTROL, \'x\')
        # tmp_target.send_keys(Keys.CONTROL, \'v\')
        # tmp_target.send_keys(Keys.BACK_SPACE)
        # time.sleep(random.randint(10, 20))


        # res_content = []
        for i in res_content[0:1]:
            dbid, content, img_list = i[\'id\'], i[\'content\'], i[\'img_list\']

            tmp_l = [\'口红\', \'指甲油\', \'护发素\', \'沐浴露\', \'洗手液\', \'洗发水\', \'牙膏\']
            tmp_l_1 = [\'老人\', \'小孩\', \'白领\', \'前台妹子\', \'行政妹子\', \'大学生\', \'高中生\']
            tmp_l_2 = [\'类型\', \'特质\', \'种类\', \'价位\', \'原材料\', \'主要成分\', \'价格\']

            s = \'{}{}{}{}{}{}{}\'.format(str(random.randint(1, 12)), \'月份,\', random.choice(tmp_l_1), \'适合使用什么\',
                                        random.choice(tmp_l_2), \'的\', random.choice(tmp_l))
            js = \'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";\'.format(s)
            browser.execute_script(js)
            time.sleep(12)
            #
            # tmp_target.send_keys(Keys.SPACE)

            js = \'document.getElementsByClassName("step-btn next")[0].click();\'
            browser.execute_script(js)

            # step-btn submit

            js = \'document.getElementsByClassName("step-btn submit")[0].click();\'
            browser.execute_script(js)
            time.sleep(12)

            #
            js = \'window.location.href="https://www.wukong.com/user/?uid={}&type=1";\'.format(toutiao_uid)
            browser.execute_script(js)
            time.sleep(12)
            res_url = browser.find_element_by_class_name(\'question-title\').find_elements_by_tag_name(\'a\')[
                0].get_attribute(\'href\')

            # print(i)
            # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea\'
            # try:
            #     browser.find_element_by_xpath(xp_newpage)
            # except Exception as e:
            #     print(e)
            #     break
            # browser.find_element_by_xpath(xp_newpage).click()
            # words = content
            # # Message: SyntaxError: unterminated string literal
            # mytxt = words.replace(\'\n\', \' \').replace(\'\r\', \' \').replace(\'\\br\', \' \').replace(\'"\', \'“\').replace("\'", \'‘\')
            # # Message: SyntaxError: missing ; before statement
            # mytxt = mytxt.replace("\'", \'‘\')
            # # 2000 头条
            # mytxt = mytxt[0:2000]
            # mytxt = \'好消息\' if len(mytxt.replace(\' \', \'\')) == 0 else mytxt
            #
            # # 需要键盘事件 反爬虫
            # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'a\')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'x\')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, \'v\')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
            # time.sleep(random.randint(2, 5))
            #
            # try:
            #     # js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', \'\', \'"\')
            #     # browser.execute_script(js)
            #     js = \'%s%s%s\' % (\'document.getElementsByTagName("textarea")[0].value="\', mytxt, \'"\')
            #     browser.execute_script(js)
            #     time.sleep(3)
            # except Exception as jse:
            #     print(\'.getElementsByTagName("textarea")--log-\', jse)
            #     continue
            #
            # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            # xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span\'
            # browser.find_element_by_xpath(xp_newpage).click()
            # time.sleep(3)
            # try:
            #     upload = browser.find_element_by_id(\'fileElem\')
            #
            #     logs_img = \'\'
            #     img_url_list = img_list.split(\',\')
            #
            #     for imgid in img_url_list:
            #         img_url = \'http://192.168.2.212:83/file/get?type=star_joke&id=199\'.replace(\'199\', str(imgid))
            #         local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
            #                                                                 local_default=\'default.DONOT_REMOVE.png\')
            #         print(local_img_path)
            #         time.sleep(random.randint(2, 4))
            #         logs_img += img_url
            #         logs_img += local_img_path
            #         upload.send_keys(local_img_path)
            #         time.sleep(random.randint(3, 7))
            # except Exception as ee:
            #     img_url_default = \'\'
            #     img_url = img_url_default
            #     local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
            #                                                             local_default=\'default.DONOT_REMOVE.png\')
            #     sleep(2)
            #     logs_img += img_url
            #     logs_img += local_img_path
            #     # upload.send_keys(local_img_path)
            #     logging.exception(ee)
            #
            # try:
            #     xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/ul\'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #     xp_newpage = \'/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a\'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #
            #     time.sleep(random.randint(8, 20))
            #     js = \'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"\'
            #     browser.execute_script(js)
            #
            #     time.sleep(random.randint(2, 5))
            #     xp_newpage = \'/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a\'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #     time.sleep(random.randint(3, 6))
            #     url_curr = browser.current_url
            #
            #     with open(\'toutiao_success.log\', \'a\', encoding=\'utf-8\') as f:
            #         logs = \'%s%s%s%s%s\n\' % (
            #             time.strftime(\'%Y%m%d_%H%M%S\', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
            #             logs_img)
            #         print(logs)
            #         f.write(logs)

            sql = \'INSERT INTO  joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");\' % (
                dbid, res_url, int(time.time()), toutiao_uid)
            mysql_write(sql)
            print(sql)
            time.sleep(random.randint(20, 30))
            js = \'window.location.href="https://www.wukong.com/"\'
            js = \'window.location.href="https://www.toutiao.com/"\'
            browser.execute_script(js)
            # except Exception as e_url_jump:
            #     print(\'e_url_jump\', e_url_jump)
    try:
        browser.quit()
    except Exception as e1:
        print(e1)
        logging.exception(e1)

time.sleep(random.randint(120, 300))

  

<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">

  

 

 

 

            \'\'\'
           <img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true"> 
           \'\'\'
            # Rewrite every <img ...> tag in dbhtml_str: each tag is replaced by the
            # template `s`, with the placeholder TTimgCode swapped for the last path
            # segment of the next URL taken from pgc_img_url_l_toutiao.
            dbhtml_str_ = dbhtml_str
            # Number of tags to process, counted before the loop rewrites anything.
            img_n = dbhtml_str_.count(\'<img\')
            # Template tag; the second assignment below is identical to the first.
            s = \'<img onload="editor.fireEvent(\\'contentchange\\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">\'
            s = \'<img onload="editor.fireEvent(\\'contentchange\\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">\'
            # Earlier variant of the template using single-quoted attributes (kept for reference):
            #     s = "<img onload=\'editor.fireEvent(\\'contentchange\\')\' src=\'https://p1.pstatp.com/large/pgc-image/TTimgCode\' _src=\'https://p1.pstatp.com/large/pgc-image/TTimgCode\' alt=\'pgc-image/TTimgCode\' buttonadded=\'true\'>"
            ss = \'\'
            # `l` is an alias, not a copy: the `del l[0]` below consumes
            # pgc_img_url_l_toutiao itself.
            l = pgc_img_url_l_toutiao
            for i in range(img_n):
                # p1 = start offset of the tag handled in this iteration.
                # NOTE(review): the count above matches `<img` while the search here
                # needs `<img ` (trailing space); str.index raises ValueError when a
                # counted tag is not followed by a space - confirm the input never
                # contains such tags.
                if i == 0:
                    p1 = dbhtml_str.index(\'<img \', 0)
                else:
                    # The tag written in the previous iteration starts at the old p1,
                    # so resuming at p1 + 3 skips past its opening `<img`.
                    p1 = dbhtml_str.index(\'<img \', p1 + 3)

                # Mask every `>` located before p1 in a copy of the text. The swap is
                # one character for one character, so offsets in tmp equal offsets in
                # dbhtml_str and the first `>` found is at or after p1.
                tmp = \'{}{}\'.format(dbhtml_str[0:p1].replace(\'>\', \'X\'), dbhtml_str[p1:])
                # p2 = offset of the first `>` from p1 onward, taken as the end of the tag.
                p2 = tmp.index(\'>\')
                # Fill the template with the last `/`-separated segment of the next URL.
                # NOTE(review): l[0] raises IndexError if the list holds fewer URLs
                # than there are tags - verify against the caller.
                ss = s.replace(\'TTimgCode\', l[0].split(\'/\')[-1])
                # Splice the new tag in place of the span p1..p2 (inclusive).
                dbhtml_str = \'{}{}{}\'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
                # Drop the URL that was just used so the next tag gets the next one.
                del l[0]
            # Debug output of the rewritten HTML.
            print(\'-----------------\')
            print(dbhtml_str)
            time.sleep(2)
            # Navigate the current tab to the article publish page by setting
            # window.location.href through injected JavaScript.
            js = \'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";\'
            browser.execute_script(js)
            # Fixed pause after navigation (presumably to let the page load; there
            # is no explicit wait on any element).
            time.sleep(6)
            # XPath of the element with id "title".
            xp_newpage = \'//*[@id="title"]\'
            mytxt = d[\'title\']
            # Send a single SPACE key first, then the title text itself.
            # NOTE(review): the leading space looks like a deliberate real keyboard
            # event before typing - confirm it is wanted, since it ends up in the field.
            browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
            time.sleep(2)

            # SAVE NOT DEL
            """
            xp = \'//*[@id="edui18_body"]/div[1]\'
            # //*[@id="edui18_body"]/div[1]
            browser.find_element_by_xpath(xp).click()
            time.sleep(2)
            xp = \'//*[@id="images"]/div[1]/div/span\'
            browser.find_element_by_xpath(xp).click()
            time.sleep(3)
            \'\'\'
            \'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"\'.format(dbhtml_str.replace(\'onload="editor.fireEvent(\\'contentchange\\')"\',\'\').replace(\'"\',"\'").replace(\'\n\',\'\'))

           \'\'\'
            # 结合浏览器控制台,拼接符合语法的js字符串
            r_d = {\'onload="editor.fireEvent(\\'contentchange\\')"\': \'\', \'"\': "\'", \'\n\': \'\'}
            dbhtml_str_py_js = dbhtml_str
            for k in r_d:
                dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
            dbhtml_str_py_js = dbhtml_str_py_js.replace(\'nbsp;\', \' \')
            js = \'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"\'.format(
                dbhtml_str_py_js)
            browser.execute_script(js)
           """
            # Inject the prepared HTML (dbhtml_str) into the page editor: first click
            # through the editor and image-upload controls, then set the editor
            # element's innerHTML with a JavaScript statement built from the HTML.

            # Activate the editing area.
            browser.find_element_by_class_name(\'ql-container\').click()
            # Go through the image-upload step by clicking the picture tool icon.
            browser.find_element_by_class_name(\'icon-pic_tool\').click()
            # Activate the target upload entry: the last tab in the tab list.
            browser.find_element_by_class_name(\'tui-tab-list\').find_elements_by_class_name(\'tui-tab\')[-1].click()
            # Close the upload entry: click the last button in the active tab panel.
            browser.find_element_by_class_name(\'tui-tab-panel-active\').find_elements_by_class_name(\'tui-btn\')[
                -1].click()

            # Build a JS string that is syntactically valid (worked out with the
            # browser console). The replacements run in dict insertion order, and
            # that order matters: the onload attribute (whose key contains double
            # quotes) must be removed before every double quote is turned into a
            # single quote, because the JS literal below is delimited by double quotes.
            r_d = {\'onload="editor.fireEvent(\\'contentchange\\')"\': \'\', \'"\': "\'", \'\n\': \'\'}
            dbhtml_str_py_js = dbhtml_str
            for k in r_d:
                dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
            # NOTE(review): only the text `nbsp;` is replaced, so an `&nbsp;` entity
            # would leave its `&` behind - confirm this is intended.
            dbhtml_str_py_js = dbhtml_str_py_js.replace(\'nbsp;\', \' \')
            # Pass the prepared HTML into the editor.
            # Console equivalent of the statement built below:
            # document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = "44"
            # NOTE(review): backslashes and carriage returns in the HTML are not
            # escaped or removed here; verify the input cannot contain them, or the
            # generated JS literal may be malformed.
            js = \'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML ="{}"\'.format(
                dbhtml_str_py_js)
            browser.execute_script(js)

  

 

 

 

 

 

 

 

 

  

分类:

技术点:

相关文章: