【问题标题】:discord.py-rewrite - Dynamic Web Scraping using PyQt5 not working properlydiscord.py-rewrite - 使用 PyQt5 的动态 Web 抓取无法正常工作
【发布时间】:2020-04-02 02:52:26
【问题描述】:

简而言之,我正在制作一个不和谐的机器人,它将网站https://growtopiagame.com 中的“今日世界”图片下载为 D:\Kelbot/render.png,然后将图片发送到调用该命令的频道.但是,它不是静态网站,而且源代码中也没有URL,所以我找到了使用PyQt5的解决方案:

import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
    class Page(QWebEnginePage):
        def __init__(self, url):
            self.app = QApplication(sys.argv)
            QWebEnginePage.__init__(self)
            self.html = ''
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))
            self.app.exec_()

        def _on_load_finished(self):
            self.html = self.toHtml(self.Callable)
            print('Load finished')

        def Callable(self, html_str):
            self.html = html_str
            self.app.quit()

    def main():
        page = Page('https://growtopiagame.com')
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        js_test = soup.find('a', class_='world-of-day-image')
        link = []
        for x in js_test:
            link.append(str(x))
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
        urllib.request.urlretrieve(urls[0], "D:\Kelbot/render.png")
    if __name__ == '__main__': main()
    await ctx.send(file=discord.File('render.png'))

当我从任务计划程序运行机器人时,它不起作用。因此,我尝试使用我的 Python Shell 和 Visual Studio Code 来运行它,并且它们都有效。但是,当第二次调用该命令时,python shell 和 Visual Studio 代码都重新启动,并且机器人由于某种原因被杀死。是因为类与 discord.py 不兼容吗?我怎么可能解决这个问题。有没有比使用 PyQt5 更好的解决方案?

(有时我没有得到图片,而是得到https://growtopiagame.com/resources/assets/images/load.gif,这是他们在显示实际的每日世界图片之前放置的图像,但当我重新启动我的电脑时它会自行修复)

【问题讨论】:

    标签: python web-scraping pyqt pyqt5 discord.py-rewrite


    【解决方案1】:

    PyQt5 与 asyncio 不兼容,尽管有一些库试图使其与 quamash、asyncqt、qasync 兼容,但在您的情况下,这不是必需的,因为您希望 Qt 执行的唯一任务不是抓取网络以获取图像的 ulr 并下载它,因此解决方法是创建一个功能就是这样的外部应用程序,然后在 wotd 函数中使用它:

    ├── downloader.py
    ├── .env
    └── main.py
    

    ma​​in.py

    import asyncio
    import os
    import sys
    import uuid
    
    import discord
    from discord.ext import commands
    
    from dotenv import load_dotenv
    
    bot = commands.Bot(command_prefix="!")
    
    
    @commands.cooldown(1, 60, commands.BucketType.user)
    @bot.command()
    async def wotd(ctx):
        current_dir = os.path.dirname(os.path.realpath(__file__))
        images_dir = os.path.join(current_dir, "images")
    
        if not os.path.exists(images_dir) or not os.path.isdir(images_dir):
            os.mkdir(images_dir)
    
        output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))
    
        args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]
        process = await asyncio.create_subprocess_exec(
            *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        print("Started: %s, pid=%s" % (args, process.pid), flush=True)
        stdout, stderr = await process.communicate()
        if process.returncode == 0:
            print(
                "Done: %s, pid=%s, result: %s"
                % (args, process.pid, stdout.decode().strip()),
                flush=True,
            )
            await ctx.send(file=discord.File(output_filename))
            print("end", output_filename)
        else:
            print(
                "Failed: %s, pid=%s, result: %s"
                % (args, process.pid, stderr.decode().strip()),
                flush=True,
            )
            print("error")
    
    
    @wotd.error
    async def wotd_error(ctx, error):
        if isinstance(error, commands.CommandOnCooldown):
            msg = "This command is ratelimited, please try again in {:.2f}s".format(
                error.retry_after
            )
            await ctx.send(msg)
        print(ctx, error)
    
    
    def main():
        load_dotenv()
        token = os.getenv("DISCORD_TOKEN")
        bot.run(token)
    
    
    if __name__ == "__main__":
        main()
    

    downloader.py

    import sys
    
    from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets
    
    
    class DownLoader(QtCore.QObject):
        def __init__(self, path, parent=None):
            super().__init__(parent)
            self.path = path
    
            url = "https://growtopiagame.com"
            self.manager = QtNetwork.QNetworkAccessManager(self)
    
            profile = QtWebEngineWidgets.QWebEngineProfile(
                QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
            )
            self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)
            self.page.loadProgress.connect(print)
    
            self.manager.finished.connect(self.on_finished)
            self.page.loadFinished.connect(self.on_load_finished)
    
            self.page.load(QtCore.QUrl(url))
    
        @QtCore.pyqtSlot(bool)
        def on_load_finished(self, ok):
            if ok:
                self.request_url()
            else:
                print("error", ok, file=sys.stderr)
                QtCore.QCoreApplication.exit(-1)
    
        def request_url(self):
            js = """
            function get_url(){
                var elements = document.getElementsByClassName("world-of-day-image")
                if(elements.length){
                    var element = elements[0];
                    if(element.children.length){
                        var e = element.children[0]
                        if(e.tagName == "IMG")
                            return e.src
                    }
                }
                return "";
            }
            get_url();
            """
            self.page.runJavaScript(js, self.download)
    
        def download(self, url):
            if url:
                print(url)
                request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
                self.manager.get(request)
            else:
                QtCore.QTimer.singleShot(100, self.request_url)
    
        @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
        def on_finished(self, reply):
            if reply.error() == QtNetwork.QNetworkReply.NoError:
                file = QtCore.QFile(self.path)
                if file.open(QtCore.QIODevice.WriteOnly):
                    r = reply.readAll()
                    print(len(r))
                    file.write(r)
                file.close()
                QtCore.QCoreApplication.quit()
            else:
                print(reply.error(), reply.errorString(), file=sys.stderr)
                QtCore.QCoreApplication.exit(-1)
    
    
    if __name__ == "__main__":
        app = QtWidgets.QApplication(sys.argv)
        parser = QtCore.QCommandLineParser()
        parser.addPositionalArgument("path", "Path of image")
        parser.process(app)
        args = parser.positionalArguments()
        if not args:
            print("not path", file=sys.stderr)
            sys.exit(-1)
        path = args[0]
        downloader = DownLoader(path)
        sys.exit(app.exec_())
    

    .env

    DISCORD_TOKEN=YOUR_TOKEN_HERE
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2021-08-29
      • 1970-01-01
      • 2020-05-06
      • 2020-09-25
      • 2021-05-23
      • 2021-02-26
      相关资源
      最近更新 更多