【问题标题】:NodeJS Puppeteer setDownloadBehavior issue(NodeJS Puppeteer setDownloadBehavior 问题)
【发布时间】:2019-11-16 05:11:46
【问题描述】:

我正在尝试设置自定义下载路径,但 chrome 无论如何都会将文件放入典型的 Downloads 文件夹中。

// Question script: launches a locally installed Chrome through Puppeteer, opens
// a page, requests a custom download folder, then clicks the download link.
const puppeteer = require('puppeteer');

(async () => {
   // Start a visible (non-headless) Chrome from an explicit executable path and
   // keep the browser profile in ./user_data.
   const browser = await puppeteer.launch({
      executablePath: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
      defaultViewport: {
          width: 1920,
          height: 1080
      },
      headless: false,
      userDataDir: "./user_data"
   });

   const page = await browser.newPage();

   // Navigation is considered finished as soon as 'domcontentloaded' fires.
   await page.goto(
     'https://example.com/page-with-the-file-link',
      { waitUntil: 'domcontentloaded' },
   );

   // Sends the 'Page.setDownloadBehavior' command through the page's
   // underscore-prefixed `_client` member, asking for downloads to be allowed
   // and stored in the given folder.
   // NOTE(review): this is sent only after page.goto() has completed; whether
   // that ordering affects the result has not been verified here.
   await page._client.send('Page.setDownloadBehavior', {
         behavior: 'allow',
         downloadPath: 'C:/Users/Me/Downloads/custom/folder/'
   });

   console.log('Start downloading');

   // Clicking the link is what triggers the download in the browser.
   await page.click('a.download-btn');

   // NOTE(review): presumably meant as a fixed 5000 ms pause so the download can
   // finish before the browser is closed; confirm page.waitFor semantics for
   // the installed Puppeteer version.
   await page.waitFor(5000);

   console.log('Complete');
   await browser.close();
})();

因此,它会忽略 downloadPath 选项并将文件放入默认的 C:/Users/Me/Downloads 文件夹。

另外,脚本也没有等待那 5 秒(本意是给文件下载留出时间),而是在点击下载链接后立即退出。

Start downloading
Complete
(node:51016) UnhandledPromiseRejectionWarning: Error: WebSocket is not open: readyState 3 (CLOSED)
    at WebSocket.send (C:\Users\Me\Downloads\puppeteer\node_modules\ws\lib\websocket.js:329:19)
    at WebSocketTransport.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\WebSocketTransport.js:60:14)
    at Connection._rawSend (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:86:21)
    at Connection.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:72:21)
    at gracefullyCloseChrome (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Launcher.js:194:20)
    at Browser.close (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Browser.js:255:31)
    at Browser.<anonymous> (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\helper.js:112:23)
    at C:\Users\Me\Downloads\puppeteer\test-download-file.js:97:18
    at <anonymous>
(node:51016) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 3)
(node:51016) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

为什么这个脚本在点击下载链接后就退出了,如何正确执行?有没有获取下载状态和监控进度的功能?

任何建议将不胜感激,谢谢!

【问题讨论】:

    标签: javascript node.js google-chrome automation puppeteer


    【解决方案1】:

    如何用变通办法(权宜之计)解决这个问题

    1)我们应该捕获/等待将出现在默认'Downloads'目录中的文件

    /**
     * Waits until a file is present, giving up after a deadline.
     *
     * The parent directory is watched for a 'rename' event carrying the file's
     * base name, and the file is additionally probed once with fs.access in
     * case it is already there.
     *
     * @param {string} filePath - Full path of the file to wait for.
     * @param {number} timeout - Maximum time to wait, in milliseconds.
     * @returns {Promise<void>} Resolves once the file is readable or a matching
     *     'rename' event is seen. Rejects with an Error when the timeout
     *     elapses, or with the underlying error when the directory cannot be
     *     watched.
     */
    function checkExistsWithTimeout(filePath, timeout) {
        return new Promise(function (resolve, reject) {
            const dir = path.dirname(filePath);
            const basename = path.basename(filePath);
            let timer = null;
            let watcher = null;
            let settled = false;

            // Single exit point: whichever of watcher / access probe / timer
            // fires first wins, and always releases the other resources.
            function finish(err) {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timer);
                if (watcher) {
                    watcher.close();
                }
                if (err) {
                    reject(err);
                } else {
                    resolve();
                }
            }

            // fs.watch throws synchronously when the directory cannot be
            // watched (e.g. it does not exist). Creating the watcher before
            // arming the timer means a failure here cannot leave a timer
            // behind that later dereferences a watcher that was never created.
            try {
                watcher = fs.watch(dir, function (eventType, filename) {
                    if (eventType === 'rename' && filename === basename) {
                        finish();
                    }
                });
            } catch (err) {
                finish(err);
                return;
            }
            // Without a listener an 'error' event on the watcher would be thrown.
            watcher.on('error', finish);

            timer = setTimeout(function () {
                finish(new Error('File did not exists and was not created during the timeout.'));
            }, timeout);

            // Covers the case where the file already existed before watching began.
            fs.access(filePath, fs.constants.R_OK, function (err) {
                if (!err) {
                    finish();
                }
            });
        });
    }
    

    检查文件以防万一(可选)

    /**
     * Verifies that a file system entry exists at the given location.
     *
     * @param {string} path - Location to probe.
     * @returns {Promise<void>} Resolves (after logging 'File exists') when
     *     fs.access reports the entry as visible; rejects with the error
     *     reported by fs.access otherwise.
     */
    function checkFile(path) {
        return new Promise(function (resolve, reject) {
            fs.access(path, fs.constants.F_OK, (err) => {
                if (err) {
                    // Pass the original error through so its `code` (e.g. ENOENT)
                    // stays available, and stop here so the success path below
                    // does not run for a missing file.
                    reject(err);
                    return;
                }

                console.log('File exists');
                resolve();
            });
        });
    }
    

    下载完成后将文件移动到我们需要的地方(可选)

    /**
     * Moves (renames) a file.
     *
     * @param {string} fromPath - Current location of the file.
     * @param {string} toPath - Destination path.
     * @returns {Promise<void>} Resolves after logging 'File moved' once
     *     fs.rename succeeds; rejects with an Error ('File did not move.')
     *     whose `cause` is the error reported by fs.rename.
     */
    function moveFile(fromPath, toPath) {
        return new Promise(function (resolve, reject) {
            fs.rename(fromPath, toPath, function (err) {
                if (err) {
                    // Only reject: throwing from inside this callback cannot be
                    // caught by the caller and would surface as an uncaught
                    // exception instead.
                    reject(new Error('File did not move.', { cause: err }));
                    return;
                }
                console.log('File moved');
                resolve();
            });
        });
    }
    

    示例

    const fs = require('fs');
    const path = require('path');
    
    // The link text is used as the name of the file the browser will save.
    const fileName = await page.evaluate(() => {
       const button = document.querySelector('.download-file-btn');
       return button.textContent.trim();
    });

    // Where the browser drops the file, and where it should end up.
    const downloadedPath = `C:/Users/Me/Downloads/${fileName}`;
    const targetPath = `C:/Users/me/Desktop/Videos/${fileName}`;

    await page.click('.download-file-btn');
    // Wait up to 10 seconds for the file to show up, then relocate it.
    await checkExistsWithTimeout(downloadedPath, 10000);
    await moveFile(downloadedPath, targetPath);
    

    2) 使用 'request' 包从 url 下载文件

    /**
     * Downloads a resource to a local file using the `request` package.
     *
     * A HEAD request is issued first; the body is fetched and streamed to disk
     * only when that request succeeds with status 200.
     *
     * @param {string} uri - Address of the resource to fetch.
     * @param {string} filename - Path of the file to write.
     * @param {Function} [callback] - Optional function invoked with no
     *     arguments after the file has been written successfully.
     * @returns {Promise<void>} Resolves when the output file has been closed
     *     after a successful transfer. Rejects with the request or stream
     *     error, or with an Error naming the status code when the HEAD
     *     response is not 200.
     */
    function download(uri, filename, callback) {
        return new Promise(function (resolve, reject) {
            request.head(uri, function (err, res) {
                if (err) {
                    reject(err);
                    return;
                }
                if (res.statusCode !== 200) {
                    reject(new Error('Unexpected status code ' + res.statusCode + ' for ' + uri));
                    return;
                }

                console.log('content-type:', res.headers['content-type']);
                console.log('content-length:', res.headers['content-length']);

                const source = request(uri);
                const target = fs.createWriteStream(filename);
                // 'close' is also emitted after a failure, so remember whether
                // the transfer failed to avoid reporting success in that case.
                let failed = false;

                function fail(streamErr) {
                    failed = true;
                    console.log(streamErr);
                    reject(streamErr);
                }

                // pipe() does not forward errors from the source to the
                // destination, so each stream needs its own handler.
                source.on('error', function (streamErr) {
                    target.destroy();
                    fail(streamErr);
                });
                target.on('error', fail);
                target.on('close', function () {
                    if (failed) {
                        return;
                    }
                    if (typeof callback === 'function') {
                        callback();
                    }
                    resolve();
                });

                source.pipe(target);
            });
        });
    }
    

    示例

    // Read the video URL straight from the player element on the page.
    const videoSrc = await page.evaluate(() => document.querySelector('video.vjs-tech').src);

    // Invoked by download() once the file has been written.
    const onDownloaded = function() {
       console.log('downloaded');
    };

    await download(videoSrc, "C:/Users/Me/Downloads/Videos/video.mp4", onDownloaded);
    

    不使用 'setDownloadBehavior' 选项时,这对我有效;一旦使用该选项就会失败。现在我既可以点击下载按钮,也可以直接从某个 URL 下载文件,只需用 Node 对文件系统做一点操作即可。

    希望对某人有所帮助。

    【讨论】:

    • 如果每次下载时文件名都会变化,该如何获取文件名?它会在固定文件名后拼接日期和 hours_minutes_seconds;我尝试自己拼出这个文件名,但很难精确匹配到秒。
    • 您可以不通过浏览器本身下载文件,而是使用 npm http/request 库下载文件,如示例所示。因此,您可以设置任何您想要分配给某个变量的文件名,然后您只需在您的代码/文件系统中使用它(下载、保存、移动、删除)。
    猜你喜欢
    • 2019-03-15
    • 2021-04-21
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2020-10-09
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多