// 官方文档 https://github.com/puppeteer/puppeteer



const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
const chalk = require("chalk");

// Shorthand for console output used by the helper methods below.
const { log } = console;

// Process-wide settings and shared state. They are published on `global`
// because the helper methods read and write them as `global.*`.
Object.assign(global, {
  // How many browser instances to launch.
  MAX_WSE: 1,
  // One { browserWSEndpoint, runNumber } record per launched browser.
  WSE_LIST: [],
  // Map of URL -> timestamp (ms) of the request currently tracked for it.
  URL_LIST: {},
  // A URL is not requested again within this many seconds.
  WAIT_URL_DATE: 10,
  // Maximum number of tabs per browser.
  MAX_TAG: 20,
  // Launch state flag: 0 = idle, 1 = launch in progress, 2 = launch finished.
  REQUEST_BOOL: 0,
});

module.exports = {
  /**
   * 启动浏览器
   * @return {*}
   */
  async Pupp() {
    global.REQUEST_BOOL = 1;

    for (var i = 0; i < MAX_WSE; i++) {
      if (global.WSE_LIST[i]) {
        continue;
      }
      const browser = await puppeteer.launch({
        headless: false,
        ignoreDefaultArgs: ["--enable-automation"],
        args: [
          "--no-sandbox",
          "--start-maximized",
          "--disable-features=site-per-process",
          "--disable-web-security",
          "--disable-features=IsolateOrigins,site-per-process",
        ],
        timeout: 0,
      });
      var browserWSEndpoint = await browser.wsEndpoint();
      var browserArr = {
        browserWSEndpoint: browserWSEndpoint,
        runNumber: (runNumber = 0),
      };
      await global.WSE_LIST.push(browserArr);
    }
    global.REQUEST_BOOL = 2;
    return;
  },

  /**
   * 打开页面
   * @return {*}
   */
  async Page() {
    // 启动浏览器
    if (global.REQUEST_BOOL != 1) {
      await this.Pupp();
    } else {
      return false;
    }

    try {
      // 如果有打开的浏览器就随机挑选一个
      if (global.WSE_LIST) {
        var tmp = Math.floor(Math.random() * global.MAX_WSE);
        var browserWSEndpoint = global.WSE_LIST[tmp].browserWSEndpoint;
      } else {
        var browserResult = await this.ctx.helper.Page();
      }

      // 连接已经打开的浏览器窗口
      global.WSE_LIST[tmp].runNumber += 1;
      var browserResult = await puppeteer.connect({ browserWSEndpoint });
    } catch (error) {
      // 页面错误删除错误浏览器
      await global.WSE_LIST.splice(tmp, 1);
      // 启动浏览器  将新的浏览器对象返回
      var browserResult = await this.ctx.helper.Page();
    }

    let pageList = await browserResult.pages();

    await pageList.filter((item) => {
      if (item.url() == "about:blank" && pageList.length > 1) {
        item.close();
      }
    });

    if (pageList.length > global.MAX_TAG) {
      return false;
    }
    return browserResult;
  },

  /**
   *打开一个url地址
   * @param {*} url 打开的地址
   * @param {*} conf
   * @param {*} waitFor 等待的时间
   * @param {*} page
   * @returns
   */
  async goto(url, conf = {}, waitFor = 0, page = null) {
    // N秒内不请求重复的url地址
    var url_time = new Date().getTime() - global.URL_LIST[url];
    log(global.URL_LIST);
    if (
      (url_time && url_time > global.WAIT_URL_DATE * 1000) ||
      !global.URL_LIST[url]
    ) {
      global.URL_LIST[url] = new Date().getTime();

      // 选择一个浏览器
      if (!page) {
        var browserResult = await this.ctx.helper.Page();
        var page = await browserResult.newPage();
      }
      try {
        // 设置窗口最大化;
        let currentScreen = await page.evaluate(() => {
          return {
            width: window.screen.availWidth,
            height: window.screen.availHeight,
          };
        });
        await page.setViewport(currentScreen);

        // 设置浏览器用户信息
        await page.setUserAgent(
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
        );

        console.clear();
        await page.goto(url, conf);
        await page.waitFor(waitFor * 1000);
        log(chalk.green("链接打开成功"));
        return page;
      } catch (error) {
        delete global.URL_LIST[url];
        await page.close();
        log(chalk.red("链接打开失败"));
        return false;
      }
    } else {
      log(chalk.red("已经在请求的url地址"));
      // return "当前url已在"+global.WAIT_URL_DATE+"秒内请求";
      return false;
    }
  },

  /**
   * 获取浏览器cookie
   * @param {*} page
   */
  async getCookie(page) {
    let cookie = await page.evaluate(() => document.cookie);
    console.log("cookie :" + cookie);
  },

  /**
   * 设置浏览器cookie
   * @param {*} page
   * @param {*} cookie
   */
  async setCookie(page, cookie, domain = "/") {
    let cookies = cookie.split(";").map((pair) => {
      let name = pair.trim().slice(0, pair.trim().indexOf("="));
      let value = pair.trim().slice(pair.trim().indexOf("=") + 1);
      return { name, value, domain };
    });
    await Promise.all(
      cookies.map((pair) => {
        return page.setCookie(pair);
      })
    );
  },

  /**
   *解析化html
   * @param {*} page
   */
  async JqueryHtml(page) {
    var $ = await page
      .evaluate(function () {
        return document.body.innerHTML;
      })
      .then(function (html) {
        let $ = cheerio.load(html);
        return $;
      });
    return $;
  },

  /**
   * @param {执行等待} page
   * @param {*} time
   */
  async waitFor(page, time = 1000) {
    try {
      await page.waitFor(time);
    } catch (error) {
      log(error);
      await page.close();
    }
  },

  /**
   * @param {执行标签等待} page
   * @param {*} time
   */
  async waitForSelector(page, select) {
    try {
      await page.waitForSelector(select);
    } catch (error) {
      await page.close();
      log(error);
    }
  },

  /**
   * 请求拦截
   * @return {*}
   */
  async RequestInterception(page, Type = "image,css,script,stylesheet") {
    await page.setRequestInterception(true);
    page.on("request", (interceptedRequest) => {
      if (Type.indexOf(interceptedRequest.resourceType()) == -1) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
  },
};

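// Logo
//
// The OpenAtom Developer Workshop aims to encourage more people to take part in open-source activities, exchange development experience with like-minded developers, share what they have learned, and keep up with cutting-edge technology trends. The workshop hosts developer events in many formats, such as meetups and training camps, with a focus on substantive technical exchange; all developers are sincerely invited to join!
//
// More recommendations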