Puppeteer で複数の URL をクロールする
2757 ワード
基本的な使い方: await puppeteer.launch() でブラウザを起動し、await browser.newPage() でページを開き、page.on() でイベントを監視し、await page.goto() でページへ移動し、最後に await browser.close() でブラウザを閉じます。この順番を変える(たとえば page.goto() の後に page.on() を登録する)と、page.on() でイベントを捕捉できなくなります。
複数のURLの使い方
URL の配列に含まれる各ページのすべての画像をクロールし、それぞれの実際の幅と高さを返します。参考: 「Crawling multiple URLs in a loop using Puppeteer」「Looping through a set of URLs in Puppeteer」「Puppeteer - Proper way to loop through multiple URLs」
'use strict';
const puppeteer = require('puppeteer');
/**
 * Single-URL example: opens one page, and when its `domcontentloaded` event
 * fires, collects size information for every rendered <img> element, then
 * prints the collected list.
 *
 * Call order matters: launch -> newPage -> page.on -> page.goto -> close.
 * The listener has to be registered before `page.goto()`, otherwise the
 * event has already fired by the time we start listening.
 */
(async () => {
  // Filled in by the `domcontentloaded` listener; stays empty if it never fires.
  let imgArr = [];

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // The listener cannot be awaited by the emitter, so we keep a handle on
    // the in-flight scan and remember any failure to re-throw it later.
    let pendingScan = Promise.resolve();
    let scanError = null;

    const scanImages = async () => {
      // NOTE: this callback is serialized and executed inside the page, so it
      // must not reference any variable from the surrounding Node.js scope.
      imgArr = await page.$$eval('img', imgs =>
        imgs
          // Skip images that are rendered with a zero width or height.
          .filter(img => img.width && img.height)
          .map(img => ({
            width: img.width,
            naturalWidth: img.naturalWidth,
            height: img.height,
            naturalHeight: img.naturalHeight,
            // "Standard" means the intrinsic size is less than 10x the
            // rendered size in both dimensions.
            isStandard: !(img.width * 10 <= img.naturalWidth || img.height * 10 <= img.naturalHeight),
            url: img.src,
            level: 3,
            imageUrl: img.src,
            describeUrl: '',
            summary: ` ${img.width}x${img.height} ${img.naturalWidth}x${img.naturalHeight} `,
          }))
      );
    };

    page.on('domcontentloaded', () => {
      // Attach the rejection handler immediately so a failing scan never
      // becomes an unhandled rejection.
      pendingScan = scanImages().catch(err => {
        scanError = err;
      });
    });

    await page.goto('https://www.npmjs.com/package/puppeteer', { waitUntil: 'networkidle0' });

    // Make sure the scan started by the listener has settled before closing.
    await pendingScan;
    if (scanError) {
      throw scanError;
    }
  } finally {
    // Always release the browser process, even when navigation or the scan fails.
    await browser.close();
  }

  console.log('imgArr: ', imgArr);
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
※ 上記の呼び出し順序は変えられません。
複数の URL の使い方
URL の配列に含まれる各ページのすべての画像をクロールし、それぞれの実際の幅と高さを返します。
/* eslint-disable no-undef */
'use strict';
const puppeteer = require('puppeteer');
/**
 * Multi-URL example: visits each URL in turn with a single reused page,
 * reads the rendered and intrinsic size of every <img> element on it, and
 * prints the combined list once all pages have been processed.
 *
 * The pages are visited sequentially because they share one `page` object.
 */
(async () => {
  const urls = [ 'https://www.npmjs.com/package/puppeteer', 'https://www.iconfont.cn/search/index?searchType=icon&q=test' ];
  // Image records accumulated across all visited URLs.
  const images = [];

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    for (const url of urls) {
      await page.goto(url, { waitUntil: 'domcontentloaded' });
      // Rejects if no <img> shows up within 3 seconds; the `finally` below
      // still closes the browser in that case.
      await page.waitForSelector('img', { timeout: 3000 });

      // Runs inside the page. NOTE(review): navigation only waits for
      // `domcontentloaded`, so images that have not finished loading may
      // still report a naturalWidth/naturalHeight of 0.
      const found = await page.evaluate(() =>
        Array.from(document.querySelectorAll('img'), img => ({
          naturalWidth: img.naturalWidth,
          naturalHeight: img.naturalHeight,
          width: img.width,
          height: img.height,
        }))
      );
      images.push(...found);
    }
  } finally {
    // Always release the browser process, even when a page fails.
    await browser.close();
  }

  console.log('imgArr: ', images);
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
この方法は、主に Stack Overflow の回答を参考にしました。