pyppeteer
# python3 -c 'import pyppeteer; pyppeteer.chromium_downloader.download_chromium()'
# [W:pyppeteer.chromium_downloader] chromium extracted to: /home/ubuntu/.local/share/pyppeteer/local-chromium/575458
demo 1
import requests
from pyquery import PyQuery as pq
# Plain HTTP fetch: requests does not execute the page's JavaScript, so only
# the static HTML is parsed (this demo reports 0 quotes).
url = 'http://quotes.toscrape.com/js/'
# Bound the request so the script cannot hang forever on a stalled server.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently counting quotes in an error page.
response.raise_for_status()
doc = pq(response.text)
print('Quotes:', doc('.quote').length)
0
demo 2
import asyncio
from pyppeteer import launch
from pyquery import PyQuery as pq
async def main():
    """Load the quotes page in a pyppeteer-driven browser and count quotes.

    Prints the number of elements matching ``.quote`` in the HTML reported
    by the browser page.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('http://quotes.toscrape.com/js/')
        # Parse the HTML as the browser currently holds it, not the raw
        # HTTP response body.
        doc = pq(await page.content())
        print('Quotes:', doc('.quote').length)
    finally:
        # Always shut the browser down so a failed navigation does not
        # leave a Chromium process behind.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
10
demo 3
import asyncio
from pyppeteer import launch
async def main():
    """Capture the quotes page as PNG and PDF and print viewport metrics.

    Writes ``example.png`` and ``example.pdf`` to the current directory,
    then prints a dict with the page's width, height and device scale
    factor as evaluated inside the browser.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('http://quotes.toscrape.com/js/')
        await page.screenshot(path='example.png')
        await page.pdf(path='example.pdf')
        # The JS function runs in the page; its return value comes back
        # to Python as a dict.
        dimensions = await page.evaluate('''() => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }''')
        print(dimensions)
        # >>> {'width': 800, 'height': 600, 'deviceScaleFactor': 1}
    finally:
        # Always shut the browser down so a failure above does not leave
        # a Chromium process behind.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
demo 4
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import asyncio
from pyppeteer import launch
async def main():
    """Screenshot a locally served chart page once it signals readiness.

    Opens the tool page, waits for the ``#echartReadyDiv`` element to
    appear, writes ``example.png`` and prints the viewport metrics
    evaluated inside the browser.
    """
    # --no-sandbox: see https://github.com/miyakogi/pyppeteer/issues/171
    browser = await launch({
        'args': ['--no-sandbox'],
    })
    try:
        page = await browser.newPage()
        url = "http://127.0.0.1:5000/tool?1784076_1_1_1_1_1"
        await page.goto(url)
        # Alternative: a fixed delay, e.g. `await page.waitFor(3000)` (3 s).

        # The page's JS adds #echartReadyDiv to the DOM dynamically once the
        # ECharts image has finished rendering, signalling that the image is
        # ready to download / send to the server.
        await page.waitForSelector('#echartReadyDiv')

        # Take the screenshot only after rendering has completed; capturing
        # before the page is fully loaded produces an incomplete image.
        await page.screenshot(path='example.png')
        # Optional PDF export: `await page.pdf(path='example.pdf')`.
        dimensions = await page.evaluate('''() => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }''')
        print(dimensions)
        # >>> {'width': 800, 'height': 600, 'deviceScaleFactor': 1}
        await page.close()
    finally:
        # Always shut the browser down so a selector timeout or navigation
        # error does not leave a Chromium process behind.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
Page APIs:
- page.waitFor(3000) # wait for 3 seconds
- page.waitForSelector('#echartReadyDiv') # wait for selector
- page.waitForXPath('//*[@id="echartReadyDiv"]') # wait for xpath
Reference
History
- 2020/1/17: created.