Python学習ノート-aiohttp
15962 ワード
前回のメモではasyncioの要点をまとめましたが、今回はそれを応用してみます。コルーチンでクローラーを書く場合、ネットワーク関連のリクエストにはrequestsライブラリの代わりにaiohttpというライブラリを使います（公式ドキュメント／中国語ドキュメント）。前回書いたクローラーでまず壁紙のリンクを収集し、これらの画像をコルーチンでダウンロードします。まずaiohttpライブラリをインストールします。ドキュメントによると、aiohttpのリクエストはaiohttp.ClientSession()から発行します。ドキュメントはリクエストのたびにセッションを作成しないことを推奨しているため、ここではセッションを1つだけ作成します。続いて、画像をダウンロードするコルーチン関数と保存先パスを作成する関数を定義し、main関数を完成させて、ダウンロード時間を計測します。結果：26枚の画像で所要時間は1.476328 sでした。比較のため、マルチプロセス方式でもダウンロードします。どちらも何度も実行しましたが、所要時間はいずれも1.4~2.7 s程度で差は小さく、コルーチンの強力さがわかります。単一スレッドだけでマルチプロセスと同等の効果を達成しました。
転載元:https://juejin.im/post/5c4ff008f265da617831cb68
一. 効率比較
# Thumbnail URLs to download. Each URL is listed once: a repeated entry would
# be fetched again and written to the same file name, inflating the timing
# without producing any additional image.
urls = [
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-729560.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-724055.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716644.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716643.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716645.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-686220.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-686212.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-652608.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639894.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639893.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639892.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639890.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639888.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-468197.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467016.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467012.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467009.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467007.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467005.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466997.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466998.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466993.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466994.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466995.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466992.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466989.jpg",
]
async def main():
    """Open a single aiohttp client session and do nothing with it yet.

    This is a placeholder: it only shows that one ``ClientSession`` is
    created and closed via ``async with``; the body is intentionally empty.
    """
    async with aiohttp.ClientSession() as session:
        # Placeholder body; the session is closed when the block exits.
        pass
# Coroutine that downloads a single image and writes it to disk.
async def download_img(session, url):
    """Fetch ``url`` through ``session`` and save the body under its file name.

    The file name is the last ``/``-separated component of ``url`` and the
    file is written into the directory returned by ``get_store_path('city')``.

    Relies on two module-level names that are not defined inside this
    function: ``headers`` (passed to ``session.get``) and ``get_store_path``.

    Args:
        session: Object whose ``get(url, headers=...)`` returns an async
            context manager yielding a response with ``content.read(n)``.
        url: Address of the image to download.
    """
    file_name = url.rsplit('/', 1)[-1]
    async with session.get(url, headers=headers) as response:
        destination = '%s/%s' % (get_store_path('city'), file_name)
        with open(destination, 'wb') as out_file:
            # Read up to 200 bytes at a time until an empty chunk is returned.
            chunk = await response.content.read(200)
            while chunk:
                out_file.write(chunk)
                chunk = await response.content.read(200)
# Build the storage directory path, creating the directory when it is missing.
def get_store_path(dir_name):
    """Return the absolute path of ``wallpaper/<dir_name>``, creating it if needed.

    The directory is resolved relative to the current working directory.

    Args:
        dir_name: Name of the sub-directory below ``wallpaper``.

    Returns:
        The absolute path of the (now existing) target directory.
    """
    target_path = os.path.join(os.path.abspath('.'), 'wallpaper', dir_name)
    # exist_ok=True replaces the separate exists() check, which could fail
    # with FileExistsError when another caller created the directory between
    # the check and makedirs().
    os.makedirs(target_path, exist_ok=True)
    return target_path
async def main(loop):
    """Download every URL in ``urls`` concurrently over one shared session.

    Args:
        loop: Event loop used to create one download task per URL.

    Raises:
        BaseException: The first error raised by any download task. It is
            re-raised only after all tasks have finished, so one failure
            does not cut the other downloads short.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [loop.create_task(download_img(session, url)) for url in urls]
        # gather() accepts an empty task list (asyncio.wait() raises
        # ValueError for one), and return_exceptions=True lets every download
        # run to completion while collecting failures as results.
        results = await asyncio.gather(*tasks, return_exceptions=True)
    for result in results:
        if isinstance(result, BaseException):
            # Surface failures instead of leaving them unretrieved on the task.
            raise result
# Run main() to completion on the event loop returned by get_event_loop().
loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main(loop))
finally:
    # Close the loop even when main() raises, so it is not left open.
    loop.close()
if __name__ == '__main__':
    # perf_counter() is meant for measuring short durations and is not
    # affected by system clock adjustments, unlike time.time().
    t1 = time.perf_counter()
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        # Close the loop even when main() raises.
        loop.close()
    print(' :%fs' % (time.perf_counter() - t1))
import requests
import multiprocessing
import os
import time
# Thumbnail URLs to download. Each URL is listed once: a repeated entry would
# be fetched again and written to the same file name, inflating the timing
# without producing any additional image.
urls = [
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-729560.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-724055.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716644.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716643.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-716645.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-686220.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-686212.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-652608.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639894.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639893.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639892.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639890.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-639888.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-468197.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467016.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467012.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467009.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467007.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-467005.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466997.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466998.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466993.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466994.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466995.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466992.jpg",
    "https://alpha.wallhaven.cc/wallpapers/thumb/small/th-466989.jpg",
]
# One requests session used for every download; its default headers carry a
# browser-like User-Agent string.
req_session = requests.Session()
req_session.headers.update({
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
})
def get_store_path(dir_name):
    """Return the absolute path of ``wallpaper/<dir_name>``, creating it if needed.

    The directory is resolved relative to the current working directory.

    Args:
        dir_name: Name of the sub-directory below ``wallpaper``.

    Returns:
        The absolute path of the (now existing) target directory.
    """
    target_path = os.path.join(os.path.abspath('.'), 'wallpaper', dir_name)
    # exist_ok=True replaces the separate exists() check: several pool workers
    # call this concurrently, and a worker that lost the race between the
    # check and makedirs() would fail with FileExistsError.
    os.makedirs(target_path, exist_ok=True)
    return target_path
def download_img(url):
    """Download ``url`` with the shared ``req_session`` and save it to disk.

    The file name is the last ``/``-separated component of ``url`` and the
    file is written into the directory returned by ``get_store_path('city')``.

    Args:
        url: Address of the image to download.
    """
    image_name = url.split('/')[-1]
    # Using the streamed response as a context manager closes it (releasing
    # the connection) even if opening or writing the file fails part-way.
    with req_session.get(url, stream=True) as img:
        with open('%s/%s' % (get_store_path('city'), image_name), 'wb') as fd:
            for chunk in img.iter_content(chunk_size=128):
                fd.write(chunk)
def main():
    """Download every URL in ``urls`` using a pool of worker processes.

    Raises:
        Exception: The first error raised inside a worker. It is re-raised
            only after the pool has been joined, so all downloads have
            already been attempted.
    """
    p = multiprocessing.Pool()
    # Keep the AsyncResult handles: discarding them hides worker exceptions.
    pending = [p.apply_async(download_img, args=(url,)) for url in urls]
    p.close()
    p.join()
    for result in pending:
        # get() re-raises any exception the worker hit for this URL.
        result.get()
if __name__ == '__main__':
    # perf_counter() is meant for measuring short durations and is not
    # affected by system clock adjustments, unlike time.time().
    t1 = time.perf_counter()
    main()
    print(' :%fs' % (time.perf_counter() - t1))
結果
転載元:https://juejin.im/post/5c4ff008f265da617831cb68