Asynchronous download of national flag images and country-name data
import asyncio
import os
import sys
import time

import aiohttp
from aiohttp import web  # provides HTTPNotFound, raised by http_get on a 404

# NOTE(review): the name suggests 20 country codes but only 19 are listed
# here -- confirm whether one is missing.
POP20_CC = ('CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR CD FR').split()
BASE_URL = 'http://flupy.org/data/flags'
DEST_DIR = 'downloads/'


class FetchError(Exception):
    """Raised when downloading the data for one country fails.

    Attributes:
        country_code: the country code whose download failed.
    """

    def __init__(self, country_code):
        # Pass the code to Exception so str(exc) and tracebacks are not empty.
        super().__init__(country_code)
        self.country_code = country_code


def save_flag(img, filename):
    """Write the image bytes ``img`` to ``DEST_DIR/filename``.

    The destination directory is created first if it does not exist, so a
    fresh checkout does not fail with FileNotFoundError.
    """
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)


async def http_get(session, url):
    """Fetch ``url`` with ``session`` and return the response body.

    Args:
        session: the aiohttp client session handed down by the caller.
        url: the address to request.

    Returns:
        The decoded JSON document when the response is JSON (by content type
        or by a ``.json`` URL suffix), otherwise the raw body bytes.

    Raises:
        web.HTTPNotFound: the server answered 404.
        aiohttp.ClientResponseError: any other status that is not 200.
    """
    async with session.get(url) as resp:
        if resp.status == 200:
            ctype = resp.headers.get('Content-type', '').lower()
            if 'json' in ctype or url.endswith('.json'):
                # content_type=None disables aiohttp's content-type check, so
                # a ".json" URL served with another content type still decodes.
                return await resp.json(content_type=None)
            return await resp.read()
        if resp.status == 404:
            raise web.HTTPNotFound()
        # The original referenced an undefined name (`res`) and the
        # `aiohttp.errors` module, which current aiohttp releases do not
        # provide; ClientResponseError is the public client-side equivalent.
        raise aiohttp.ClientResponseError(
            resp.request_info,
            resp.history,
            status=resp.status,
            message=resp.reason,
            headers=resp.headers,
        )


async def get_flag(session, cc):
    """Download and return the GIF bytes of the flag for country code ``cc``."""
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    # Network I/O: awaiting lets the event loop run other downloads meanwhile.
    image = await http_get(session, url)
    return image


async def get_country(session, cc):
    """Download the metadata for ``cc`` and return its ``'country'`` entry."""
    url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
    # Network I/O: awaiting lets the event loop run other downloads meanwhile.
    metadata = await http_get(session, url)
    return metadata['country']


def show(text):
    """Print ``text`` followed by ``[OK] `` without a newline, then flush.

    Flushing makes each code appear as soon as it is reported instead of all
    of them appearing together when the buffer is finally written out.
    """
    print(text + '[OK] ', end='')
    sys.stdout.flush()
async def download_one(session, cc):
    """Download the flag and country name for ``cc`` and save the image.

    Args:
        session: the aiohttp client session shared by all downloads.
        cc: the country code to fetch.

    Returns:
        ``cc``, whether the flag was saved or reported as not found.

    Raises:
        FetchError: any failure other than a 404 while fetching; the original
            exception is chained as the cause.
        Exception: errors raised while saving the file propagate unchanged.
    """
    # Imported locally so the except clause below never depends on a
    # module-level `web` name being present.
    from aiohttp import web

    try:
        image = await get_flag(session, cc)  # fetch the picture
        country = await get_country(session, cc)  # fetch the country name
    except web.HTTPNotFound:
        # Nothing to save: report the miss instead of claiming success.
        print(cc + '[not found] ', end='', flush=True)
        return cc
    except Exception as exc:
        raise FetchError(cc) from exc

    country = country.replace(' ', '_')
    filename = '{}-{}.gif'.format(cc, country)
    loop = asyncio.get_event_loop()
    # Run the blocking file write in the loop's default executor (the first
    # argument None selects it) so the event loop is not blocked.  Awaiting
    # the returned future ensures the write has finished, and any error it
    # raised is surfaced, before success is reported.
    await loop.run_in_executor(None, save_flag, image, filename)
    show(cc)
    return cc


async def download_many(cc_list):
    """Download all flags in ``cc_list`` concurrently.

    Returns:
        The number of results gathered, one per country code.
    """
    async with aiohttp.ClientSession() as session:
        # gather() schedules each coroutine as a task, runs them concurrently
        # and returns their results as a list in argument order.
        res = await asyncio.gather(
            *[download_one(session, cc) for cc in sorted(cc_list)]
        )
    return len(res)


def main():
    """Download the flags for POP20_CC and print the count and elapsed time."""
    t0 = time.time()
    # Create the loop explicitly: relying on asyncio.get_event_loop() to
    # create one implicitly is deprecated and fails on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        count = loop.run_until_complete(download_many(POP20_CC))
    finally:
        loop.close()  # release the loop even when a download failed
    elapsed = time.time() - t0
    msg = '\n{} flags download in {:.2f}s'
    print(msg.format(count, elapsed))


if __name__ == '__main__':
    main()

# Note on the `*` used in the gather() call above:
#
#     def a(*x):
#         print(x)
#
#     a([1, 2])   --> ([1, 2],)
#     a(*[1, 2])  --> (1, 2)
#
# `*` unpacks the elements of a list or tuple, so that each element is passed
# in as a separate positional argument.
In fact, asynchronous libraries rely on low-level threads (all the way down to kernel-level threads), but users of these libraries do not need to create threads or even know that low-level threads are used in the infrastructure. In the application, we only need to make sure that no code blocks; the event loop handles concurrency behind the scenes. An asynchronous system avoids the overhead of user-level threads, which is why it can manage more concurrent connections than a multithreaded system.
[Note 1] The loop.run_in_executor() method can be used with a concurrent.futures.ThreadPoolExecutor to execute blocking code in a different OS thread, without blocking the OS thread in which the event loop runs.