Fluent Python example: downloading national flag images asynchronously

Keywords: Python Session JSON

Asynchronous download of national flag pictures and country name data

import asyncio
import os
import sys
import time

import aiohttp
from aiohttp import web

# Country codes whose flags are downloaded.
# NOTE(review): the name suggests 20 entries, but only 19 codes are listed here — confirm.
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
            'MX PH VN ET EG DE IR CD FR').split()
# Root URL under which the per-country flag and metadata files are requested.
BASE_URL = 'http://flupy.org/data/flags'
# Directory (relative to the working directory) that receives the saved flags.
DEST_DIR = 'downloads/'

class FetchError(Exception):
    """Signal that fetching the data for one country failed.

    The country code that was being processed is kept on the
    ``country_code`` attribute.
    """

    def __init__(self, country_code):
        # Passing the code to the base class leaves ``args`` exactly as
        # ``Exception`` would set it on construction anyway.
        super().__init__(country_code)
        self.country_code = country_code

def save_flag(img, filename):
    """Write the image bytes ``img`` to ``filename`` inside ``DEST_DIR``.

    The destination directory is created first when it does not exist yet,
    so a fresh checkout does not fail with ``FileNotFoundError``.

    Args:
        img: bytes-like content of the picture.
        filename: name of the file to create inside ``DEST_DIR``.
    """
    # exist_ok=True makes this safe to call repeatedly for every flag.
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)

async def http_get(session, url):
    """Fetch ``url`` through ``session`` and return the response body.

    Args:
        session: object whose ``get(url)`` returns an async context manager
            yielding the response (download_many passes an
            ``aiohttp.ClientSession``).
        url: address to request.

    Returns:
        The decoded JSON document when the response declares a JSON content
        type or the URL ends with ``.json``; otherwise the raw body bytes.

    Raises:
        web.HTTPNotFound: the server answered 404.
        aiohttp.http_exceptions.HttpProcessingError: any other non-200 status.
    """
    async with session.get(url) as resp:
        if resp.status == 200:
            ctype = resp.headers.get('Content-type', '').lower()
            if 'json' in ctype or url.endswith('.json'):
                # content_type=None turns off aiohttp's own content-type
                # validation, so a ".json" URL served with a non-JSON header
                # is still parsed instead of raising.
                return await resp.json(content_type=None)
            return await resp.read()
        if resp.status == 404:
            raise web.HTTPNotFound()
        raise aiohttp.http_exceptions.HttpProcessingError(
            code=resp.status, message=resp.reason, headers=resp.headers)

async def get_flag(session, cc):
    """Request the ``.gif`` flag of country code ``cc`` and return the result.

    The URL is built from ``BASE_URL`` and the lower-cased code; the value
    returned is whatever ``http_get`` yields for that URL.
    """
    template = '{}/{cc}/{cc}.gif'
    flag_url = template.format(BASE_URL, cc=cc.lower())
    # I/O request: awaiting hands control back to the event loop meanwhile.
    return await http_get(session, flag_url)

async def get_country(session, cc):
    """Return the ``'country'`` entry of the metadata for country code ``cc``.

    The metadata is requested from ``metadata.json`` under ``BASE_URL`` and
    the lower-cased code.
    """
    template = '{}/{cc}/metadata.json'
    metadata_url = template.format(BASE_URL, cc=cc.lower())
    # I/O request: awaiting hands control back to the event loop meanwhile.
    info = await http_get(session, metadata_url)
    return info['country']

def show(text):
    """Print ``text`` followed by an ``[OK]`` marker, without a newline."""
    # Flushing right away makes each marker appear as soon as it is printed;
    # otherwise buffered output could show up all at once at the end.
    print(text + '[OK]  ', end='', flush=True)

async def download_one(session, cc):
    """Download the flag and country name for ``cc`` and save the picture.

    Args:
        session: client session handed on to ``get_flag``/``get_country``.
        cc: country code to process.

    Returns:
        ``cc``, both when the flag was saved and when it was not found.

    Raises:
        FetchError: any failure other than a 404 while fetching; the
            original exception is chained as ``__cause__``.
    """
    try:
        image = await get_flag(session, cc)
        country = await get_country(session, cc)
    except web.HTTPNotFound:
        # A missing flag is not an error, but it must not be reported as OK.
        print(cc + '[not found]  ', end='', flush=True)
        return cc
    except Exception as exc:
        raise FetchError(cc) from exc

    filename = '{}-{}.gif'.format(cc, country.replace(' ', '_'))
    loop = asyncio.get_event_loop()
    # save_flag does blocking file I/O, so it runs in the loop's default
    # executor (executor=None) to keep the event loop responsive.
    # Awaiting the returned future makes sure the write has finished, and
    # that any error it raised surfaces, before success is reported.
    await loop.run_in_executor(None, save_flag, image, filename)
    show(cc)
    return cc

async def download_many(cc_list):
    """Run ``download_one`` for every code in ``cc_list`` concurrently.

    Returns the number of results gathered.
    """
    async with aiohttp.ClientSession() as session:
        # One task per country code, created in sorted order; gather() then
        # waits for all of them and returns their results as a list.
        tasks = []
        for cc in sorted(cc_list):
            tasks.append(asyncio.ensure_future(download_one(session, cc)))
        results = await asyncio.gather(*tasks)
    return len(results)

def main():
    """Download every flag in ``POP20_CC`` and print the count and elapsed time."""
    t0 = time.time()
    # Create the loop explicitly: calling asyncio.get_event_loop() with no
    # running loop is deprecated in recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        count = loop.run_until_complete(download_many(POP20_CC))
    finally:
        # Close the loop even when a download raised.
        loop.close()
    elapsed = time.time() - t0
    msg = '\n{} flags download in {:.2f}s'
    print(msg.format(count, elapsed))

if __name__ == '__main__':
    main()

#def a(*x):
    #print(x)
#a([1,2]) --> ([1,2],)
#a(*[1,2]) --> (1,2)
# The * operator unpacks the elements of a list or tuple, so that each element is passed in as a separate argument


In fact, asynchronous libraries do rely on low-level threads (all the way down to kernel-level threads), but users of these libraries do not need to create threads themselves or even know that the infrastructure uses them. In application code, we only need to make sure that nothing blocks; the event loop handles concurrency behind the scenes. Asynchronous systems avoid the overhead of user-level threads, which is why they can manage more concurrent connections than multithreaded systems.

[Note 1] The loop.run_in_executor() method can be used with a concurrent.futures.ThreadPoolExecutor to execute blocking code in a different OS thread without blocking the OS thread that the event loop runs in

Posted by sith717 on Fri, 08 Nov 2019 10:52:42 -0800