concurrent.futures of python concurrent modules

Concurrent.futures of python concurrent modules (2)

In the previous article we took a brief look at the module's basic methods and how to use them. Here we go further and apply concurrent.futures to a practical example.
For the previous article, click here.
Concurrent.futures of python concurrent modules (2)
Take downloading pictures as an example. The following program downloads the 24 emoticon images from the page http://www.58pic.com/newpic/28660111.html one after another (sequentially).

from requests_html import HTMLSession
import os
import time

# Folder name, joined onto this script's directory, where downloaded images are written.
BASE_PATH="downloads"
class Get_Image():
    """Sequential downloader for the emoticon images listed on a page."""

    def __init__(self):
        # A single session is reused for the page request and each image request.
        self.session=HTMLSession()
        # Timeout handed to every session.get call.
        self.timeout=20

    def getiamge(self,url):
        """Fetch *url* and save each image it lists, one after another.

        Prints "Download failed" and does nothing else when the page
        response status is not 200.
        """
        page=self.session.get(url,timeout=self.timeout)
        if page.status_code!=200:
            print("Download failed")
            return

        links=page.html.xpath("//ul[@class='emoticon-model']/li/img/@data-big")
        # Images are numbered from 1 in the order the XPath query returns them.
        for number,link in enumerate(links,start=1):
            print(f"Start downloading the{number}Zhang picture")
            self.save_image(link,number)

    def save_image(self,imgurl,index):
        """Download *imgurl* and write it as ``<index>.png`` under BASE_PATH.

        The BASE_PATH folder is resolved relative to this script's directory
        and is created when it does not exist yet.
        """
        print(f"Current download link:{imgurl}")
        payload=self.session.get(imgurl,timeout=self.timeout).content

        script_dir=os.path.dirname(os.path.abspath(__file__))
        target_dir=os.path.join(script_dir,BASE_PATH)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        target_file=os.path.join(target_dir,f"{index}.png")
        with open(target_file,"wb") as out:
            out.write(payload)
if __name__ == '__main__':
    # Page whose emoticon images are downloaded one after another.
    page_url="http://www.58pic.com/newpic/28660111.html"

    # Time the whole run: page fetch plus every image download.
    began=time.time()
    downloader=Get_Image()
    downloader.getiamge(page_url)
    elapsed=time.time()-began

    print(f"Download 24 pictures in sequence:{elapsed}")
# Results of two runs (elapsed time in seconds):
# Download 24 pictures in sequence:14.926000356674194
# Download 24 pictures in sequence:14.07800030708313

Here is the same program rewritten with concurrent.futures so that the downloads run concurrently:

from requests_html import HTMLSession
import os
import time
from concurrent.futures import ThreadPoolExecutor

# Folder name, joined onto this script's directory, where downloaded images are written.
BASE_PATH="downloads"

# Upper bound on the thread-pool size; the pool is smaller when fewer images are found.
MAX_WORKERS = 10 #Up to 10 threads
class Get_Image():
    """Download the emoticon images listed on a page using a thread pool."""

    def __init__(self):
        # Timeout handed to every session.get call.
        self.timeout=20
        # One session is shared by the page request and all worker threads.
        self.session=HTMLSession()

    def getiamge(self,url):
        """Fetch *url* and download every image it lists concurrently.

        Returns:
            The number of images processed, ``0`` when the page lists no
            images, or ``None`` (after printing "Download failed") when the
            page response status is not 200.

        Raises:
            Any exception raised inside a worker's ``save_image`` call is
            re-raised here when the results are collected.
        """
        req=self.session.get(url,timeout=self.timeout)
        if req.status_code!=200:
            print("Download failed")
            return None

        imgurllist=req.html.xpath("//ul[@class='emoticon-model']/li/img/@data-big")
        if not imgurllist:
            # ThreadPoolExecutor rejects max_workers=0 with ValueError, so an
            # empty result must be handled before the pool is created.
            return 0

        works=min(len(imgurllist),MAX_WORKERS)
        # Number the files 1..N from the actual number of links; map() stops
        # at the shortest iterable, so a fixed range(1, 25) would silently
        # drop every image after the 24th.
        indexes=range(1,len(imgurllist)+1)
        with ThreadPoolExecutor(works) as excutor:
            # Consuming the iterator surfaces any exception raised in a worker.
            res=list(excutor.map(self.save_image,imgurllist,indexes))
        return len(res)

    def save_image(self,imgurl,index):
        """Download *imgurl* and write it as ``<index>.png`` under BASE_PATH.

        The BASE_PATH folder is resolved relative to this script's directory.
        """
        print(f"Current download link:{imgurl}")
        buff=self.session.get(imgurl,timeout=self.timeout).content
        file_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),BASE_PATH)
        # Several worker threads reach this point at once on the first run;
        # exist_ok=True avoids the check-then-create race that made the losing
        # threads fail with FileExistsError.
        os.makedirs(file_path,exist_ok=True)
        with open(os.path.join(file_path,f"{index}.png"),"wb") as fs:
            fs.write(buff)
if __name__ == '__main__':
    # Page whose emoticon images are downloaded by the thread pool.
    page_url="http://www.58pic.com/newpic/28660111.html"

    # Time the whole run: page fetch plus all concurrent image downloads.
    began=time.time()
    downloader=Get_Image()
    downloader.getiamge(page_url)
    elapsed=time.time()-began

    print(f"Download 24 pictures at the same time:{elapsed}")
# Results of two runs (elapsed time in seconds):
# Download 24 pictures at the same time:7.737000226974487
# Download 24 pictures at the same time:7.083999872207642

Comparing the timings shows that the concurrent version is much faster: about 7 seconds instead of about 14 seconds for the sequential version.

Posted by SQHell on Sun, 08 Dec 2019 07:17:34 -0800

Programmer Group

concurrent.futures of python concurrent modules

Concurrent.futures of python concurrent modules (2)

Hot Keywords