concurrent.futures: Python's concurrency module (2)
Last time, we briefly covered some basic methods and usage of the module. Here we take a closer look at concurrent.futures and extend the example.
Click here for the previous article.
concurrent.futures: Python's concurrency module (2)
Take downloading pictures as an example. The following program downloads the 24 emoticon images from the page http://www.58pic.com/newpic/28660111.html sequentially, one after another.
from requests_html import HTMLSession
import os
import time
# Name of the output directory, created next to this script.
BASE_PATH = "downloads"


class Get_Image():
    """Download the images listed on a page, one after another.

    The page is fetched through a shared ``HTMLSession``; every image URL
    found on it is then downloaded in order and written to
    ``<directory of this file>/<BASE_PATH>/<index>.png``.
    """

    def __init__(self):
        # Passed as ``timeout=`` to every request made through the session.
        self.timeout = 20
        self.session = HTMLSession()

    def getiamge(self, url):
        """Fetch ``url`` and save every image it lists, in page order.

        Args:
            url: Address of the page that lists the images.

        Returns:
            None. Prints "Download failed" and does nothing else when the
            page does not answer with HTTP 200.
        """
        req = self.session.get(url, timeout=self.timeout)
        if req.status_code != 200:
            print("Download failed")
            return

        # The full-size image address is stored in each <img>'s ``data-big``
        # attribute inside the ``emoticon-model`` list.
        imgurllist = req.html.xpath("//ul[@class='emoticon-model']/li/img/@data-big")
        # Number the images from 1; a distinct loop name avoids shadowing
        # the ``url`` parameter.
        for index, imgurl in enumerate(imgurllist, start=1):
            print(f"Start downloading the{index}Zhang picture")
            self.save_image(imgurl, index)

    def save_image(self, imgurl, index):
        """Download one image and write it to ``<BASE_PATH>/<index>.png``.

        Args:
            imgurl: Address of the image to download.
            index: Number used as the file name (without extension).
        """
        print(f"Current download link:{imgurl}")
        buff = self.session.get(imgurl, timeout=self.timeout).content
        file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), BASE_PATH)
        # exist_ok=True creates the directory on first use and is a no-op
        # afterwards, with no separate existence check to go stale.
        os.makedirs(file_path, exist_ok=True)
        with open(os.path.join(file_path, f"{index}.png"), "wb") as fs:
            fs.write(buff)
if __name__ == '__main__':
    # Script entry point: time one sequential download of the whole page.
    start_url = "http://www.58pic.com/newpic/28660111.html"
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() follows the system clock and can jump during the run.
    start = time.perf_counter()
    Get_Image().getiamge(start_url)
    end = time.perf_counter()
    print(f"Download 24 pictures in sequence:{end-start}")
# Output of two runs:
# Download 24 pictures in sequence:14.926000356674194
# Download 24 pictures in sequence:14.07800030708313
After modifying the program to download concurrently with concurrent.futures:
from requests_html import HTMLSession
import os
import time
from concurrent.futures import ThreadPoolExecutor
# Name of the output directory, created next to this script.
BASE_PATH = "downloads"
MAX_WORKERS = 10  # Up to 10 threads


class Get_Image():
    """Download the images listed on a page concurrently.

    The page is fetched through a shared ``HTMLSession``; the image URLs
    found on it are then downloaded by a thread pool and written to
    ``<directory of this file>/<BASE_PATH>/<index>.png``.
    """

    def __init__(self):
        # Passed as ``timeout=`` to every request made through the session.
        self.timeout = 20
        # NOTE(review): this one session is used from every worker thread;
        # confirm that sharing it across threads is safe.
        self.session = HTMLSession()

    def getiamge(self, url):
        """Fetch ``url`` and download every image it lists using threads.

        Args:
            url: Address of the page that lists the images.

        Returns:
            The number of images downloaded, or None (after printing
            "Download failed") when the page does not answer with HTTP 200.

        Raises:
            Any exception raised by ``save_image`` in a worker thread is
            re-raised here when the results are collected.
        """
        req = self.session.get(url, timeout=self.timeout)
        if req.status_code != 200:
            print("Download failed")
            return None

        # The full-size image address is stored in each <img>'s ``data-big``
        # attribute inside the ``emoticon-model`` list.
        imgurllist = req.html.xpath("//ul[@class='emoticon-model']/li/img/@data-big")
        if not imgurllist:
            # ThreadPoolExecutor rejects max_workers=0, so stop early when
            # there is nothing to download.
            return 0

        # Never start more threads than there are images.
        works = min(len(imgurllist), MAX_WORKERS)
        # Number the images 1..N from the actual list length; a fixed range
        # would make map() stop early and silently drop the extra images.
        indexes = range(1, len(imgurllist) + 1)
        with ThreadPoolExecutor(works) as excutor:
            res = excutor.map(self.save_image, imgurllist, indexes)
            # Consuming the iterator waits for every download to finish.
            return len(list(res))

    def save_image(self, imgurl, index):
        """Download one image and write it to ``<BASE_PATH>/<index>.png``.

        Args:
            imgurl: Address of the image to download.
            index: Number used as the file name (without extension).
        """
        print(f"Current download link:{imgurl}")
        buff = self.session.get(imgurl, timeout=self.timeout).content
        file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), BASE_PATH)
        # Several workers can reach this line at once; exist_ok=True avoids
        # the FileExistsError a separate exists()/makedirs() pair can raise
        # when two threads both try to create the directory.
        os.makedirs(file_path, exist_ok=True)
        with open(os.path.join(file_path, f"{index}.png"), "wb") as fs:
            fs.write(buff)
if __name__ == '__main__':
    # Script entry point: time one concurrent download of the whole page.
    start_url = "http://www.58pic.com/newpic/28660111.html"
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() follows the system clock and can jump during the run.
    start = time.perf_counter()
    Get_Image().getiamge(start_url)
    end = time.perf_counter()
    print(f"Download 24 pictures at the same time:{end-start}")
# Output of two runs:
# Download 24 pictures at the same time:7.737000226974487
# Download 24 pictures at the same time:7.083999872207642
Comparing the timings shows that downloading concurrently is much more efficient: the total time drops from about 14 seconds to about 7 seconds.