Using Python to scrape a ranking list of fantasy novels and store it in Excel
Using requests to get information from novel web pages
First, import the requests library and the Beautiful Soup library
import requests from bs4 import BeautifulSoup
Then call the requests.get() method to get the html of the specified url.
# Address of the fantasy-novel ranking page to download.
url = 'http://top.hengyan.com/xuanhuan/'
# Pass an explicit timeout: requests applies none by default, so without it an
# unresponsive server would block the script indefinitely.
html = requests.get(url, timeout=10)
Parsing the HTML with Beautiful Soup
# Parse the text of the downloaded response using the "html.parser" backend;
# the resulting tree is what the extraction code searches.
soup = BeautifulSoup(html.text, "html.parser")
Extract and store valid information
def generate_info(info):
    """Append one row per ranking ``<ul>`` of the parsed page to *info*.

    Reads the module-level ``soup`` tree. Every ``<ul>`` that contains at
    least one ``<li class="num">`` is treated as a table row; its cells are
    collected in the order: num, bookname (plus links), author, length,
    click, update.

    Args:
        info: A list that is extended in place with one list per row.

    Returns:
        None. The result is delivered by mutating *info*.
    """
    for ul in soup.find_all('ul'):
        # Look the ranking cells up once and reuse them for both the
        # "is this a data list?" check and the row content.
        rank_cells = ul.find_all('li', class_='num')
        if not rank_cells:
            continue

        row = [li.string for li in rank_cells]

        for li in ul.find_all('li', class_='bookname'):
            if li.string is None:
                # No single string in the cell -- presumably the text lives in
                # nested links (title / latest chapter); confirm against the
                # page markup. Take the text of every new-window link in the row.
                row.extend(a.string for a in ul.find_all('a', target='_blank'))
            else:
                # Plain-text cell: keep its text and add the literal column
                # label so this row has as many cells as the link-based rows.
                row.append(li.string)
                row.append('Latest chapters')

        # The remaining columns are all read the same way, in this order.
        for css_class in ('author', 'length', 'click', 'update'):
            row.extend(li.string for li in ul.find_all('li', class_=css_class))

        info.append(row)
Using xlwt module to create Excel
Import xlwt module first
import xlwt
Define the Excel cell style: font name, size, weight, and so on
def set_style(name, height, bold=False):
    """Build an xlwt cell style with the given font settings.

    Args:
        name: Font family name, e.g. ``'Times New Roman'``.
        height: Font height in xlwt's units (1/20 of a point per the xlwt
            documentation, so 220 is 11 pt).
        bold: Whether the font is bold. Defaults to ``False``.

    Returns:
        An ``xlwt.XFStyle`` whose font carries the requested settings and
        palette colour index 4.
    """
    font = xlwt.Font()  # Font object that will be attached to the style
    font.name = name
    font.bold = bold
    # xlwt reads the British spelling ``colour_index``; assigning to
    # ``color_index`` only creates an unused attribute, so the colour was
    # never applied.
    font.colour_index = 4
    font.height = height

    style = xlwt.XFStyle()  # Style container returned to the caller
    style.font = font
    return style
Create excel and store the extracted effective information in Excel
def write_excel(datas, filename):
    """Write a list of rows to a new Excel workbook and save it to disk.

    Args:
        datas: Sequence of rows, each a sequence of cell values. Row ``i``,
            cell ``j`` is written to sheet position ``(i, j)``; no extra
            header row is added.
        filename: Path the workbook is saved to.

    Returns:
        None.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('novel')

    # Copy the data into the sheet cell by cell, preserving row/column order.
    for row_idx, row in enumerate(datas):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    # Persist the workbook to disk.
    book.save(filename)