Python scrapes a list of fantasy novels and stores it in Excel

Keywords: Big Data Excel Python encoding

Python scrapes a list of fantasy novels and stores it in Excel

Using requests to get information from novel web pages

First, import the requests library and the BeautifulSoup library

import requests
from bs4 import BeautifulSoup

Then call the requests.get() method to get the html of the specified url.

# Page to download.
url = 'http://top.hengyan.com/xuanhuan/'
# Pass an explicit timeout (seconds): without one, requests.get() can wait
# indefinitely if the server never answers.
html = requests.get(url, timeout=10)

Parsing with BeautifulSoup

# Parse the response body with Python's built-in "html.parser" backend.
# generate_info() reads this module-level `soup` object.
soup = BeautifulSoup(html.text, "html.parser")

Extract and store valid information

def generate_info(info):
    """Append one row of cell texts to *info* for every ranking ``<ul>``.

    Reads the module-level ``soup`` object. Each ``<ul>`` that contains at
    least one ``<li class="num">`` produces one list, filled in this order:
    the ``num`` cells, the ``bookname`` cells, then the ``author``,
    ``length``, ``click`` and ``update`` cells.

    Args:
        info: List that receives the rows; it is mutated in place.

    Returns:
        None.
    """
    for ul in soup.find_all('ul'):
        # Look the rank cells up once; a <ul> without any is skipped.
        rank_cells = ul.find_all('li', class_='num')
        if not rank_cells:
            continue

        row = [li.string for li in rank_cells]

        for li in ul.find_all('li', class_='bookname'):
            if li.string is None:
                # .string is None when the cell is not a single piece of
                # text (presumably it holds several links), so take the
                # text of every target="_blank" link in this <ul> instead.
                row.extend(a.string for a in ul.find_all('a', target='_blank'))
            else:
                # Plain-text cell (presumably the header row): keep its
                # text and add a label for the extra column that the link
                # branch above produces.
                row.append(li.string)
                row.append('Latest chapters')

        # The remaining columns are all read the same way, in this order.
        for css_class in ('author', 'length', 'click', 'update'):
            row.extend(li.string for li in ul.find_all('li', class_=css_class))

        info.append(row)

Using the xlwt module to create an Excel file

Import xlwt module first

import xlwt

Set the Excel cell style: font name, size, and so on

def set_style(name, height, bold = False):
    """Build an ``xlwt.XFStyle`` whose font uses the given settings.

    Args:
        name: Font family name, e.g. ``'Times New Roman'``.
        height: Font height as xlwt expects it (xlwt measures this in
            1/20 of a point, so 220 would be 11 pt -- confirm against the
            xlwt documentation).
        bold: Whether the font is bold. Defaults to ``False``.

    Returns:
        The configured ``xlwt.XFStyle`` instance.
    """
    style = xlwt.XFStyle()   # start from a default cell style

    font = xlwt.Font()       # font object that will be attached to the style
    font.name = name
    font.bold = bold
    # xlwt spells this attribute "colour_index"; assigning "color_index"
    # only creates an unused attribute and the colour is silently ignored.
    # NOTE(review): palette index 4 is presumably blue -- confirm.
    font.colour_index = 4
    font.height = height
    style.font = font
    return style

Create the Excel workbook and store the extracted information in it

def write_excel(datas,filename):
    """Write a list of rows to a new workbook and save it as *filename*.

    The workbook is created with UTF-8 encoding and holds a single sheet
    named ``'novel'``. ``datas[i][j]`` is written to row ``i``, column ``j``;
    no header row is added, so sheet row 0 is ``datas[0]``.

    Args:
        datas: List of rows, each row a list of cell values.
        filename: Path the workbook is saved to.

    Returns:
        None.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('novel')

    # Cell coordinates mirror the positions inside `datas`.
    for row_idx, row in enumerate(datas):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    # Persist the workbook to disk.
    book.save(filename)

Finally, call generate_info() to collect the rows and pass them to write_excel() to produce the Excel file

Posted by azunoman on Wed, 13 Nov 2019 11:18:44 -0800