Public opinion monitoring is the practice of tracking and forecasting public sentiment expressed on the Internet. Most monitoring techniques are built on web crawlers. If we query a search engine for keywords related to trending events and save the results locally, we have implemented the first stage of public opinion monitoring: real-time acquisition of Internet data.
Public opinion monitoring.png
Preliminary result
Get data.gif
Implementation code
import os
import sys

import requests
from lxml import etree


def getData(wd):
    """Search Baidu for a keyword and return the results as formatted text.

    Args:
        wd: The search keyword. It is converted with ``str()`` and sent as
            the ``wd`` query parameter.

    Returns:
        A string containing one section per extracted result entry. Each
        section consists of a ``<Title>`` line, a ``<content>`` line and a
        ``<link>`` line, followed by a blank line. An empty string is
        returned when no result entries are found.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Present a desktop-browser User-Agent instead of the default
    # python-requests one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    }

    # Pass the keyword through `params` so that requests percent-encodes it;
    # concatenating it into the URL breaks on characters such as '&' or '#'.
    # The timeout keeps a stalled connection from blocking forever.
    response = requests.get(
        "https://www.baidu.com/s",
        params={"wd": str(wd)},
        headers=headers,
        timeout=10,
    )

    # Nothing to parse for an empty body.
    if not response.content:
        return ""

    # Parse the raw bytes into an element tree that can be queried via XPath.
    page = etree.HTML(response.content)
    # NOTE(review): lxml appears to return None for documents it cannot
    # build a tree from; guard so the xpath call below cannot fail on None.
    if page is None:
        return ""

    # One element per search result entry.
    entries = page.xpath('//div[@id="content_left"]/div[contains(@class, "result c-container")]')

    parts = []
    for entry in entries:
        # string(.) flattens an element's nested markup into plain text.
        title = "".join(
            node.xpath('string(.)') for node in entry.xpath('.//h3/a')
        )
        abstract = "".join(
            node.xpath('string(.)')
            for node in entry.xpath('.//div[@class="c-abstract"]')
        )
        # Some entries carry no display-URL anchor; emit an empty link
        # instead of raising IndexError on them.
        hrefs = entry.xpath('.//div[@class="f13"]/a[@class="c-showurl"]/@href')
        link = str(hrefs[0]) if hrefs else ""

        parts.extend([
            "<Title> " + title, "\n",
            "<content> " + abstract, "\n",
            "<link> " + link, "\n",
            "\n",
        ])

    return "".join(parts)


def saveDataToFile(file_name, data):
    """Write ``data`` to ``./data/<file_name>.txt``, replacing any old file.

    Args:
        file_name: Base name of the output file, without extension. Path
            separators in it are replaced with underscores so the file
            always lands directly inside ``./data/``.
        data: The text to write.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("./data/", exist_ok=True)

    safe_name = str(file_name).replace("/", "_").replace("\\", "_")

    # Write UTF-8 explicitly: the scraped text is typically non-ASCII and the
    # platform default encoding may be unable to represent it.
    with open("./data/" + safe_name + ".txt", "w", encoding="utf-8") as f:
        f.write(data)


def main():
    """Search for the keyword given on the command line and save the output.

    The keyword is read from the first command-line argument; when it is
    missing or empty, "Naruto" is used. The formatted results are printed
    and then written to a file named after the keyword.
    """
    wd = sys.argv[1] if len(sys.argv) > 1 else ""
    if not wd:
        wd = "Naruto"

    str_data = getData(wd)
    print(str_data)
    saveDataToFile(wd, str_data)


if __name__ == '__main__':
    main()