Basic usage and proxy configuration of Selenium with Python

Keywords: network Firefox Selenium Python

Catalog

Basic use

Initialize driver

chrome

options = ChromeOptions()
# Ignore HTTPS certificate errors/warnings
options.add_argument('--ignore-certificate-errors')
options.add_argument('--disable-gpu')
options.add_argument('--disable-cache')
# Headless mode. Passing the switch directly works across Selenium releases;
# the `options.headless` setter is deprecated/removed in newer versions.
options.add_argument('--headless')
# Exclude the 'enable-automation' switch (original note: this is meant to
# change the window.navigator.webdriver=true flag seen by the page)
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Start the driver. `options=` replaces the deprecated `chrome_options=`
# keyword, which Selenium 4 no longer accepts.
self.driver = webdriver.Chrome(options=options)
# Explicit-wait helper with a 10-second timeout
self.wait = WebDriverWait(self.driver, 10)

firefox

profile = FirefoxProfile()
profile.set_preference('permissions.default.image', 2)  # Disable image loading
# Start the Firefox driver with this profile
self.driver = webdriver.Firefox(firefox_profile=profile)
# Explicit-wait helper with a 10-second timeout
self.wait = WebDriverWait(self.driver, 10)

Open web page

# Clear all cookies held by the current browser session
self.driver.delete_all_cookies()
# Navigate to the target page
self.driver.get(local_url)

Selecting elements

There are many ways to select elements: by id, class name, CSS selector, XPath, and so on. If no matching element is found, an exception is raised.

# Single element. The generic find_element(By.<strategy>, value) form is used
# because the find_element_by_* helpers were removed in Selenium 4.3.
self.driver.find_element(By.CLASS_NAME, "user-tab")
# Multiple elements
self.driver.find_elements(By.XPATH, "//div/div[2]/a")
# ...

You can also use explicit waits with a timeout to get elements:

# Wait (up to the timeout configured on self.wait) until the element is present
self.wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='JDJRV-bigimg']/img")))

Action events

click

# Click the element directly
element.click()
# Click the element via JavaScript
self.driver.execute_script("arguments[0].click();", element)

# Press and hold the mouse button on an element (optional)
ActionChains(self.driver).click_and_hold(element).perform()
# Release the held mouse button
ActionChains(self.driver).release().perform()

move

# Move the mouse to the given element
ActionChains(self.driver).move_to_element(element).perform()
# Move the mouse by the given pixel offset (x, y)
ActionChains(self.driver).move_by_offset(xoffset=x, yoffset=y).perform()

input

# Type text into the input element
input_ele.send_keys('123')
# Clear the input element's content
input_ele.clear()

Scroll

# JavaScript approach: scroll the window down by 700 pixels
self.driver.execute_script("window.scrollBy(0, 700)")
# Keyboard simulation: may fail in headless mode, or may require clicking the page first
ActionChains(self.driver).send_keys(Keys.PAGE_DOWN).perform() 

Grab the network

Selenium relies on a third-party component, browsermob-proxy, to capture the network requests made by a page. browsermob-proxy exposes a proxy service; once the browser driven by Selenium is configured to use that proxy, the asynchronous requests (HTTP and HTTPS) issued while we operate the page can be captured.

Initialize browsermob-proxy

First, download the browsermob-proxy release package. The project is implemented in Java, so we need to call the launcher in its bin directory that matches our platform.

The basic usage of browsermob-proxy is shown in the functions below.

def _init_proxy(self):
    """
    Start the browsermob-proxy manager service and request a proxy from it.

    The manager is stored on ``self.server`` and the proxy on ``self.proxy``.
    """
    # Launcher script shipped with browsermob-proxy (this path is for macOS)
    launcher = r"/Users/xxx/browsermob-proxy-2.1.4/bin/browsermob-proxy"
    # The proxy manager service listens on port 8180
    server_options = {'port': 8180}
    self.server = browsermobproxy.Server(path=launcher, options=server_options)
    # Start the manager, then ask it for a proxy
    self.server.start()
    self.proxy = self.server.create_proxy()
    
def _open_proxy(self, ref):
    """
    Start a new HAR capture on the proxy (call this before opening the page).

    :param ref: Name under which the new HAR is registered
    :return: None
    """
    # Capture both response bodies and headers
    self.proxy.new_har(
        ref,
        options={'captureContent': True, 'captureHeaders': True},
    )
    
def _get_network(self):
    """
    Collect the requests captured in the proxy's current HAR.

    :return: A list of ``(request_url, response_text)`` tuples, one per HAR
        entry. ``response_text`` is ``None`` when the entry's response
        content carries no ``text`` field.
    """
    # Read the captured HAR from the proxy
    har = self.proxy.har
    captured = []
    for entry in har['log']['entries']:
        req_url = entry['request']['url']
        # 'text' is optional in HAR response content, so a plain
        # ['text'] lookup would raise KeyError for body-less responses.
        resp_content = entry['response']['content'].get('text')
        captured.append((req_url, resp_content))
    return captured

chrome proxy

options = ChromeOptions()
# Route browser traffic through the browsermob proxy on localhost.
# Alternative form using the proxy's own address attribute:
# options.add_argument('--proxy-server={0}'.format(self.proxy.proxy))
options.add_argument('--proxy-server={host}:{port}'.format(host="localhost", port=self.proxy.port))

# Other configurations...
# `options=` replaces the deprecated `chrome_options=` keyword, which
# Selenium 4 no longer accepts.
self.driver = webdriver.Chrome(options=options)

firefox proxy

profile = FirefoxProfile()

# Set the proxy type preference to 1
profile.set_preference("network.proxy.type", 1)
# Host and port used as the proxy for the HTTP protocol
profile.set_preference("network.proxy.http", "localhost")
profile.set_preference("network.proxy.http_port", self.proxy.port)
# Host and port used as the proxy for HTTPS
profile.set_preference('network.proxy.ssl', "localhost")
profile.set_preference('network.proxy.ssl_port', self.proxy.port)
# Make all protocols share one host and port. When each protocol is configured
# separately this item is not needed, because it defaults to False.
profile.set_preference("network.proxy.share_proxy_settings", True)

# Other configurations...
self.driver = webdriver.Firefox(firefox_profile=profile)

Reference resources

Posted by vamosbenedikt on Fri, 04 Oct 2019 00:10:01 -0700