Using Selenium with Python and configuring a proxy

Keywords: network Firefox Selenium Python


Basic use

Initialize driver


options = ChromeOptions()
# Ignore HTTPS certificate warnings
options.add_argument('--ignore-certificate-errors')
# Headless mode (passed as a command-line switch; the ``headless`` property
# setter is deprecated in Selenium 4)
options.add_argument('--headless')
# Drop the "enable-automation" switch (intended to stop
# window.navigator.webdriver from reporting true)
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Open the driver (``options=`` replaces the deprecated ``chrome_options=`` keyword)
self.driver = webdriver.Chrome(options=options)
# Explicit-wait helper with a 10-second timeout
self.wait = WebDriverWait(self.driver, 10)


profile = FirefoxProfile()
profile.set_preference('permissions.default.image', 2)  # Disable image loading
# Attach the profile through an Options object; the ``firefox_profile=``
# keyword of webdriver.Firefox is deprecated in Selenium 4
firefox_options = webdriver.FirefoxOptions()
firefox_options.profile = profile
# Open the driver
self.driver = webdriver.Firefox(options=firefox_options)
# Explicit-wait helper with a 10-second timeout
self.wait = WebDriverWait(self.driver, 10)

Open web page

# Clear all cookies (self.driver.delete_all_cookies())
# Open the web page (self.driver.get(url))

Selection Tags

Elements can be located in many ways: by id, class name, CSS selector, XPath, and so on. If no matching element is found, an exception is raised.

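# Single element (self.driver.find_element(By.ID, "some-id"))
# Multiple elements (self.driver.find_elements(By.CLASS_NAME, "some-class"))
# ...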

You can also use an explicit wait with a timeout to locate elements

# Block until the image is present in the DOM, or until the wait's timeout expires
big_image_locator = (By.XPATH, "//div[@class='JDJRV-bigimg']/img")
self.wait.until(EC.presence_of_element_located(big_image_locator))

Action events


# Click the element directly (WebElement.click())
# Click via JavaScript instead; the element is handed to the script as arguments[0]
click_script = "arguments[0].click();"
self.driver.execute_script(click_script, element)

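# Press and hold the mouse button on an element (ActionChains.click_and_hold; the element argument is optional)
# Release the held mouse button (ActionChains.release)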


# Move the pointer onto a given element (ActionChains.move_to_element)
# Move the pointer by a pixel offset
pointer_actions = ActionChains(self.driver)
pointer_actions.move_by_offset(xoffset=x, yoffset=y)
pointer_actions.perform()


# Type text into an input (element.send_keys("text"))
# Clear the input (element.clear())


# JavaScript approach: scroll the window down by 700 pixels
scroll_down_script = "window.scrollBy(0, 700)"
self.driver.execute_script(scroll_down_script)
# Simulated-input approach: may fail in headless mode, or may require clicking the page first.

Grab the network

Selenium cannot capture a page's network traffic on its own, so we rely on a third-party component, browsermob-proxy. It exposes a proxy service; once the browser driven by Selenium is configured to use that proxy, we can capture the requests (HTTP and HTTPS) that the page makes, including asynchronous ones, while we operate the page.

Initialize browsermob-proxy

First, download the browsermob-proxy release package. The project is implemented in Java, so we need to call the launcher script in its bin directory that matches our platform.

The basic usage of browsermob-proxy is shown in the functions below.

def _init_proxy(self):
    """Start the browsermob-proxy manager and request a proxy from it.

    Stores the manager wrapper on ``self.server`` and the proxy client it
    hands out on ``self.proxy``.
    """
    # Launcher script from the release's bin directory (this is the macOS one)
    path = r"/Users/xxx/browsermob-proxy-2.1.4/bin/browsermob-proxy"
    # Create the proxy manager service, listening on port 8180
    self.server = browsermobproxy.Server(path=path, options={'port': 8180})
    # Start the manager service; it must be running before a proxy can be requested
    self.server.start()
    # Ask the manager service for a proxy
    self.proxy = self.server.create_proxy()
def _open_proxy(self, ref):
    """Start proxy monitoring; call this before opening the web page.

    :param ref: Name under which the new capture (HAR) is registered.
    """
    # Record response bodies and headers in addition to the request list
    options = {'captureContent': True, 'captureHeaders': True}
    self.proxy.new_har(ref, options=options)
def _get_network(self):
    """Return the requests recorded by the proxy.

    :return: List of ``(request_url, response_text)`` tuples, one per HAR
        entry, in capture order. ``response_text`` is ``None`` when the
        entry's response content has no ``text`` field.
    """
    # Take out the recorded request list (HAR)
    result = self.proxy.har
    captured = []
    for entry in result['log']['entries']:
        req_url = entry['request']['url']
        # Not every response carries a body, so "text" may be absent
        resp_content = entry['response']['content'].get('text')
        captured.append((req_url, resp_content))
    return captured

Chrome proxy

options = ChromeOptions()
# Route the browser's traffic through the browsermob proxy.
# Alternative form (presumably self.proxy.proxy already holds "host:port" - confirm):
# options.add_argument('--proxy-server={0}'.format(self.proxy.proxy))
options.add_argument('--proxy-server={host}:{port}'.format(host="localhost", port=self.proxy.port))

# Other configurations...
# ``options=`` replaces the deprecated ``chrome_options=`` keyword
self.driver = webdriver.Chrome(options=options)

firefox proxy

profile = FirefoxProfile()

# Proxy type 1 = manual proxy configuration
profile.set_preference("network.proxy.type", 1)
# Host and port used as the HTTP proxy
profile.set_preference("network.proxy.http", "localhost")
profile.set_preference("network.proxy.http_port", self.proxy.port)
# Host and port used as the HTTPS proxy
profile.set_preference('network.proxy.ssl', "localhost")
profile.set_preference('network.proxy.ssl_port', self.proxy.port)
# Share one host and port across all protocols. When each protocol is
# configured separately this preference can be left unset (it defaults to False).
profile.set_preference("network.proxy.share_proxy_settings", True)

# Other configurations...
# Attach the profile through an Options object; the ``firefox_profile=``
# keyword of webdriver.Firefox is deprecated in Selenium 4
firefox_options = webdriver.FirefoxOptions()
firefox_options.profile = profile
self.driver = webdriver.Firefox(options=firefox_options)

Reference resources

Posted by vamosbenedikt on Fri, 04 Oct 2019 00:10:01 -0700