Sesame HTTP: setting up a proxy for a Selenium+Chrome crawler

Keywords: Python Selenium Google Pycharm JSON

Weibo (micro blog) login limits the number of failed attempts... In addition, a large number of the accounts behind the cookies get blocked and have to be removed from the cookie pool... So a proxy is needed... I struggled with Baidu for most of the day and got nowhere??? It turns out that Google solves the problem in minutes? What else can Baidu do besides sell fake medicine?

A proxy that requires authentication cannot be configured for Selenium+Chrome through ChromeOptions arguments alone. The workaround is to supply the proxy settings and credentials through a Chrome extension instead.

Original address: https://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth#answer-30953780 (Stack Overflow is a good place)

Here we go!

​
# -*- coding: utf-8 -*-
# @Time    : 2017/11/15 9:50
# @Author: ouch
# @Site    : 
# @File    : pubilc.py
# @Software: PyCharm

import json
import string
import zipfile

def create_proxyauth_extension(proxy_host, proxy_port,
                               proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    """Build a Chrome extension archive that sets an authenticated proxy.

    The archive contains two files: ``manifest.json`` and ``background.js``.
    The background script sets a fixed-server proxy configuration and
    registers an ``onAuthRequired`` listener that answers with the given
    user name and password.

    Args:
        proxy_host (str): Proxy address or domain name.
        proxy_port (int): Proxy port number.
        proxy_username (str): User name for proxy authentication.
        proxy_password (str): Password for proxy authentication.
        scheme (str): Proxy scheme; defaults to ``'http'``.
        plugin_path (str): Path of the zip file to write. Defaults to
            ``'vimm_chrome_proxyauth_plugin.zip'`` (relative to the current
            working directory).

    Returns:
        str: The path the extension archive was written to.
    """
    if plugin_path is None:
        plugin_path = 'vimm_chrome_proxyauth_plugin.zip'

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    def js_literal(value):
        # Emit a quoted, fully escaped JavaScript string literal. Pasting the
        # raw value between double quotes would break the script (or inject
        # code) whenever it contains a quote, backslash or newline.
        return json.dumps(str(value))

    # The placeholders are NOT wrapped in quotes here: js_literal() already
    # supplies the surrounding quotes together with the escaping.
    background_js = string.Template(
    """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: ${scheme},
                host: ${host},
                port: parseInt(${port})
              },
              bypassList: ["foobar.com"]
            }
          };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: ${username},
                password: ${password}
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """
    ).substitute(
        host=js_literal(proxy_host),
        port=js_literal(proxy_port),
        username=js_literal(proxy_username),
        password=js_literal(proxy_password),
        scheme=js_literal(scheme),
    )
    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path

​

Usage:

from selenium import webdriver
from common.pubilc import create_proxyauth_extension
 
# Build the proxy-authentication extension archive and remember its path.
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="XXXXX.com",
    proxy_port=9020,
    proxy_username="XXXXXXX",
    proxy_password="XXXXXXX"
)


co = webdriver.ChromeOptions()
# co.add_argument("--start-maximized")
co.add_extension(proxyauth_plugin_path)


# Raw string: "\c" is not a valid escape sequence, so a plain literal
# triggers an invalid-escape warning on recent Python versions.
# NOTE(review): newer Selenium releases expect `options=` and a Service
# object instead of `chrome_options=` / `executable_path=` - confirm against
# the installed Selenium version.
driver = webdriver.Chrome(executable_path=r"C:\chromedriver.exe", chrome_options=co)
try:
    # Load an IP-lookup page and dump its HTML.
    driver.get("http://ip138.com/")
    print(driver.page_source)
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()
 

​

Proxy without authentication:

# A proxy without authentication can be set directly via a Chrome switch.
options = webdriver.ChromeOptions()
options.add_argument('--proxy-server=http://ip:port')
# Fixed: the original passed `0ptions` (leading zero), which is a syntax
# error. Raw string avoids the invalid "\c" escape sequence.
driver = webdriver.Chrome(executable_path=r"C:\chromedriver.exe", chrome_options=options)
try:
    driver.get("http://ip138.com/")
    print(driver.page_source)
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()

So Easy

Posted by Arbitus on Sat, 02 May 2020 01:10:23 -0700