HtmlAgilityPack + C# as an IP proxy crawler

Keywords: Big Data encoding Database Windows Google

1. Search for proxy data, collect as many IP proxies as possible, and store them in the IP proxy pool
2. Filter the data in the proxy pool, add the valid entries to another table, and keep that table up to date
3. Update the IP proxy pool regularly
Because the proxy IPs published on the website change in real time, the program needs to re-check the crawled IP data in the database promptly and insert the working entries into the new table.
1. Check whether an IP proxy is available

    /// <summary>
    /// Validates every proxy address in the supplied list by calling
    /// <see cref="FilterIP"/> for each non-empty entry, in parallel.
    /// </summary>
    /// <param name="ip">
    /// A <see cref="List{T}"/> of strings, one proxy address per element.
    /// It is typed as <c>object</c>; presumably so the method can be used as a
    /// thread-start callback (confirm against the caller).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="ip"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="ip"/> is not a list of strings.</exception>
    public static void SelectIP(object ip) //Assign port data
    {
        if (ip == null)
        {
            throw new ArgumentNullException("ip");
        }

        List<string> ips = ip as List<string>;
        if (ips == null)
        {
            // Fail with a message that names the real problem instead of letting
            // Parallel.ForEach complain about a null "source".
            throw new ArgumentException("Expected a List<string> of proxy addresses.", "ip");
        }

        Parallel.ForEach(ips, row =>
        {
            // Blank entries cannot be turned into a proxy address, so skip them.
            if (!string.IsNullOrEmpty(row))
            {
                FilterIP(row);
            }
        });
    }
       
        /// <summary>
        /// Running count of proxy checks performed so far (successes and failures alike).
        /// It is only ever advanced with Interlocked.Increment because FilterIP runs
        /// concurrently inside SelectIP's Parallel.ForEach.
        /// </summary>
        public static int i = 0;

        // Serializes "set colour + write line" so concurrent checks cannot interleave
        // their console output or print in each other's colour.
        private static readonly object ConsoleGate = new object();

        /// <summary>
        /// Verifies a single proxy by sending a small form-encoded POST through it to
        /// the URL returned by GetStr() and reporting the outcome on the console.
        /// A response within the 7 second timeout counts as success; any exception
        /// (including a malformed proxy address) is reported as a failure.
        /// </summary>
        /// <param name="ip">Proxy address passed to <see cref="WebProxy"/>; the rest of the file splits it on ':' into host and port.</param>
        /// <exception cref="ArgumentNullException"><paramref name="ip"/> is null.</exception>
        public static void FilterIP(string ip)//Verify IP proxy
        {
            if (ip == null)
            {
                throw new ArgumentNullException("ip");
            }

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(GetStr());
                request.Proxy = new WebProxy(ip);
                request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate");//Define gzip compression page support
                request.ContentType = "application/x-www-form-urlencoded";//Define document type and code
                request.AllowAutoRedirect = false;//No auto jump
                request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36";//Set up user agent and pretend to be Google Chrome browser
                request.KeepAlive = false;
                request.Timeout = 7000;
                request.Method = "POST";

                var data = Encoding.UTF8.GetBytes("a=10&b=15");

                // The request stream must be closed before asking for the response,
                // otherwise the connection is not released.
                using (var requestStream = request.GetRequestStream())
                {
                    requestStream.Write(data, 0, data.Length);
                }

                // Disposing the response releases the connection even if reporting throws.
                using (var response = request.GetResponse())
                {
                    int count = System.Threading.Interlocked.Increment(ref i);
                    lock (ConsoleGate)
                    {
                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine(DateTime.Now.ToLongTimeString() + "  " + count + "  Success:  " + ip);
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: one bad proxy must not abort the rest of the parallel batch.
                int count = System.Threading.Interlocked.Increment(ref i);
                lock (ConsoleGate)
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.WriteLine(DateTime.Now.ToLongTimeString() + "  " + count + "  Fail:  " + e.Message);
                }
            }
        }

2. When the IP is not available, delete it from the TipPool table

   // Split the proxy address into host and port for the stored procedure parameters.
   string[] sstring = ip.ToString().Split(':');
                SqlParameter[] sps = new SqlParameter[] {
                    new SqlParameter("@ip",sstring[0]),
                    new SqlParameter("@port",Convert.ToInt32(sstring[1]))
                };
                string str = "[dbo].[S_store_delete_ip]";

                // Run the stored procedure on the thread pool so the proxy check is not
                // blocked by the database round trip. Task.Run replaces Delegate.BeginInvoke,
                // which is not supported on .NET Core / .NET 5+, and the continuation
                // reports a failed call instead of dropping it silently.
                System.Threading.Tasks.Task
                    .Run(() =>
                    {
                        SqlHelper.ExecuteNonquery(str, CommandType.StoredProcedure, sps);
                    })
                    .ContinueWith(
                        t => Console.Error.WriteLine(str + " failed: " + t.Exception.GetBaseException().Message),
                        System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted);
            }

3. Save the available IP proxy to the new table TipPool

 // Split the proxy address into host and port for the stored procedure parameters.
 string[] sstring = ip.ToString().Split(':');
                    SqlParameter[] sps = new SqlParameter[] {
                        new SqlParameter("@ip",sstring[0]),
                        new SqlParameter("@port",Convert.ToInt32(sstring[1]))
                    };
                    string str = "[dbo].[S_store_Insert_ip]";

                    // Run the stored procedure on the thread pool so the proxy check is not
                    // blocked by the database round trip. Task.Run replaces Delegate.BeginInvoke,
                    // which is not supported on .NET Core / .NET 5+, and the continuation
                    // reports a failed call instead of dropping it silently.
                    System.Threading.Tasks.Task
                        .Run(() =>
                        {
                            SqlHelper.ExecuteNonquery(str, CommandType.StoredProcedure, sps);
                        })
                        .ContinueWith(
                            t => Console.Error.WriteLine(str + " failed: " + t.Exception.GetBaseException().Message),
                            System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted);

4. The effect of the program is

Posted by paradigmapc on Thu, 21 Nov 2019 12:23:12 -0800