My first web crawler C ා welfare programmer car

Keywords: C# Python encoding

Recently, in conscious python, I saw Zhihu's last article (https://www.zhihu.com/question/20799742), and I climbed the video on welfare net...

I started to follow suit, but the example given by the respondent is based on python2.x, and I started to learn 3.x. after changing the print usage, there are still many modules that can't be imported, and the novice doesn't know how to solve it.

So, in order to learn to learn, I wrote a piece of code in C. With some time delay, the hard disk will occupy more than 3 g in a short time... Students, pay attention to your health

Post the code below.. Some bug s are intentionally left in the code to avoid non programmers getting on the train

class Program
    {
        static void Main(string[] args)
        {
            var baseString = "http://w*w.46ek.c*m/view/{0}.html";
            Regex regex = new Regex(@"http://m4.26ts.com/[.0-9-a-zA-Z]*.mp4");
            WebClient wc = new WebClient();


            uint startIndex = ReadStartIndex();
            uint loop = ReadLoopLen();

            for (int i = 0; i < lop; i++)
            {
                var subUrl = string.Format(baseString, startIndex + i);
                WebRequest wReq = System.Net.WebRequest.Create(subUrl)

                try
                {
                    WebResponse wResp = wReq.GetResponse();
                    Stream respStream = wResp.GetResponseStream();

                    using (StreamReader reader = new StreamReader(respStream, Encoding.GetEncoding("GB18030")))
                    {
                        var htmlString = reader.ReadToEnd();

                        Match m = regex.Match(htmlString);
                        if (m.Success)
                        {
                            DownloadFile(wc, m.Value, string.Format("{0}.mp4", startIndex + i));
                        }
                    }
                }
                catch (Exception exc)
                {
                    Console.WriteLine("Error : {0}", exc.Message);
                }

                Thread.Sleep(5);
            }
            
        }

        private static uint ReadStartIndex()
        {
            while (true)
            {
                Console.Write("Set start index :");

                string line = Console.ReadLine();

                uint index = 0;

                if (UInt32.TryParse(line, out index))
                {
                    Console.WriteLine("Start index setted : "+ index);
                    return index;
                }

                Thread.Sleep(500);
            }
        }

        private static uint ReadLoopLen()
        {
            while (true)
            {
                Console.Write("Set loop len :");

                string line = Console.ReadLine();

                uint index = 0;

                if (UInt32.TryParse(line, out index))
                {
                    Console.WriteLine("Loop len setted : " + index);
                    return index;
                }

                Thread.Sleep(500);
            }
        }

        private static void DownloadFile(WebClient wc, string url, string localname)
        {
            Console.WriteLine("Downloading file {1} to {2}", url, localname);

            wc.DownloadFile(url, localname);

            Console.WriteLine("File {0} download completed!", localname);
        }

Posted by gobbles on Thu, 30 Apr 2020 04:46:48 -0700