【问题标题】:Parse Google SERPs in C#(在 C# 中解析 Google SERP)
【发布时间】:2019-11-15 04:06:46
【问题描述】:

我正在用 C# 解析 Google 搜索结果页(SERP),但下面的代码总是返回位置 0。我认为问题出在正则表达式上,你能帮我看看吗?

        /// <summary>
        /// Runs a Google search for <paramref name="searchTerm"/> and hands the
        /// downloaded result page to <c>FindPosition</c> together with <paramref name="url"/>.
        /// </summary>
        /// <param name="url">The address to look for in the result page.</param>
        /// <param name="searchTerm">The query text; it is URL-encoded before being sent.</param>
        /// <returns>Whatever <c>FindPosition</c> returns for the downloaded HTML.</returns>
        public static int GetPosition(Uri url, string searchTerm)
        {
            string text = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", HttpUtility.UrlEncode(searchTerm));
            Console.WriteLine(text);
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(text);
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Decode with the charset the server declares instead of ASCII, which
                // would turn every non-ASCII byte of the page into '?'.
                Encoding encoding = Encoding.UTF8;
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    try
                    {
                        encoding = Encoding.GetEncoding(response.CharacterSet);
                    }
                    catch (ArgumentException)
                    {
                        // The declared charset name is unknown to this runtime;
                        // keep the UTF-8 fallback chosen above.
                    }
                }

                using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
                {
                    string html = reader.ReadToEnd();
                    return FindPosition(html, url);
                }
            }
        }
        // Looks for `url` inside the result-page `html`; presumably returns its
        // 1-based rank among the result links (the question reports it always yields 0).
        // NOTE(review): everything after the pattern declaration is elided ("[...]")
        // in this excerpt, so how `lookup` is applied cannot be confirmed from here.
        private static int FindPosition(string html, Uri url)
        {
            // Regex text with two groups: group 1 is the literal prefix
            // <h3 class="r"><a href="/url?q= and group 2 is the link target.
            // After its leading word characters, group 2 only accepts letters,
            // digits and . - ? = / : so it stops at the first '_', '%' or '&'.
            string lookup = "(<h3 class=\"r\"><a href=\"/url\\?q=)(\\w+[a-zA-Z0-9.\\-?=/:]*)";
            [...]
        }
    }
}

【问题讨论】:

    标签: c# regex parsing


    【解决方案1】:
     /// <summary>
            /// Queries Google for <paramref name="searchTerm"/>, downloads the result
            /// page and passes it to <c>FindPosition</c> along with <paramref name="url"/>.
            /// </summary>
            /// <param name="url">The address to look for in the result page.</param>
            /// <param name="searchTerm">The query text; it is URL-encoded before being sent.</param>
            /// <returns>Whatever <c>FindPosition</c> returns for the downloaded HTML.</returns>
            public static int GetPosition(Uri url, string searchTerm)
            {
                string text = string.Format("http://www.google.com/search?num=1000&q={0}&btnG=Search", HttpUtility.UrlEncode(searchTerm));
                Console.WriteLine(text);
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(text);
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    // ASCII decoding replaces every non-ASCII byte with '?'. Use the
                    // charset announced by the response, falling back to UTF-8.
                    Encoding encoding = Encoding.UTF8;
                    if (!string.IsNullOrEmpty(response.CharacterSet))
                    {
                        try
                        {
                            encoding = Encoding.GetEncoding(response.CharacterSet);
                        }
                        catch (ArgumentException)
                        {
                            // Charset name not recognised by this runtime;
                            // stay with the UTF-8 fallback.
                        }
                    }

                    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
                    {
                        string html = reader.ReadToEnd();
                        return FindPosition(html, url);
                    }
                }
            }
            /// <summary>
            /// Finds the 1-based rank of <paramref name="url"/> among the Google
            /// redirect links (<c>&lt;a href="/url?q=..."&gt;</c>) in <paramref name="html"/>.
            /// </summary>
            /// <param name="html">Markup of the search result page.</param>
            /// <param name="url">The address to look for.</param>
            /// <returns>
            /// The index (starting at 1) of the first redirect link whose target
            /// contains <paramref name="url"/>, or 0 when no link matches.
            /// </returns>
            private static int FindPosition(string html, Uri url)
            {
                // Capture the whole value of the q parameter: it ends at the next
                // '&' (start of "&amp;sa=...") or at the closing quote. The previous
                // character class stopped at '_', '%', '~', '+' and similar, so
                // any target containing them was truncated and could never match.
                var reg = new Regex("<a href=\"/url\\?q=([^\"&]+)");
                var wanted = url.ToString();
                var position = 0;
                var index = 1;
                foreach (Match match in reg.Matches(html))
                {
                    // The target is percent-encoded inside the query string,
                    // while Uri.ToString() is unescaped; decode before comparing.
                    var target = Uri.UnescapeDataString(match.Groups[1].Value);
                    if (target.Contains(wanted))
                    {
                        position = index;
                        break;
                    }
                    index++;
                }
                return position;
            }
    

    【讨论】:

      猜你喜欢
      • 2020-02-28
      • 2017-04-10
      • 2011-06-30
      • 1970-01-01
      • 1970-01-01
      • 2012-07-16
      • 1970-01-01
      • 1970-01-01
      • 2013-01-19
      相关资源
      最近更新 更多