// Shared cookie jar. Every GET and POST issued after login must carry the cookies
// collected here, because the site's login state is tracked entirely through cookies.
private static CookieContainer cc = new CookieContainer();

/// <summary>
/// Logs in to Sina Weibo through the SSO endpoints and loads the home page into <c>webBrowser1</c>.
/// </summary>
/// <remarks>
/// Steps: (1) pre-login request to obtain servertime, nonce, pubkey and rsakv;
/// (2) RSA-encrypt the password; (3) POST the login form; (4) follow the URL found in the
/// returned <c>location.replace(...)</c> script, which is the request that completes the login;
/// (5) download the home page with the shared cookies.
/// Network errors raised by the web client are not caught here.
/// </remarks>
/// <param name="strUserName">Account user name.</param>
/// <param name="strPassword">Account password in plain text; it is RSA-encrypted before being sent.</param>
/// <returns>0 when every step completed; -1 when an expected field is missing from a server response.</returns>
private int login_weibo(string strUserName, string strPassword)
{
    // The user name is sent Base64-encoded, both in the pre-login query and in the login form.
    string su = GetBase64(strUserName);

    // Base64 output may contain '+', '/' and '=', so it is escaped before going into the query string.
    string strUrl = string.Format(
        "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su={0}&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.15)&_=1400822309846",
        Uri.EscapeDataString(su));

    // NOTE(review): assumes WebClientEx derives from System.Net.WebClient (and is therefore IDisposable) - confirm.
    using (var webClient = new WebClientEx(cc))
    {
        // ===== Pre-login: fetch servertime, nonce, pubkey and rsakv =====
        webClient.Headers.Add("Content-Type", "application/x-www-form-urlencoded");
        string res = webClient.DownloadString(strUrl);

        // All four values are needed by the login request below.
        // skip = length of the key plus the punctuation before the value;
        // trim = 1 where the value is wrapped in quotes, 0 for the bare number.
        string servertime;
        if (!TryExtractField(res, "servertime", ',', 12, 0, out servertime))
        {
            return -1;
        }

        string nonce;
        if (!TryExtractField(res, "nonce", ',', 8, 1, out nonce))
        {
            return -1;
        }

        string pubkey;
        if (!TryExtractField(res, "pubkey", ',', 9, 1, out pubkey))
        {
            return -1;
        }

        string rsakv;
        if (!TryExtractField(res, "rsakv", ',', 8, 1, out rsakv))
        {
            return -1;
        }

        // ===== Encrypt the password =====
        // The plaintext is servertime, TAB, nonce, LF, password; pubkey is the RSA modulus in hex.
        string message = servertime + "\t" + nonce + "\n" + strPassword;
        var modulus = new BigInteger(pubkey, 16);
        string sp = RsaEncrypt(string.Format("{0}", modulus), message);

        // ===== Login =====
        // POST parameters; several of them come from the pre-login response.
        var data = new NameValueCollection
        {
            {"entry", "weibo"},
            {"gateway", "1"},
            {"from", ""},
            {"savestate", "7"},
            {"useticket", "1"},
            {
                "pagerefer",
                "http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D"
            },
            {"vsnf", "1"},
            {"su", su},
            {"service", "miniblog"},
            {"servertime", servertime},
            {"nonce", nonce},
            {"pwencode", "rsa2"},
            {"rsakv", rsakv},
            {"sp", sp},
            {"sr", "1680*1050"},
            {"encoding", "UTF-8"},
            {"prelt", "961"},
            {
                "url",
                "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"
            }
        };

        byte[] response = webClient.UploadValues(
            @"http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)", data);

        // NOTE(review): Encoding.Default depends on the machine's configuration - confirm it
        // matches the charset the login page is actually served in.
        string strHtml = System.Text.Encoding.Default.GetString(response);

        // The response is a script containing location.replace("<url>"). Requesting that URL is
        // what actually completes the login; skipping it leaves later requests on the login page.
        string strRealUrl;
        if (!TryExtractField(strHtml, "location.replace(", ')', 18, 1, out strRealUrl))
        {
            return -1;
        }

        webClient.DownloadString(strRealUrl);
    }

    // NOTE(review): the home-page URL contains a fixed user id (2070572485) - confirm whether
    // it should follow the account that just logged in.
    var pageDownLoad = new PageDownLoad {Cookies = cc};
    pageDownLoad.GetHtmlCode("http://weibo.com/u/2070572485/home?wvr=5&topnav=1&mod=logo");

    webBrowser1.DocumentText = pageDownLoad.HtmlCode;
    return 0;
}

/// <summary>
/// Cuts a value out of <paramref name="text"/>: finds <paramref name="marker"/>, skips
/// <paramref name="skip"/> characters counted from the start of the marker, and reads up to the
/// next <paramref name="terminator"/>, dropping <paramref name="trim"/> trailing characters.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="marker">Literal text that introduces the value.</param>
/// <param name="terminator">Character that ends the value; searched from the marker position.</param>
/// <param name="skip">Number of characters between the start of the marker and the value.</param>
/// <param name="trim">Number of characters to drop immediately before the terminator.</param>
/// <param name="value">The extracted value, or null when extraction fails.</param>
/// <returns>true when the marker and terminator were found and the value has a non-negative length.</returns>
private static bool TryExtractField(string text, string marker, char terminator, int skip, int trim, out string value)
{
    value = null;
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    int start = text.IndexOf(marker, StringComparison.Ordinal);
    if (start < 0)
    {
        return false;
    }

    int end = text.IndexOf(terminator, start);
    if (end < 0)
    {
        return false;
    }

    // A negative length means the terminator sits inside the marker/prefix region.
    int length = end - start - skip - trim;
    if (length < 0)
    {
        return false;
    }

    value = text.Substring(start + skip, length);
    return true;
}

/// <summary>
/// Encodes a string as Base64 over its UTF-8 bytes.
/// </summary>
/// <param name="mystr">Text to encode.</param>
/// <returns>The Base64 text; for example "中华人民共和国" yields "5Lit5Y2O5Lq65rCR5YWx5ZKM5Zu9".</returns>
private string GetBase64(string mystr)
{
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(mystr);
    return Convert.ToBase64String(utf8Bytes);
}

/// <summary>
/// RSA-encrypts <paramref name="content"/> with public exponent 65537 and the given modulus.
/// </summary>
/// <param name="strPublickey">RSA modulus written as a base-10 number.</param>
/// <param name="content">Text to encrypt; it is converted to UTF-8 bytes first.</param>
/// <returns>The ciphertext as upper-case hexadecimal digits without separators.</returns>
public string RsaEncrypt(string strPublickey, string content)
{
    var param = new RSAParameters
    {
        Exponent = GetBytes("65537"),
        Modulus = GetBytes(strPublickey)
    };

    using (var rsa = new RSACryptoServiceProvider())
    {
        rsa.ImportParameters(param);

        // false selects PKCS#1 v1.5 padding rather than OAEP.
        byte[] cipherbytes = rsa.Encrypt(Encoding.UTF8.GetBytes(content), false);

        return BitConverter.ToString(cipherbytes).Replace("-", string.Empty);
    }
}

/// <summary>
/// Converts a base-10 number string into its big-endian byte representation.
/// </summary>
/// <param name="num">Number written in base 10.</param>
/// <returns>The bytes of the number, most significant byte first, padded to whole bytes.</returns>
public byte[] GetBytes(String num)
{
    // NOTE(review): relies on the project's BigInteger(string, radix) / ToString(radix) - the
    // code below treats the ToString(2) result as a string of binary digits.
    BigInteger n = new BigInteger(num, 10);
    string bits = n.ToString(2);

    // Left-pad with zeros so the bit string splits evenly into octets.
    int remainder = bits.Length % 8;
    if (remainder > 0)
    {
        bits = new string('0', 8 - remainder) + bits;
    }

    byte[] data = new byte[bits.Length / 8];
    for (int i = 0; i < data.Length; i++)
    {
        string octet = bits.Substring(8 * i, 8);
        data[i] = Convert.ToByte(octet, 2);
    }
    return data;
}