// Scrapes the Sina news index page at PageUrl, follows each matched article
// link, extracts the article body found between the two HTML marker comments,
// stores it through the UP_News_ADD stored procedure and finally renders a
// table row per imported article into lblhtml.
// `flag` (import counter), `lblhtml` and `DbHelperSQL` are defined outside
// this fragment.
string PageUrl = "http://news.sina.com.cn/china/sz/";
string url = "";

// Download the index page; the client is disposed as soon as the bytes are read.
Byte[] pageData;
using (WebClient wc = new WebClient())
{
    wc.Credentials = CredentialCache.DefaultCredentials;
    pageData = wc.DownloadData(PageUrl);
}

// NOTE(review): Encoding.Default depends on the host configuration; presumably
// the server's default code page matches the page encoding - confirm.
string content = Encoding.Default.GetString(pageData);

// Each match exposes: url (relative link), year/month/day (taken from the link)
// and subject (the anchor text).
MatchCollection mc = Regex.Matches(content, @"<A\shref=(?<url>.*(?<date>(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})).*shtml)\s*TARGET=_blank>(?<subject>.*)</a>", RegexOptions.IgnoreCase);
string OriginalURL;
string Subject;
string date;
string month;
string day;
foreach (Match match in mc)
{
    // Relative link to the article, e.g. "/c/2006-02-19/19568247131s.shtml"
    OriginalURL = match.Groups["url"].Value;

    // Fetch the article. Every network resource is wrapped in `using` so the
    // connection is released even when reading throws.
    StringBuilder htmlBuilder = new StringBuilder();
    WebRequest request = WebRequest.Create("http://news.sina.com.cn" + OriginalURL);
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        // Lines are concatenated without their terminators, exactly as the
        // original line-by-line loop did.
        string sLine;
        while ((sLine = sr.ReadLine()) != null)
        {
            htmlBuilder.Append(sLine);
        }
    }
    string HTML = htmlBuilder.ToString();

    // The original code replaced "<" with "<" and ">" with ">" at this point,
    // which changes nothing, so the calls were dropped.
    // NOTE(review): they may have been meant to decode &lt; / &gt; entities - confirm.

    // The article body sits between the "content start" and "content end"
    // marker comments.
    string[] strTemp = Regex.Split(HTML, @"<!--正文内容开始-->", RegexOptions.IgnoreCase);
    if (strTemp.Length < 2)
    {
        // No start marker on this page: skip the article instead of failing
        // the whole import with an IndexOutOfRangeException.
        continue;
    }
    string[] strTemp1 = Regex.Split(strTemp[1], @"<!--正文内容结束-->", RegexOptions.IgnoreCase);
    string temp = strTemp1[0];

    // Headline text taken from the anchor.
    Subject = match.Groups["subject"].Value;
    // Publication date parts, extracted from the article link.
    // Note that `date` holds only the year component.
    date = match.Groups["year"].Value;
    month = match.Groups["month"].Value;
    day = match.Groups["day"].Value;

    int rowsAffected;
    SqlParameter[] parameters = {
        new SqlParameter("@NewsID", SqlDbType.Int, 4),
        new SqlParameter("@NewsTitle", SqlDbType.NVarChar),
        new SqlParameter("@NewsContent", SqlDbType.Text),
        new SqlParameter("@AdminID", SqlDbType.Int, 4),
        new SqlParameter("@Createtime", SqlDbType.DateTime),
        new SqlParameter("@categoryid", SqlDbType.Int, 4)};
    parameters[0].Direction = ParameterDirection.Output; // @NewsID is an output parameter
    parameters[1].Value = Subject;
    parameters[2].Value = temp;
    parameters[3].Value = 1;                    // hard-coded @AdminID
    parameters[4].Value = System.DateTime.Now;  // import time, not the date parsed from the link
    parameters[5].Value = 2;                    // hard-coded @categoryid
    DbHelperSQL.RunProcedure("UP_News_ADD", parameters, out rowsAffected);

    // NOTE(review): OriginalURL and Subject come from the remote page and are
    // written into the markup unescaped - confirm this is acceptable.
    url += "<tr><td><a href=" + OriginalURL + ">" + Subject + "</a>(" + date + "-" + month + "-" + day + ")</td></tr> ";

    // Stop once `flag` reaches 10.
    flag++;
    if (flag >= 10)
    {
        break;
    }
}
lblhtml.Text = url;