用正则表达式匹配 HTML 元素时,正则表达式字符串和源字符串都不要用 Server.HtmlEncode() 编码,直接匹配即可。
1.htm 内容如下:
<td class="coln_2">
<div class="divTrueName">
<!--{start}-->
<a >每周都来</span></div>
</td>
C#代码如下:
Server.HtmlEncode
// Extracts four fragments from the saved page 1.htm with lookbehind/lookahead regexes
// and writes them, concatenated, to the response.
// Neither the patterns nor the page text are HTML-encoded: the anchors below must match
// the raw markup character for character, so encoding either side makes every match fail.
string s1, s2, s3, s4;

// File.ReadAllText opens the file read-only and closes it even if reading throws.
// The previous new FileStream(path, FileMode.Open) requested read/write access, which
// fails on read-only files, and leaked the handle when ReadToEnd() threw.
string str = File.ReadAllText(Server.MapPath("1.htm"));

// Each pattern is: (?<=opening tag) captured text (?=closing tag).
// All quantifiers are lazy so the capture stops at the FIRST closing tag; a greedy
// [\s\S]* would run on to the last closing tag in the whole document.
string fliter_name = "(?<= <a id=\"tbl_tr_name_qt_1\" href=\"http://shanghai.anjuke.com/shop/view/316462\" class=\"aTrueName\" target=\"_blank\" >)[\\s\\S]*?(?=</a>)"; // do not HTML-encode
// The trailing [^<] requires the captured text to end in a character other than '<'.
string fliter_company = "(?<=<div class=\"divCompanyStore\" id=\"tbl_tr_cpn_qt_1\">)[\\s\\S]*?[^<](?=</div>)"; // do not HTML-encode
string fliter_phone = "(?<=<div class=\"divUserMobile\" id=\"tbl_tr_mb_qt_1\">)[\\s\\S]*?(?=</div>)"; // do not HTML-encode
string fliter_area = "(?<=<div class=\"divArea\" id=\"tbl_tr_area_qt_1\">)[\\s\\S]*?(?=</div>)"; // do not HTML-encode

// Match the raw page text directly against the raw patterns.
Match name = Regex.Match(str, fliter_name);
Match company = Regex.Match(str, fliter_company);
Match area = Regex.Match(str, fliter_area);
Match phone = Regex.Match(str, fliter_phone);

// Match.Value is the empty string when a pattern does not match, so no null checks are needed.
s1 = name.Value;
s2 = company.Value;
s3 = area.Value;
s4 = phone.Value;

// Output order: name, company, area, phone.
// NOTE(review): the captured text is written without encoding; if the page can contain
// untrusted markup, encode each value at output time (not before matching).
Response.Write(s1 + s2 + s3 + s4);