这次开发的博客主要功能或特点:
第一:可以兼容各终端,特别是手机端。
第二:到时会用到大量html5,炫啊。
第三:导入博客园的精华文章,并做分类。(不要封我)
第四:做个插件,任何网站上的技术文章都可以转发、收藏到本博客。
所以打算写个系列:《一步步搭建自己的博客》
- 一步步开发自己的博客 .NET版(1、页面布局、blog迁移、数据加载)
- 一步步开发自己的博客 .NET版(2、评论功能)
- 一步步开发自己的博客 .NET版(3、注册登录功能)
- 一步步开发自己的博客 .NET版(4、文章发布功能)
- 一步步开发自己的博客 .NET版(5、搜索功能)
- 一步步开发自己的博客 .NET版(6、手机端的兼容)
演示地址:http://haojima.net/ 群内共享源码:469075305
今天来分析下 嗨-博客 中的搜索功能。搜索功能是个人网站里应该具备的东西,但又不是特别重要。所以我们需要有这个功能,但不必了解得太深入,毕竟我们不是专门做搜索这块的。
所以,我打算把搜索分成两块:一块是用Lucene.Net实现站内搜索,另一块是利用第三方搜索引擎来实现站内搜索。
Lucene.Net简介
Lucene.net是Lucene的.net移植版本,是一个开源的全文检索引擎开发包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎。开发人员可以基于Lucene.net实现全文检索的功能。Lucene.net是Apache软件基金会赞助的开源项目,基于Apache License协议。Lucene.net并不是一个爬行搜索引擎,也不会自动地索引内容。我们得先将要索引的文档中的文本抽取出来,然后再将其加到Lucene.net索引中。标准的步骤是先初始化一个Analyzer、打开一个IndexWriter、然后再将文档一个接一个地加进去。一旦完成这些步骤,索引就可以在关闭前得到优化,同时所做的改变也会生效。这个过程可能比开发者习惯的方式更加手工化一些,但却在数据的索引上给予你更多的灵活性,而且其效率也很高。(来源百度百科)
Lucene帮助类
其实在之前我也接触过Lucene.net,那也是自己做的一个小玩意(《lucene.net 3.0.3、结合盘古分词进行搜索的小例子(分页功能)》)。我随意看了下,里面有个帮助类挺不错的,也符合我想要的效果。这里来分析下这个帮助类。
1.首先创建索引。
// Open the index for appending (third argument false = append, true = recreate).
// The writer is wrapped in "using" so it is disposed even if adding the document throws.
using (IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED))
{
    Document doc = new Document();

    // Store the raw value and index it as one un-tokenized term.
    doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));

    writer.AddDocument(doc);
}
这里的
directory_luce 是索引创建路径
analyzer 分析器
name、value 分别是存入索引的字段名和值
2.从索引里面搜索
string[] fileds = { "title", "content" };//查询字段 QueryParser parser = null; parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询 Query query = parser.Parse(keyword); int n = 1000; IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读 TopDocs docs = searcher.Search(query, (Filter)null, n); if (docs == null || docs.TotalHits == 0) { return null; } else { List<SearchResult> list = new List<SearchResult>(); int counter = 1; foreach (ScoreDoc sd in docs.ScoreDocs)//遍历搜索到的结果 { try { Document doc = searcher.Doc(sd.Doc); int id = int.Parse(doc.Get("id")); string title = doc.Get("title"); string content = doc.Get("content"); string blogTag = doc.Get("blogTag"); string url = doc.Get("url"); int flag = int.Parse(doc.Get("flag")); int clickQuantity = int.Parse(doc.Get("clickQuantity")); string createdate = doc.Get("createdate"); PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>"); PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment()); highlighter.FragmentSize = 50; content = highlighter.GetBestFragment(keyword, content); string titlehighlight = highlighter.GetBestFragment(keyword, title); if (titlehighlight != "") title = titlehighlight; list.Add(new SearchResult(title, content, url, blogTag, id, clickQuantity, flag)); } catch (Exception ex) { Console.WriteLine(ex.Message); } counter++; } return list;
3.完整代码
/// <summary>
/// Helper around Lucene.Net that uses the PanGu analyzer for word segmentation.
/// It creates/updates index documents from <see cref="SearchResult"/> objects, searches the
/// "title"/"content"/"blogTag" fields (optionally filtered by "flag" and paged), deletes
/// documents, and exposes a small tokenizer probe. The index is stored in the
/// "SearchIndex" folder under the application base directory.
/// </summary>
public class PanGuLuceneHelper
{
    private PanGuLuceneHelper() { }

    #region Singleton

    // Created by the static field initializer, which runs once, so no null-check race is possible.
    private static readonly PanGuLuceneHelper _instance = new PanGuLuceneHelper();

    /// <summary>
    /// The single shared instance of the helper.
    /// </summary>
    public static PanGuLuceneHelper instance
    {
        get { return _instance; }
    }

    #endregion

    #region 00 Properties and settings

    #region Lucene.Net directory

    private Lucene.Net.Store.Directory _directory_luce = null;

    /// <summary>
    /// Lucene.Net directory opened (on first access) over <see cref="directory"/>.
    /// </summary>
    public Lucene.Net.Store.Directory directory_luce
    {
        get
        {
            if (_directory_luce == null)
            {
                _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
            }
            return _directory_luce;
        }
    }

    #endregion

    #region Index folder on disk

    private System.IO.DirectoryInfo _directory = null;

    /// <summary>
    /// Folder on disk that holds the index; it is created on first access if missing.
    /// </summary>
    public System.IO.DirectoryInfo directory
    {
        get
        {
            if (_directory == null)
            {
                string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";

                // CreateDirectory returns the existing folder when it is already there.
                _directory = System.IO.Directory.CreateDirectory(dirPath);
            }
            return _directory;
        }
    }

    #endregion

    #region Analyzer

    /// <summary>
    /// PanGu analyzer. A new instance is returned on every access.
    /// </summary>
    public Analyzer analyzer
    {
        get { return new Lucene.Net.Analysis.PanGu.PanGuAnalyzer(); }
    }

    #endregion

    #region Lucene version

    private static readonly Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;

    /// <summary>
    /// Lucene compatibility version passed to the query parsers.
    /// </summary>
    public Lucene.Net.Util.Version version
    {
        get { return _version; }
    }

    #endregion

    #endregion

    #region 01 Create index

    /// <summary>
    /// Adds the given items to the index. Any existing document with the same id is
    /// deleted first so the index never holds duplicates.
    /// </summary>
    /// <param name="datalist">Items to index; null entries are skipped.</param>
    /// <returns>true when the batch was written; false when <paramref name="datalist"/> is null.</returns>
    public bool CreateIndex(List<SearchResult> datalist)
    {
        if (datalist == null)
        {
            return false;
        }

        IndexWriter writer;
        try
        {
            // false = append to the existing index.
            writer = OpenWriter(false);
        }
        catch
        {
            // Appending failed, so fall back to (re)creating the index (true = overwrite).
            writer = OpenWriter(true);
        }

        try
        {
            foreach (SearchResult data in datalist)
            {
                if (data == null)
                {
                    continue;
                }

                // Delete before adding, otherwise the same id would be indexed twice.
                writer.DeleteDocuments(new Term("id", data.id.ToString()));
                CreateIndex(writer, data);
            }
            writer.Optimize();
        }
        finally
        {
            // Always dispose the writer, including when indexing throws.
            writer.Dispose();
        }
        return true;
    }

    /// <summary>
    /// Adds (or replaces) a single item in the index.
    /// </summary>
    /// <param name="data">Item to index.</param>
    /// <returns>true when the item was written.</returns>
    public bool CreateIndex(SearchResult data)
    {
        List<SearchResult> datalist = new List<SearchResult>();
        datalist.Add(data);
        return CreateIndex(datalist);
    }

    /// <summary>
    /// Builds one document from every public property of <paramref name="data"/> and adds
    /// it through the supplied writer. The writer is not committed or disposed here.
    /// </summary>
    /// <param name="writer">Open index writer owned by the caller.</param>
    /// <param name="data">Item to index.</param>
    /// <returns>false when <paramref name="data"/> is null; otherwise true.</returns>
    public bool CreateIndex(IndexWriter writer, SearchResult data)
    {
        if (data == null)
        {
            return false;
        }

        Document doc = new Document();
        foreach (PropertyInfo property in data.GetType().GetProperties())
        {
            string name = property.Name;
            object raw = property.GetValue(data, null);
            string value = raw == null ? "" : raw.ToString();

            // "id" and "flag" must stay un-tokenized: they are matched as exact terms
            // when filtering and deleting.
            Field.Index indexMode = (name == "id" || name == "flag")
                ? Field.Index.NOT_ANALYZED
                : Field.Index.ANALYZED;

            doc.Add(new Field(name, value, Field.Store.YES, indexMode));
        }

        // Exceptions propagate unchanged, with their original stack trace.
        writer.AddDocument(doc);
        return true;
    }

    /// <summary>
    /// Opens an index writer over <see cref="directory_luce"/> with the PanGu analyzer.
    /// </summary>
    /// <param name="create">true to overwrite the index, false to append to it.</param>
    private IndexWriter OpenWriter(bool create)
    {
        return new IndexWriter(directory_luce, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);
    }

    #endregion

    #region 02 Search in title and content

    /// <summary>
    /// Searches the "title" and "content" fields and returns up to 1000 hits with the
    /// matched text wrapped in a red font tag.
    /// </summary>
    /// <param name="keyword">Query text, parsed with the Lucene query parser.</param>
    /// <returns>The hits, or null when nothing matched.</returns>
    public List<SearchResult> Search(string keyword)
    {
        string[] fields = { "title", "content" };
        QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
        Query query = parser.Parse(keyword);
        int maxHits = 1000;

        // true = read-only searcher; disposed when the method exits.
        using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
        {
            TopDocs docs = searcher.Search(query, (Filter)null, maxHits);
            if (docs == null || docs.TotalHits == 0)
            {
                return null;
            }

            List<SearchResult> list = new List<SearchResult>();
            foreach (ScoreDoc sd in docs.ScoreDocs)
            {
                try
                {
                    SearchResult hit = ReadResult(searcher.Doc(sd.Doc));
                    hit.content = Highlight(keyword, hit.content, "<font color=\"red\">", "</font>", 50);

                    // Keep the plain title when the highlighter returns an empty string.
                    string titleHighlight = Highlight(keyword, hit.title, "<font color=\"red\">", "</font>", 50);
                    if (titleHighlight != "")
                    {
                        hit.title = titleHighlight;
                    }

                    list.Add(hit);
                }
                catch (Exception ex)
                {
                    // Best effort: a hit that cannot be read is logged and skipped.
                    Console.WriteLine(ex.Message);
                }
            }
            return list;
        }
    }

    #endregion

    #region 03 Search within a category, paged

    /// <summary>
    /// Searches "blogTag", "title" and "content", optionally restricted to one flag value,
    /// and returns a single page of hits with the content highlighted.
    /// </summary>
    /// <param name="_flag">Category filter; null or empty searches all categories.</param>
    /// <param name="keyword">Query text; null is treated as empty.</param>
    /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
    /// <param name="PageSize">Number of hits per page.</param>
    /// <returns>The hits of the requested page, or null when nothing matched.</returns>
    public List<SearchResult> Search(string _flag, string keyword, int PageIndex, int PageSize)
    {
        if (PageIndex < 1)
        {
            PageIndex = 1;
        }

        // Treat null like "not supplied" instead of failing inside the query parser.
        if (_flag == null)
        {
            _flag = "";
        }
        if (keyword == null)
        {
            keyword = "";
        }

        BooleanQuery bq = new BooleanQuery();
        if (_flag != "")
        {
            QueryParser flagParser = new QueryParser(version, "flag", analyzer);
            bq.Add(flagParser.Parse(_flag), Occur.MUST);
        }
        if (keyword != "")
        {
            string[] fields = { "blogTag", "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
            bq.Add(parser.Parse(keyword), Occur.MUST);
        }

        // Collect enough hits to reach the end of the requested page.
        TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);

        // true = read-only searcher; disposed when the method exits.
        using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
        {
            searcher.Search(bq, collector);
            if (collector.TotalHits == 0)
            {
                return null;
            }

            int start = PageSize * (PageIndex - 1);
            ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;

            List<SearchResult> list = new List<SearchResult>();
            foreach (ScoreDoc sd in hits)
            {
                try
                {
                    SearchResult hit = ReadResult(searcher.Doc(sd.Doc));
                    hit.content = Highlight(keyword, hit.content);
                    list.Add(hit);
                }
                catch (Exception ex)
                {
                    // Best effort: a hit that cannot be read is logged and skipped.
                    Console.WriteLine(ex.Message);
                }
            }
            return list;
        }
    }

    /// <summary>
    /// Rebuilds a <see cref="SearchResult"/> from the stored fields of an index document.
    /// Throws when a numeric field ("id", "flag", "clickQuantity") is missing or malformed.
    /// </summary>
    private static SearchResult ReadResult(Document doc)
    {
        int id = int.Parse(doc.Get("id"));
        string title = doc.Get("title");
        string content = doc.Get("content");
        string blogTag = doc.Get("blogTag");
        string url = doc.Get("url");
        int flag = int.Parse(doc.Get("flag"));
        int clickQuantity = int.Parse(doc.Get("clickQuantity"));
        return new SearchResult(title, content, url, blogTag, id, clickQuantity, flag);
    }

    #endregion

    #region Highlight content by keywords

    /// <summary>
    /// Returns the best fragment (up to 200 characters) of <paramref name="content"/>
    /// with the keywords wrapped in &lt;strong&gt; tags.
    /// </summary>
    /// <param name="keywords">Keywords to highlight.</param>
    /// <param name="content">Text to extract the fragment from.</param>
    /// <returns>The highlighted fragment produced by the PanGu highlighter.</returns>
    private static string Highlight(string keywords, string content)
    {
        return Highlight(keywords, content, "<strong>", "</strong>", 200);
    }

    /// <summary>
    /// Returns the best fragment of <paramref name="content"/> with the keywords wrapped
    /// in the given tags.
    /// </summary>
    /// <param name="keywords">Keywords to highlight.</param>
    /// <param name="content">Text to extract the fragment from.</param>
    /// <param name="openTag">Markup inserted before each match.</param>
    /// <param name="closeTag">Markup inserted after each match.</param>
    /// <param name="fragmentSize">Fragment size handed to the highlighter.</param>
    private static string Highlight(string keywords, string content, string openTag, string closeTag, int fragmentSize)
    {
        PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter(openTag, closeTag);
        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
        highlighter.FragmentSize = fragmentSize;
        return highlighter.GetBestFragment(keywords, content);
    }

    #endregion

    #region 04 Delete from index

    #region Delete by id

    /// <summary>
    /// Deletes the document(s) whose "id" term equals <paramref name="id"/>.
    /// </summary>
    /// <param name="id">Id of the document to remove.</param>
    /// <returns>The value of the writer's HasDeletions() after the commit.</returns>
    public bool Delete(string id)
    {
        Term term = new Term("id", id);

        // "using" disposes the writer even when the delete or commit throws.
        using (IndexWriter writer = OpenWriter(false))
        {
            writer.DeleteDocuments(term);
            writer.Commit();
            return writer.HasDeletions();
        }
    }

    #endregion

    #region Delete everything

    /// <summary>
    /// Removes every document from the index.
    /// </summary>
    /// <returns>
    /// The value of the writer's HasDeletions() after the commit, or false when any step throws.
    /// </returns>
    public bool DeleteAll()
    {
        try
        {
            using (IndexWriter writer = OpenWriter(false))
            {
                writer.DeleteAll();
                writer.Commit();

                // NOTE(review): after DeleteAll the writer may report no deletions, which
                // would make a successful call return false - confirm against Lucene.Net.
                return writer.HasDeletions();
            }
        }
        catch
        {
            // The bool return is the failure signal for callers, so no exception escapes.
            return false;
        }
    }

    #endregion

    #endregion

    #region Tokenizer test

    /// <summary>
    /// Runs the analyzer over <paramref name="keyword"/> and returns the produced terms,
    /// each followed by "|".
    /// </summary>
    /// <param name="keyword">Text to tokenize.</param>
    /// <returns>The terms joined as "term1|term2|...", or an empty string when there are none.</returns>
    public string Token(string keyword)
    {
        // Hold on to one analyzer so the instance that tokenizes is the one that gets closed.
        Analyzer tokenAnalyzer = analyzer;
        System.Text.StringBuilder terms = new System.Text.StringBuilder();
        System.IO.StringReader reader = new System.IO.StringReader(keyword);
        try
        {
            Lucene.Net.Analysis.TokenStream ts = tokenAnalyzer.TokenStream(keyword, reader);
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;
            while (ts.IncrementToken())
            {
                ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                terms.Append(ita.Term).Append("|");
            }
        }
        finally
        {
            reader.Close();
            tokenAnalyzer.Close();
        }
        return terms.ToString();
    }

    #endregion
}
/// <summary>
/// One indexed blog entry. Every public property is written to the search index as a
/// field of the same name, and search hits are returned as instances of this class.
/// </summary>
public class SearchResult
{
    /// <summary>
    /// Creates an empty result; all properties keep their default values.
    /// </summary>
    public SearchResult()
    {
    }

    /// <summary>
    /// Creates a fully populated result.
    /// </summary>
    /// <param name="title">Title.</param>
    /// <param name="content">Body text.</param>
    /// <param name="url">Address of the entry.</param>
    /// <param name="blogTag">Tag of the entry.</param>
    /// <param name="id">Unique id.</param>
    /// <param name="clickQuantity">Click count.</param>
    /// <param name="flag">Flag (user).</param>
    public SearchResult(string title, string content, string url, string blogTag, int id, int clickQuantity, int flag)
    {
        this.title = title;
        this.content = content;
        this.url = url;
        this.blogTag = blogTag;
        this.id = id;
        this.clickQuantity = clickQuantity;
        this.flag = flag;
    }

    /// <summary>Title.</summary>
    public string title { get; set; }

    /// <summary>Body text.</summary>
    public string content { get; set; }

    /// <summary>Address of the entry.</summary>
    public string url { get; set; }

    /// <summary>Tag of the entry.</summary>
    public string blogTag { get; set; }

    /// <summary>Unique id.</summary>
    public int id { get; set; }

    /// <summary>Click count.</summary>
    public int clickQuantity { get; set; }

    /// <summary>Flag (user).</summary>
    public int flag { get; set; }
}