这次开发的博客主要功能或特点:
    第一:可以兼容各终端,特别是手机端。
    第二:到时会用到大量html5,炫啊。
    第三:导入博客园的精华文章,并做分类。(不要封我)
    第四:做个插件,任何网站上的技术文章都可以转发收藏 到本博客。

所以打算写个系列:《一步步搭建自己的博客》

 

演示地址:http://haojima.net/      群内共享源码:469075305 

 

一步步开发自己的博客  .NET版(5、Lucene.Net 和 必应站内搜索)

今天来分析下 嗨-博客 中的搜索功能。搜索功能在个人网站里面需要有,但又不是特别重要。所以我们要有这个功能,但可以不用太深入了解,毕竟我们不是专门做搜索这块的。

所以,我打算把搜索分两块。一块是,用Lucene.Net实现站内搜索。一块是利用第三方搜索引擎来 实现站内搜索。

 

 

Lucene.Net简介

Lucene.net是Lucene的.net移植版本,是一个开源的全文检索引擎开发包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎。开发人员可以基于Lucene.net实现全文检索的功能。
Lucene.net是Apache软件基金会赞助的开源项目,基于Apache License协议。
Lucene.net并不是一个爬行搜索引擎,也不会自动地索引内容。我们得先将要索引的文档中的文本抽取出来,然后再将其加到Lucene.net索引中。标准的步骤是先初始化一个Analyzer、打开一个IndexWriter、然后再将文档一个接一个地加进去。一旦完成这些步骤,索引就可以在关闭前得到优化,同时所做的改变也会生效。这个过程可能比开发者习惯的方式更加手工化一些,但却在数据的索引上给予你更多的灵活性,而且其效率也很高。(来源百度百科)

Lucene帮助类

其实在之前我也接触过 Lucene.net,那是自己做的一个小玩意(《lucene.net 3.0.3、结合盘古分词进行搜索的小例子(分页功能)》)。我随意看了下,这里有个帮助类挺不错的,也还符合我想要的效果。这里来分析下这个帮助类。

1.首先创建索引。      

// Open a writer on the index directory; the third argument (false) appends to the
// existing index instead of recreating it.
IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
// Build one document and add a single stored field that is indexed without tokenizing.
Document doc = new Document();
doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
// Hand the document to the writer.
writer.AddDocument(doc);

这里的 

directory_luce 是索引创建路径

analyzer 分析器

name 和 value 是对应存入索引的名字和值

2.从索引里面搜索       

            // Excerpt: parse the keyword against the title and content fields, run the
            // search, and turn each hit into a SearchResult with highlighted text.
            string[] fileds = { "title", "content" };// fields to search
            QueryParser parser = null;
            parser = new MultiFieldQueryParser(version, fileds, analyzer);// query across several fields
            Query query = parser.Parse(keyword);
            int n = 1000; // upper bound on the number of hits requested
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);// true = read-only
            TopDocs docs = searcher.Search(query, (Filter)null, n);
            if (docs == null || docs.TotalHits == 0)
            {
                // No hits: null is returned, not an empty list.
                return null;
            }
            else
            {
                List<SearchResult> list = new List<SearchResult>();
                int counter = 1; // incremented per hit but never read in this excerpt
                foreach (ScoreDoc sd in docs.ScoreDocs)// walk the hits
                {
                    try
                    {
                        // Read the stored field values back from the matching document.
                        Document doc = searcher.Doc(sd.Doc);
                        int id = int.Parse(doc.Get("id"));
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string blogTag = doc.Get("blogTag");
                        string url = doc.Get("url");
                        int flag = int.Parse(doc.Get("flag"));
                        int clickQuantity = int.Parse(doc.Get("clickQuantity"));

                        string createdate = doc.Get("createdate"); // read but not used below
                        // Wrap matched text in a red <font> tag, using fragments of size 50.
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                        highlighter.FragmentSize = 50;
                        content = highlighter.GetBestFragment(keyword, content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        // Keep the plain title when highlighting yields an empty string.
                        if (titlehighlight != "") title = titlehighlight;

                        list.Add(new SearchResult(title, content, url, blogTag, id, clickQuantity, flag));
                    }
                    catch (Exception ex)
                    {
                        // A hit that cannot be read or highlighted is skipped; only its message is printed.
                        Console.WriteLine(ex.Message);
                    }
                    counter++;
                }
                return list;

3.完整代码

 /// <summary>
 /// Helper around a file-system Lucene.Net index analyzed with PanGu. It builds and
 /// updates the index from <see cref="SearchResult"/> items, searches it (optionally
 /// paged, with hit highlighting) and deletes entries. Use it through
 /// <see cref="instance"/>.
 /// </summary>
 public class PanGuLuceneHelper
    {
        private PanGuLuceneHelper() { }

        #region Singleton
        // Built by the static initializer, which runs once, so concurrent first
        // access cannot create two helpers.
        private static readonly PanGuLuceneHelper _instance = new PanGuLuceneHelper();
        /// <summary>
        /// The single shared instance.
        /// </summary>
        public static PanGuLuceneHelper instance
        {
            get
            {
                return _instance;
            }
        }
        #endregion

        #region 00 Properties and settings
        #region Lucene.Net directory
        private Lucene.Net.Store.Directory _directory_luce = null;
        /// <summary>
        /// Lucene.Net directory opened over <see cref="directory"/>; opened on first use.
        /// </summary>
        public Lucene.Net.Store.Directory directory_luce
        {
            get
            {
                if (_directory_luce == null) _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
                return _directory_luce;
            }
        }
        #endregion

        #region Index folder on disk
        private System.IO.DirectoryInfo _directory = null;
        /// <summary>
        /// The "SearchIndex" folder under the application base directory; created if missing.
        /// </summary>
        public System.IO.DirectoryInfo directory
        {
            get
            {
                if (_directory == null)
                {
                    string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";
                    if (System.IO.Directory.Exists(dirPath) == false) _directory = System.IO.Directory.CreateDirectory(dirPath);
                    else _directory = new System.IO.DirectoryInfo(dirPath);
                }
                return _directory;
            }
        }
        #endregion

        #region Analyzer
        /// <summary>
        /// Returns a new PanGu analyzer on every access. Code that needs the same
        /// analyzer for several steps must keep it in a local variable.
        /// </summary>
        public Analyzer analyzer
        {
            get
            {
                return new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();
            }
        }
        #endregion

        #region Lucene version
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;
        /// <summary>
        /// Lucene compatibility version passed to the query parsers.
        /// </summary>
        public Lucene.Net.Util.Version version
        {
            get
            {
                return _version;
            }
        }
        #endregion
        #endregion

        #region 01 Create / update index
        /// <summary>
        /// Adds the given items to the index. Any document with the same id is
        /// deleted first so an item is never indexed twice.
        /// </summary>
        /// <param name="datalist">Items to index; null entries are skipped.</param>
        /// <returns>false when <paramref name="datalist"/> is null, otherwise true.</returns>
        public bool CreateIndex(List<SearchResult> datalist)
        {
            if (datalist == null) return false;

            IndexWriter writer = OpenWriter();
            try
            {
                foreach (SearchResult data in datalist)
                {
                    if (data == null) continue;
                    // Delete before adding, otherwise re-indexing leaves duplicates.
                    writer.DeleteDocuments(new Term("id", data.id.ToString()));
                    CreateIndex(writer, data);
                }
                writer.Optimize();
            }
            finally
            {
                // Always release the writer (and its write lock), even when indexing fails.
                writer.Dispose();
            }
            return true;
        }

        /// <summary>
        /// Indexes a single item; see <see cref="CreateIndex(List{SearchResult})"/>.
        /// </summary>
        /// <param name="data">Item to index.</param>
        /// <returns>true once the index has been updated.</returns>
        public bool CreateIndex(SearchResult data)
        {
            List<SearchResult> datalist = new List<SearchResult>();
            datalist.Add(data);
            return CreateIndex(datalist);
        }

        /// <summary>
        /// Turns every public property of <paramref name="data"/> into a stored field
        /// named after the property and adds the document through <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">Open writer; the caller owns and disposes it.</param>
        /// <param name="data">Item to index.</param>
        /// <returns>false when <paramref name="data"/> is null, otherwise true.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
        public bool CreateIndex(IndexWriter writer, SearchResult data)
        {
            if (writer == null) throw new ArgumentNullException("writer");
            if (data == null) return false;

            Document doc = new Document();
            foreach (PropertyInfo pi in data.GetType().GetProperties())
            {
                string name = pi.Name;
                object objval = pi.GetValue(data, null);
                string value = objval == null ? "" : objval.ToString();
                // id and flag must stay un-tokenized: they are matched and deleted by exact value.
                Field.Index indexMode = (name == "id" || name == "flag")
                    ? Field.Index.NOT_ANALYZED
                    : Field.Index.ANALYZED;
                doc.Add(new Field(name, value, Field.Store.YES, indexMode));
            }
            writer.AddDocument(doc);
            return true;
        }

        // Opens the index for appending; if that fails (for example no index exists yet)
        // falls back to creating a new one.
        private IndexWriter OpenWriter()
        {
            try
            {
                return new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            }
            catch
            {
                return new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            }
        }
        #endregion

        #region 02 Search title and content
        /// <summary>
        /// Searches the title and content fields and returns at most 1000 hits, with
        /// matches wrapped in a red &lt;font&gt; tag.
        /// </summary>
        /// <param name="keyword">Query text, parsed with the Lucene query syntax.</param>
        /// <returns>
        /// The hits, or null when <paramref name="keyword"/> is null/empty or nothing matches.
        /// Hits that cannot be read or highlighted are skipped.
        /// </returns>
        public List<SearchResult> Search(string keyword)
        {
            if (string.IsNullOrEmpty(keyword)) return null;

            // NOTE(review): keyword is parsed as query syntax, so unbalanced special
            // characters may make Parse throw; consider escaping user input.
            string[] fields = { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
            Query query = parser.Parse(keyword);
            const int maxHits = 1000;

            // true = read-only; disposing the searcher releases the index files it opened.
            using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
            {
                TopDocs docs = searcher.Search(query, (Filter)null, maxHits);
                if (docs == null || docs.TotalHits == 0)
                {
                    return null;
                }

                // One highlighter serves every hit.
                PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
                highlighter.FragmentSize = 50;

                List<SearchResult> list = new List<SearchResult>();
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    try
                    {
                        SearchResult hit = ReadResult(searcher.Doc(sd.Doc));
                        hit.content = highlighter.GetBestFragment(keyword, hit.content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, hit.title);
                        // Keep the plain title when the keyword does not occur in it.
                        if (!string.IsNullOrEmpty(titlehighlight)) hit.title = titlehighlight;
                        list.Add(hit);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one unreadable hit must not fail the whole search.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }

        // Rebuilds a SearchResult from the stored fields of an index document.
        // Throws when id, flag or clickQuantity is not a valid integer.
        private static SearchResult ReadResult(Document doc)
        {
            int id = int.Parse(doc.Get("id"));
            string title = doc.Get("title");
            string content = doc.Get("content");
            string blogTag = doc.Get("blogTag");
            string url = doc.Get("url");
            int flag = int.Parse(doc.Get("flag"));
            int clickQuantity = int.Parse(doc.Get("clickQuantity"));
            return new SearchResult(title, content, url, blogTag, id, clickQuantity, flag);
        }
        #endregion

        #region 03 Paged search within a category
        /// <summary>
        /// Searches blogTag, title and content, optionally restricted to one flag
        /// value, and returns a single page of hits with highlighted content.
        /// </summary>
        /// <param name="_flag">Category filter; null or empty applies no filter.</param>
        /// <param name="keyword">Query text; null or empty adds no keyword clause.</param>
        /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="PageSize">Number of hits per page.</param>
        /// <returns>The hits of the requested page, or null when nothing matches.</returns>
        public List<SearchResult> Search(string _flag, string keyword, int PageIndex, int PageSize)
        {
            if (PageIndex < 1) PageIndex = 1;

            // NOTE(review): when both filters are empty the query has no clauses;
            // confirm what callers expect in that case.
            Analyzer queryAnalyzer = analyzer;
            BooleanQuery bq = new BooleanQuery();
            if (!string.IsNullOrEmpty(_flag))
            {
                QueryParser qpflag = new QueryParser(version, "flag", queryAnalyzer);
                bq.Add(qpflag.Parse(_flag), Occur.MUST);
            }
            if (!string.IsNullOrEmpty(keyword))
            {
                string[] fields = { "blogTag", "title", "content" };
                QueryParser parser = new MultiFieldQueryParser(version, fields, queryAnalyzer);
                bq.Add(parser.Parse(keyword), Occur.MUST);
            }

            // Collect everything up to the end of the requested page, then slice the page out.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
            using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
            {
                searcher.Search(bq, collector);
                if (collector.TotalHits == 0)
                {
                    return null;
                }

                int start = PageSize * (PageIndex - 1);
                ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;
                // A null keyword is highlighted exactly like an empty one.
                string highlightKeyword = keyword ?? "";

                List<SearchResult> list = new List<SearchResult>();
                foreach (ScoreDoc sd in hits)
                {
                    try
                    {
                        SearchResult hit = ReadResult(searcher.Doc(sd.Doc));
                        hit.content = Highlight(highlightKeyword, hit.content);
                        list.Add(hit);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one unreadable hit must not fail the whole page.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }
        #endregion

        #region Highlight content by keywords
        /// <summary>
        /// Returns the best fragment of <paramref name="content"/> for
        /// <paramref name="keywords"/>, with matches wrapped in &lt;strong&gt;.
        /// </summary>
        /// <param name="keywords">Words to highlight.</param>
        /// <param name="content">Text to take the fragment from.</param>
        /// <returns>The highlighted fragment as produced by the PanGu highlighter.</returns>
        private static string Highlight(string keywords, string content)
        {
            SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<strong>", "</strong>");
            Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 200;
            return highlighter.GetBestFragment(keywords, content);
        }
        #endregion

        #region 04 Delete from index
        #region Delete by id
        /// <summary>
        /// Deletes the documents whose id field equals <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Value of the id field to delete.</param>
        /// <returns>
        /// What the writer's HasDeletions() reports after the commit.
        /// NOTE(review): that reflects the index as a whole, not only this call.
        /// </returns>
        public bool Delete(string id)
        {
            IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                writer.DeleteDocuments(new Term("id", id));
                writer.Commit();
                return writer.HasDeletions();
            }
            finally
            {
                // Release the write lock even when the delete or commit fails.
                writer.Dispose();
            }
        }
        #endregion

        #region Delete everything
        /// <summary>
        /// Removes every document from the index.
        /// </summary>
        /// <returns>true when the index was cleared, false when any step failed.</returns>
        public bool DeleteAll()
        {
            try
            {
                IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    writer.DeleteAll();
                    writer.Commit();
                }
                finally
                {
                    writer.Dispose();
                }
                // Success means "no exception". HasDeletions() is not a usable signal here:
                // once everything has been dropped there are no deleted documents left to report.
                return true;
            }
            catch
            {
                return false;
            }
        }
        #endregion
        #endregion

        #region Tokenizer test
        /// <summary>
        /// Runs <paramref name="keyword"/> through the analyzer and returns the
        /// resulting terms, each followed by "|".
        /// </summary>
        /// <param name="keyword">Text to tokenize.</param>
        /// <returns>The terms joined with "|", or "" for null/empty input.</returns>
        public string Token(string keyword)
        {
            if (string.IsNullOrEmpty(keyword)) return "";

            System.Text.StringBuilder ret = new System.Text.StringBuilder();
            // Keep one analyzer so the instance that made the stream is the one closed.
            Analyzer tokenAnalyzer = analyzer;
            try
            {
                using (System.IO.StringReader reader = new System.IO.StringReader(keyword))
                // The first argument is the field name; keyword is passed as before.
                using (Lucene.Net.Analysis.TokenStream ts = tokenAnalyzer.TokenStream(keyword, reader))
                {
                    while (ts.IncrementToken())
                    {
                        Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                        ret.Append(ita.Term).Append("|");
                    }
                }
            }
            finally
            {
                tokenAnalyzer.Close();
            }
            return ret.ToString();
        }
        #endregion

    }
View Code
/// <summary>
/// One indexed blog entry / one search hit. Every public property is written
/// to the index as a field named after the property.
/// </summary>
public class SearchResult
    {
        /// <summary>
        /// Creates an empty result; all properties keep their default values.
        /// </summary>
        public SearchResult()
        {
        }

        /// <summary>
        /// Creates a result with every property set.
        /// </summary>
        /// <param name="title">Title.</param>
        /// <param name="content">Body text.</param>
        /// <param name="url">URL of the entry.</param>
        /// <param name="blogTag">Tag.</param>
        /// <param name="id">Unique id.</param>
        /// <param name="clickQuantity">Click count.</param>
        /// <param name="flag">Flag (user).</param>
        public SearchResult(string title, string content, string url, string blogTag, int id, int clickQuantity, int flag)
        {
            // Assigned in parameter order.
            this.title = title;
            this.content = content;
            this.url = url;
            this.blogTag = blogTag;
            this.id = id;
            this.clickQuantity = clickQuantity;
            this.flag = flag;
        }

        /// <summary>
        /// Title.
        /// </summary>
        public string title { get; set; }

        /// <summary>
        /// Body text.
        /// </summary>
        public string content { get; set; }

        /// <summary>
        /// URL of the entry.
        /// </summary>
        public string url { get; set; }

        /// <summary>
        /// Tag.
        /// </summary>
        public string blogTag { get; set; }

        /// <summary>
        /// Unique id.
        /// </summary>
        public int id { get; set; }

        /// <summary>
        /// Click count.
        /// </summary>
        public int clickQuantity { get; set; }

        /// <summary>
        /// Flag (user).
        /// </summary>
        public int flag { get; set; }
    }
View Code

相关文章: