【问题标题】:How to activate stemming in my Lucene search code(如何在我的 Lucene 搜索代码中启用词干提取)
【发布时间】:2015-12-07 15:39:10
【问题描述】:

有人可以帮我在代码中启用词干提取(stemming)吗?我尝试了很多方法,但都不太成功 :(

我当前的代码

    /// <summary>
    /// Builds an in-memory Lucene index containing one document per row of <paramref name="table"/>.
    /// </summary>
    /// <param name="table">Source rows; the "DishName" and "CustomisationID" columns of each row are indexed.</param>
    /// <returns>The <c>RAMDirectory</c> that holds the freshly written index.</returns>
    Directory createIndex(DataTable table)
    {
        var ramDirectory = new RAMDirectory();

        using (Analyzer standardAnalyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        using (var indexWriter = new IndexWriter(ramDirectory, standardAnalyzer, new IndexWriter.MaxFieldLength(1000)))
        {
            foreach (DataRow dataRow in table.Rows)
            {
                var doc = new Document();

                // Dish name is run through the analyzer so it can be matched as free text.
                var dishNameField = new Field(
                    "DishName",
                    dataRow["DishName"].ToString(),
                    Field.Store.YES,
                    Field.Index.ANALYZED);
                doc.Add(dishNameField);

                // The identifier is indexed as a single untouched term and stored for retrieval.
                var customisationIdField = new Field(
                    "CustomisationID",
                    dataRow["CustomisationID"].ToString(),
                    Field.Store.YES,
                    Field.Index.NOT_ANALYZED);
                doc.Add(customisationIdField);

                indexWriter.AddDocument(doc);
            }

            indexWriter.Optimize();
            indexWriter.Flush(true, true, true);
        }

        return ramDirectory;
    }

    /// <summary>
    /// Loads ~/App_data/MyDataset.xml, indexes its first table with <c>createIndex</c>, and searches
    /// the "DishName" field for <paramref name="textSearch"/>.
    /// </summary>
    /// <param name="textSearch">Query text, interpreted with Lucene query-parser syntax.</param>
    /// <returns>
    /// A table with the same schema as the first table of the dataset, holding one row per hit
    /// (up to 1000) with only the "CustomisationID" column populated. The table is empty when the
    /// query is null, blank, or cannot be parsed.
    /// </returns>
    private DataTable SearchDishName(string textSearch)
    {
        var ds = new DataSet();
        ds.ReadXml(System.Web.HttpContext.Current.Server.MapPath("~/App_data/MyDataset.xml"));
        DataTable sample = ds.Tables[0];

        // Clone copies the schema only, so the result starts out with no rows.
        DataTable table = sample.Clone();

        // A blank query cannot match anything; skip building the index altogether.
        if (textSearch == null || textSearch.Trim().Length == 0)
        {
            return table;
        }

        // The index directory is created per call, so it is disposed here together with the reader/searcher.
        using (var index = createIndex(sample))
        using (var reader = IndexReader.Open(index, true))
        using (var searcher = new IndexSearcher(reader))
        using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        {
            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "DishName", analyzer);
            var collector = TopScoreDocCollector.Create(1000, true);

            try
            {
                var query = queryParser.Parse(textSearch);
                searcher.Search(query, collector);
            }
            catch (Lucene.Net.QueryParsers.ParseException ex)
            {
                // Malformed user input is expected: report it and return no matches.
                // Any other exception indicates a real fault and is allowed to propagate.
                System.Diagnostics.Trace.TraceWarning("SearchDishName: could not parse query: " + ex.Message);
                return table;
            }

            foreach (var match in collector.TopDocs().ScoreDocs)
            {
                var doc = searcher.Doc(match.Doc);
                var row = table.NewRow();
                row["CustomisationID"] = doc.GetField("CustomisationID").StringValue;
                table.Rows.Add(row);
            }
        }

        return table;
    }

【问题讨论】:

    标签: c# lucene lucene.net stemming


    【解决方案1】:

    StandardAnalyzer 不包含词干提取功能。请改用 SnowballAnalyzer。例如,对于英文文本:

    // SnowballAnalyzer applies the Snowball stemmer for the named language to each token.
    // Use the same analyzer for both the IndexWriter and the QueryParser so that indexed
    // terms and query terms are stemmed the same way.
    Analyzer analyzer = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English");
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2013-03-06
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2023-03-09
      • 2021-05-02
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多