【问题标题】:Lucene field not included in otherwise working searchLucene 字段未包含在其他工作搜索中
【发布时间】:2016-05-19 13:44:59
【问题描述】:

在具有多个字段的 C# 布尔查询 Lucene 搜索中,其中三个字段不包含在搜索中(Sku、VariantSkus 和 Mpc),而其他字段工作正常。

使用 Luke,我可以看到值存储在索引中。在 Luke 中搜索时,我使用搜索器中包含的查询得到正确的结果 (取自 Visual Studio 中的调试器)。 例子: 使用以下查询:(在 Visual Studio 中调试时直接取自查询值)

(+Mpc:B118^5) (+Sku:B118^5) (+Brand:B118) (+VariantSkus:B118^4) (+DisplayName:B118^3) (+DisplayName:B118*) (+DisplayName:B118~0.5) (+MisspelledNames:B118) (+Description:B118^0.4)

在运行代码时不起作用(搜索器上的 totalHits 为 0),但给出了将 Mpc 与 Luke 中的正确产品匹配的预期结果。

我真的很困惑为什么相同的查询在 C# 代码中不起作用。 任何帮助或建议将不胜感激。

创建索引:

        public static String CreateLuceneIndex(string basePath, HttpContext context)
    {
        var stopwatch = new Stopwatch();

        /* get the absolute path to the directory where the indexes will be created (and if it doesn't exist, create it) */
        var dirPath = context.Server.MapPath(basePath);
        if (!Directory.Exists(dirPath)) Directory.CreateDirectory(dirPath);
        var di = new DirectoryInfo(dirPath);
        var directory = FSDirectory.Open(di);

        stopwatch.Start();

        /* Select the standard Lucene analyser */
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        var count = 0;
        var catalog = ProductCatalog.All().First();

        /* Open the index writer using the selected analyser */
        using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        using(var mediaRepository = new ProductMediaRepository())
        {
            var urlService = ObjectFactory.Instance.Resolve<IUrlService>();

            // Get all the visible products from uCommerce we wish to index
            foreach (var product in Product.Find(p => p.DisplayOnSite && p.ParentProduct == null))
            {
                var url = urlService.GetUrl(catalog, product);

                var doc = new Document();
                doc.Add(new Field("id", product.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                doc.Add(new Field("Url", url ?? String.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                doc.Add(new Field("Src", ImageService.GetProductMainImage(mediaRepository, product).Src ?? String.Empty
                    , Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
                doc.Add(new Field("Sku", product.Sku ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

                var varianSkus = String.Join(" ", product.Variants.Select(variant => variant.VariantSku));
                doc.Add(new Field("VariantSkus", varianSkus, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

                doc.Add(new Field("DisplayName", product.DisplayName() ?? product.Name ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
                var brands = String.Join(" ", product.Variants.Select(variant => variant.GetPropertyValue<String>("Brand")).Where(w => !String.IsNullOrWhiteSpace(w)));
                doc.Add(new Field("Brand", brands ?? String.Empty, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));

                doc.Add(new Field("MisspelledNames", product.GetPropertyValue<String>("MisspelledNames") ?? String.Empty,
                    Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));

                doc.Add(new Field("Description", product.ShortDescription()?.StripHtml() ?? String.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

                doc.Add(new Field("Mpc", product.GetPropertyValue<String>("MPC") ?? String.Empty, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.YES));

                writer.AddDocument(doc);
                count++;
            }

            writer.Optimize();
            writer.Close();
        }

        stopwatch.Stop();
        return $"Indexed {count} products in {stopwatch.Elapsed}.\n\n";

搜索:

       public static ListItemsDtoModel ProductSearch(String searchTerm, String indexDirPath, Int32 maxResults = Int32.MaxValue)
    {
        searchTerm = searchTerm.Trim().ToLowerInvariant();
        var searchWords = ParseSearchWords(searchTerm);

        indexDirPath = HttpContext.Current.Server.MapPath(indexDirPath);
        var di = new DirectoryInfo(indexDirPath);

        using (var directory = FSDirectory.Open(di))
        using (var searcher = new IndexSearcher(IndexReader.Open(directory, true)))
        {
                var query = new BooleanQuery();

                query.Add(new BooleanClause(AddTermClauseGroup("Mpc", searchWords, 5), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("Sku", searchWords, 5), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("Brand", searchWords), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("VariantSkus", searchWords, 4), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("DisplayName", searchWords, 3), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddWildcardClauseGroup("DisplayName", searchWords), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddFuzzyTermClauseGroup("DisplayName", searchWords), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("MisspelledNames", searchWords), BooleanClause.Occur.SHOULD));
                query.Add(new BooleanClause(AddTermClauseGroup("Description", searchWords, 0.4f), BooleanClause.Occur.SHOULD));

                var searchResults = searcher.Search(query, maxResults);

                return AsListItemsDtoModel(searchResults.ScoreDocs.Select(sd =>
                {
                    var document = searcher.Doc(sd.doc);
                    return new ImageLinkDtoModel
                    {
                        Url = document.Get("Url"),
                        Text = document.Get("DisplayName"),
                        Alt = document.Get("DisplayName"),
                        Src = document.Get("Src"),
                    };
                }).ToList());
        }
    }
   private static String[] ParseSearchWords(string searchTerm)
    {
        return searchTerm.Split(' ', '-')
            .Where(w => !String.IsNullOrWhiteSpace(w))
            .Select(QueryParser.Escape)
            .ToArray();
    }


    private static BooleanQuery AddTermClauseGroup(String field, IEnumerable<String> searchTerms, float boost = 1f)
    {
        var boostStr = Math.Abs(boost-1f) > 0.001 ? "^" + boost.ToString(CultureInfo.InvariantCulture) : String.Empty;

        return AddClauseGroup(searchTerms, word => new TermQuery(new Term(field, word  + boostStr)));
    }

    private static BooleanQuery AddFuzzyTermClauseGroup(String field, IEnumerable<String> searchTerms)
    {
        return AddClauseGroup(searchTerms, word => new FuzzyQuery(new Term(field, word), 0.5f));
    }

    private static BooleanQuery AddWildcardClauseGroup(String field, IEnumerable<String> searchTerms)
    {
        return AddClauseGroup(searchTerms, word => new WildcardQuery(new Term(field, word + "*")));
    }

    private static BooleanQuery AddClauseGroup(IEnumerable<String> searchTerms, Func<String, Query> createSubClause)
    {
        var query = new BooleanQuery();

        foreach (var searchTerm in searchTerms)
        {
            query.Add(new BooleanClause(createSubClause(searchTerm), BooleanClause.Occur.MUST));
        }
        return query;
    }

【问题讨论】:

    标签: c# lucene


    【解决方案1】:

    问题在于您应用提升的方式:

    return AddClauseGroup(searchTerms, word => new TermQuery(new Term(field, word  + boostStr)));
    

    您不能以这种方式将提升融入术语本身。这里没有 QueryParser,所以像“term^4”这样的 QueryParser 语法是行不通的。它只会搜索字符串“term^4”,默认提升为 1.0。带有提升的 TermQuery 如下所示:

    Query query = new TermQuery(new Term(field, word));
    query.Boost = boost;
    

    【讨论】:

    • 我将 TermQuery 代码更改为: private static BooleanQuery AddTermClauseGroup(String field, IEnumerable searchTerms, float boostValue = 1f) { return AddClauseGroup(searchTerms, word => { var termQuery = new TermQuery( new Term(field, word)); if(Math.Abs​​(boostValue - 1f) > 0.001) termQuery.SetBoost(boostValue); return termQuery; }); } ,它解决了这个问题。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多