【问题标题】:ElasticSearch NEST manually map subfields needed for analyzer(ElasticSearch NEST 手动映射分析器所需的子字段)
【发布时间】:2017-04-20 05:06:45
【问题描述】:

我已经能够根据下面的原始请求获得正确的 ElasticSearch 命令来插入和搜索我的记录:

Original ElasticSearch trying to be converted to NEST

PUT /sample
{
  "settings": {
    "index.number_of_shards": 5,
    "index.number_of_replicas": 0,
    "analysis": {
      "filter": {
        "nGram_filter": {
          "type": "nGram",
          "min_gram": 2,
          "max_gram": 20,
          "token_chars": [
            "letter",
            "digit"
          ]
        },
        "edgenGram_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 20
        }
      },
      "analyzer": {
        "ngram_index_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "nGram_filter"
          ]
        },
        "edge_ngram_index_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "edgenGram_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "test": {
      "properties": {
        "name": {
          "type": "string",
          "fields": {
            "prefixes": {
              "type": "string",
              "analyzer": "edge_ngram_index_analyzer",
              "search_analyzer": "standard"
            },
            "substrings": {
              "type": "string",
              "analyzer": "ngram_index_analyzer",
              "search_analyzer": "standard"
            }
          }
        }
      }
    }
  }
}

但是,我现在在尝试使用 NEST 命令转换此样本时遇到问题。这是我到目前为止所做的,它可以编译并创建索引,但是 prefixes 和 substrings 这两个子字段不存在。

// Creates the "sample" index with custom ngram / edge-ngram analysis settings and a
// mapping for the `test` type whose `name` text field declares two sub-fields,
// "prefixes" and "substrings", each indexed with its own custom analyzer.
Client.CreateIndex("sample", i => i
                    .Settings(s => s
                        // NOTE(review): the raw PUT request sets index.number_of_shards to 5; confirm 10 is intended.
                        .NumberOfShards(10) 
                        .NumberOfReplicas(0)
                        .Analysis(a => a
                            // Token filters, registered by name so the analyzers below can reference them.
                            .TokenFilters(tf => tf
                                .NGram("nGram_filter", td => td
                                    .MinGram(2)
                                    .MaxGram(20)
                                 )
                                // NOTE(review): this filter is registered as "edgeNGram", but
                                // edge_ngram_index_analyzer below references "edgenGram_filter";
                                // the two names do not match.
                                .EdgeNGram("edgeNGram", td => td
                                    .MinGram(2)
                                    .MaxGram(20)
                                 )
                            )
                            // Custom analyzers: keyword tokenizer followed by lowercase and an ngram-style filter.
                            .Analyzers(anz => anz
                                .Custom("ngram_index_analyzer", cc => cc
                                    .Tokenizer("keyword")
                                    .Filters("lowercase", "nGram_filter")
                                )
                                .Custom("edge_ngram_index_analyzer", cc => cc
                                    .Tokenizer("keyword")
                                    .Filters("lowercase", "edgenGram_filter")
                                )
                            )
                        )
                    )
                    .Mappings(m => m
                        .Map<test>(map => map
                            .Properties(ps => ps
                                 // `name` is a text field with two additional sub-fields.
                                 .Text(t => t
                                     .Name(n => n.name)                 
                                     .Fields(f => f
                                         // "prefixes": indexed with the edge-ngram analyzer, searched with "standard".
                                         .Text(tt => tt
                                            .Name("prefixes")
                                            .Analyzer("edge_ngram_index_analyzer")
                                            .SearchAnalyzer("standard")
                                         )
                                         // "substrings": indexed with the ngram analyzer, searched with "standard".
                                         .Text(tt => tt
                                            .Name("substrings")
                                            .Analyzer("ngram_index_analyzer")
                                            .SearchAnalyzer("standard")
                                         )
                                     )
                                 )
                            )
                        )
                    )
                );

【问题讨论】:

    标签: elasticsearch tokenize nest analysis analyzer


    【解决方案1】:

    EdgeNGram 标记过滤器的名称中似乎有拼写错误:一处写的是 edgeNGram,而另一处写的是 edgenGram_filter。因此,当我使用 NEST 运行以下映射时

    // Creates the "sample" index: two custom analyzers built from ngram / edge-ngram
    // token filters, and a mapping for `test` whose `name` text field carries the
    // "prefixes" and "substrings" sub-fields.
    client.CreateIndex("sample", index => index
        .Settings(settings => settings
            .NumberOfShards(10)
            .NumberOfReplicas(0)
            .Analysis(analysis => analysis
                // Each analyzer references its token filter by name; the names must
                // match the ones registered in TokenFilters below.
                .Analyzers(analyzers => analyzers
                    .Custom("ngram_index_analyzer", ngramAnalyzer => ngramAnalyzer
                        .Tokenizer("keyword")
                        .Filters("lowercase", "nGram_filter"))
                    .Custom("edge_ngram_index_analyzer", edgeAnalyzer => edgeAnalyzer
                        .Tokenizer("keyword")
                        .Filters("lowercase", "edgeNGram_filter")))
                .TokenFilters(filters => filters
                    .NGram("nGram_filter", ngram => ngram
                        .MinGram(2)
                        .MaxGram(20))
                    .EdgeNGram("edgeNGram_filter", edgeNGram => edgeNGram
                        .MinGram(2)
                        .MaxGram(20)))))
        .Mappings(mappings => mappings
            .Map<test>(typeMapping => typeMapping
                .Properties(properties => properties
                    .Text(nameField => nameField
                        .Name(doc => doc.name)
                        .Fields(subFields => subFields
                            // name.prefixes: edge-ngram analyzer at index time, "standard" at search time.
                            .Text(prefixes => prefixes
                                .Name("prefixes")
                                .Analyzer("edge_ngram_index_analyzer")
                                .SearchAnalyzer("standard"))
                            // name.substrings: ngram analyzer at index time, "standard" at search time.
                            .Text(substrings => substrings
                                .Name("substrings")
                                .Analyzer("ngram_index_analyzer")
                                .SearchAnalyzer("standard")))))));
    

    它将其转换为包含两个子字段的正确 JSON。希望对您有所帮助。

    【讨论】:

    • 效果很好!我没有意识到我的 EdgeNGram 过滤器名称错误。现在它按预期工作。谢谢
    猜你喜欢
    • 2015-12-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2020-01-29
    • 1970-01-01
    • 2016-06-16
    • 2016-10-14
    • 2016-10-30
    相关资源
    最近更新 更多