【发布时间】:2023-03-04 14:36:01
【问题描述】:
我刚开始使用 Solr,并定义了以下架构:
<schema name="example" version="1.5">
<!-- Field declarations. The analysis applied to each field is defined by its
     fieldType in the <types> section of this schema. -->
<fields>
  <!-- Document key; referenced by <uniqueKey> below. -->
  <field name="nodeId" type="string" indexed="true" stored="true" />
  <field name="_root_" type="string" indexed="true" stored="false" />
  <field name="datetime" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="epochSecs" type="string" indexed="true" stored="true" multiValued="true" />
  <field name="subject" type="text_general" indexed="true" stored="true" />
  <field name="body" type="text_general" indexed="true" stored="true" />
  <field name="emailId" type="string" indexed="true" stored="true" />
  <field name="compliantFlag" type="boolean" indexed="true" stored="true" />
  <field name="_version_" type="long" indexed="true" stored="true" />
  <!-- Catch-all search field; multiValued because several copyField sources feed it. -->
  <field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />
  <!-- N-gram search field. It must be multiValued: it receives one value per
       copyField source, and a single-valued destination rejects documents
       that deliver more than one value. -->
  <field name="ngrams" type="myNGram" indexed="true" stored="false" required="false" multiValued="true" />
</fields>

<uniqueKey>nodeId</uniqueKey>

<!-- Populate the catch-all "text" field from every searchable source field. -->
<copyField source="datetime" dest="text" />
<copyField source="epochSecs" dest="text" />
<copyField source="subject" dest="text" />
<copyField source="body" dest="text" />
<copyField source="emailId" dest="text" />
<copyField source="compliantFlag" dest="text" />

<!-- copyField works on the incoming document values and is never chained:
     a value copied INTO "text" is not copied onward to "ngrams". The n-gram
     field is therefore fed directly from the same source fields. -->
<copyField source="datetime" dest="ngrams" />
<copyField source="epochSecs" dest="ngrams" />
<copyField source="subject" dest="ngrams" />
<copyField source="body" dest="ngrams" />
<copyField source="emailId" dest="ngrams" />
<copyField source="compliantFlag" dest="ngrams" />
<!-- Kept for documents that supply "text" explicitly; this rule only sees
     such directly supplied values, not the copies made above. -->
<copyField source="text" dest="ngrams" />
<!-- Field type definitions: the analysis chains used by the fields of this schema. -->
<types>
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
  <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />

  <!-- General English text. Analysis only shapes the indexed terms; a field
       declared stored="true" still returns its value exactly as submitted. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <!-- Lower-case before stemming: the Porter stemmer expects lower-cased
           tokens, and this also makes matching case-insensitive. -->
      <filter class="solr.LowerCaseFilterFactory" />
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" />
      <filter class="solr.PorterStemFilterFactory" />
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
      <!-- Same stop-word list as the index analyzer, so both sides drop the
           same terms. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" />
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true" />
      <filter class="solr.PorterStemFilterFactory" />
    </analyzer>
  </fieldType>

  <!-- Lower-cased character n-grams of length 2 to 5 for each token. -->
  <fieldType name="myNGram" stored="false" class="solr.TextField">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory" />
      <filter class="solr.LowerCaseFilterFactory" />
      <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="5" />
    </analyzer>
  </fieldType>
</types>
索引时,停用词并没有从 body(正文)字段中被移除。
另外,我该如何使用 Solr 的分析器,从如下所示的字段值中去除 \n 之类的特殊字符:
\n \n\n\nThese are the numbers Smurfit has. \n\nP
感谢任何帮助。谢谢。
【问题讨论】:
标签: solr lucene indexing information-retrieval