我在 Lucene 中存储关系数据时遇到了一些问题,但您遇到的问题应该很容易解决。
我猜你对组字段进行了标记,这使得在字段值中搜索子字符串成为可能。只需添加未标记的字段,它应该可以按预期工作。
请检查以下一小段代码:
internal class Program {
private static void Main(string[] args) {
var directory = new RAMDirectory();
var writer = new IndexWriter(directory, new StandardAnalyzer());
AddDocument(writer, "group", "stuff", Field.Index.UN_TOKENIZED);
AddDocument(writer, "group", "other stuff", Field.Index.UN_TOKENIZED);
writer.Close(true);
var searcher = new IndexSearcher(directory);
Hits hits = searcher.Search(new TermQuery(new Term("group", "stuff")));
for (int i = 0; i < hits.Length(); i++) {
Console.WriteLine(hits.Doc(i).GetField("group").StringValue());
}
}
private static void AddDocument(IndexWriter writer, string name, string value, Field.Index index) {
var document = new Document();
document.Add(new Field(name, value, Field.Store.YES, index));
writer.AddDocument(document);
}
}
该示例将两个未标记化的文档添加到索引中,搜索内容并获得一次命中。如果您更改代码以将它们添加标记化,那么您将有两个命中,如您现在所见。
将 Lucene 用于关系数据的问题在于,通配符和范围搜索可能总是有效的。如果由于 Lucene 解析这些查询的方式而导致索引很大,则情况并非如此。
另一个示例来说明行为:
private static void Main(string[] args) {
var directory = new RAMDirectory();
var writer = new IndexWriter(directory, new StandardAnalyzer());
var documentA = new Document();
documentA.Add(new Field("name", "A", Field.Store.YES, Field.Index.UN_TOKENIZED));
documentA.Add(new Field("group", "stuff", Field.Store.YES, Field.Index.UN_TOKENIZED));
documentA.Add(new Field("group", "other stuff", Field.Store.YES, Field.Index.UN_TOKENIZED));
writer.AddDocument(documentA);
var documentB = new Document();
documentB.Add(new Field("name", "B", Field.Store.YES, Field.Index.UN_TOKENIZED));
documentB.Add(new Field("group", "stuff", Field.Store.YES, Field.Index.UN_TOKENIZED));
writer.AddDocument(documentB);
var documentC = new Document();
documentC.Add(new Field("name", "C", Field.Store.YES, Field.Index.UN_TOKENIZED));
documentC.Add(new Field("group", "other stuff", Field.Store.YES, Field.Index.UN_TOKENIZED));
writer.AddDocument(documentC);
writer.Close(true);
var query1 = new TermQuery(new Term("group", "stuff"));
SearchAndDisplay("First sample", directory, query1);
var query2 = new TermQuery(new Term("group", "other stuff"));
SearchAndDisplay("Second sample", directory, query2);
var query3 = new BooleanQuery();
query3.Add(new TermQuery(new Term("group", "stuff")), BooleanClause.Occur.MUST);
query3.Add(new TermQuery(new Term("group", "other stuff")), BooleanClause.Occur.MUST);
SearchAndDisplay("Third sample", directory, query3);
}
private static void SearchAndDisplay(string title, Directory directory, Query query3) {
var searcher = new IndexSearcher(directory);
Hits hits = searcher.Search(query3);
Console.WriteLine(title);
for (int i = 0; i < hits.Length(); i++) {
Console.WriteLine(hits.Doc(i).GetField("name").StringValue());
}
}