ywa*_*wat 5 python lucene pylucene python-3.x
我是 Lucene 的新手。我想用Python 3编写PyLucene 6.5的示例代码。我更改了该版本的示例代码。但是,我找不到几个文档,并且不确定更改是否正确。
# indexer.py
import sys
import lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, StringField, FieldType
from org.apache.lucene.index import IndexWriter, IndexWriterConfig
from org.apache.lucene.store import SimpleFSDirectory, FSDirectory
from org.apache.lucene.util import Version
if __name__ == "__main__":
lucene.initVM()
indexPath = File("index/").toPath()
indexDir = FSDirectory.open(indexPath)
writerConfig = IndexWriterConfig(StandardAnalyzer())
writer = IndexWriter(indexDir, writerConfig)
print("%d docs in index" % writer.numDocs())
print("Reading lines from sys.stdin...")
tft = FieldType()
tft.setStored(True)
tft.setTokenized(True)
for n, l in enumerate(sys.stdin):
doc = Document()
doc.add(Field("text", l, tft))
writer.addDocument(doc)
print("Indexed %d lines from stdin (%d docs in index)" % (n, writer.numDocs()))
print("Closing index of %d docs..." % writer.numDocs())
writer.close()
Run Code Online (Sandbox Code Playgroud)
此代码读取输入并存储在index目录中。
# retriever.py
import sys
import lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.index import IndexReader, DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory, FSDirectory
from org.apache.lucene.util import Version
if __name__ == "__main__":
lucene.initVM()
analyzer = StandardAnalyzer()
indexPath = File("index/").toPath()
indexDir = FSDirectory.open(indexPath)
reader = DirectoryReader.open(indexDir)
searcher = IndexSearcher(reader)
query = QueryParser("text", analyzer).parse("hello")
MAX = 1000
hits = searcher.search(query, MAX)
print("Found %d document(s) that matched query '%s':" % (hits.totalHits, query))
for hit in hits.scoreDocs:
print(hit.score, hit.doc, hit.toString())
doc = searcher.doc(hit.doc)
print(doc.get("text").encode("utf-8"))
Run Code Online (Sandbox Code Playgroud)
我们应该能够检索(搜索),retriever.py但它不会返回任何内容。它出什么问题了?
In []: tft.indexOptions()
Out[]: <IndexOptions: NONE>
Run Code Online (Sandbox Code Playgroud)
尽管据记录DOCS_AND_FREQS_AND_POSITIONS是默认值,但情况已不再如此。TextField这是;的默认值。必须FieldType明确setIndexOptions。
| 归档时间: |
|
| 查看次数: |
5746 次 |
| 最近记录: |