如何在 Lucene 中实现 tf-idf 和余弦相似度?我正在使用 Lucene 4.2。我创建的程序不使用 tf-idf 和余弦相似性,它只使用 TopScoreDocCollector。
import com.mysql.jdbc.Statement;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriter;
import java.sql.DriverManager;
import java.sql.Connection;
import java.sql.ResultSet;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
public class IndexMysqlDBStemming {
public static void main(String[] args) throws Exception {
// 1. Create Index From Database
Class.forName("com.mysql.jdbc.Driver").newInstance();
Connection connection = …Run Code Online (Sandbox Code Playgroud)