我在CSV文件中有一个很大的网络.它包含450k节点和45,000,000个关系.正如我在neo4j文档中读到的那样,这种类型的数据库可以处理如此庞大的网络.
我还读到我可以使用嵌入式服务器以及独立服务器.
我的问题是它们之间有什么区别?我想拥有一个保存其数据库状态的服务器.
第二个问题是:我可以使用REST API还是Java API对数据库执行操作?
性能有何不同?我希望例如将所有节点级别作为输出.
是否可以从CSV加载图(graph)?
什么是我的问题的最佳解决方案?
下面是使用Neo4j-Batch-Inserter导入呼叫记录的代码.示例中的数据是动态生成的;在实际使用中,您当然应该从文件中读取数据并相应地拆分每条记录.
import org.apache.commons.io.FileUtils;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.index.BatchInserterIndex;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.index.impl.lucene.LuceneBatchInserterIndexProvider;
import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import static org.neo4j.helpers.collection.MapUtil.map;
/**
 * Bulk-imports randomly generated call records into a Neo4j store through the batch inserter.
 *
 * <p>Every distinct phone number becomes a node with a {@code Number} property that is also added
 * to the {@code Caller} Lucene index; every call becomes a {@code CALLED} relationship with a
 * {@code duration} property. To import real data, replace the random generation in
 * {@link #createGraphDatabase()} with reading and splitting records from a file.
 */
public class CallRecordImportBatch {
    public static final int MILLION = 1000000;
    /** Number of calls between two progress log lines. */
    public static final int BATCH_SIZE = MILLION;
    /** Total number of relationships (calls) to create. */
    public static final int CALLS = 45 * MILLION;
    /** Upper bound (exclusive) for generated phone numbers, i.e. the maximum number of nodes. */
    public static final int USERS = CALLS / 100;
    /** Store directory; it is emptied before every run. */
    public static final File STORE_DIR = new File("target/calls_" + CALLS);
    private static final Random rnd = new Random();

    enum MyRelationshipTypes implements RelationshipType {CALLED}

    /**
     * Returns a random phone number in {@code [0, USERS)}, zero-padded to 13 characters.
     */
    private static String randomPhoneNumber() {
        final int phoneNumber = rnd.nextInt(USERS);
        // Locale.ROOT keeps the digits ASCII; the default locale may substitute
        // locale-specific digits, which would change the stored keys.
        return String.format(java.util.Locale.ROOT, "%013d", phoneNumber);
    }

    /**
     * Runs the import and prints the total elapsed time.
     *
     * @param args ignored
     * @throws IOException if the store directory cannot be cleaned
     */
    public static void main(String[] args) throws IOException {
        long time = System.currentTimeMillis();
        CallRecordImportBatch importBatch = new CallRecordImportBatch();
        importBatch.createGraphDatabase();
        System.out.println((System.currentTimeMillis() - time) + " ms: " + "Create Database");
    }

    private BatchInserterImpl db;
    private BatchInserterIndex phoneNumberIndex;

    /**
     * Recreates the store directory, then inserts {@link #CALLS} relationships between randomly
     * chosen callers, logging progress every {@link #BATCH_SIZE} calls.
     *
     * <p>The index provider and the batch inserter are always shut down, even when the import
     * fails; a failure is propagated to the caller instead of being reported as success.
     *
     * @throws IOException if the store directory cannot be cleaned
     */
    private void createGraphDatabase() throws IOException {
        if (STORE_DIR.exists()) {
            FileUtils.cleanDirectory(STORE_DIR);
        }
        STORE_DIR.mkdirs();
        db = new BatchInserterImpl(STORE_DIR.getAbsolutePath(),
                MapUtil.stringMap("cache_type", "weak",
                        "neostore.nodestore.db.mapped_memory", "500M",
                        "neostore.relationshipstore.db.mapped_memory", "2000M",
                        "neostore.propertystore.db.mapped_memory", "1000M",
                        "neostore.propertystore.db.strings.mapped_memory", "0M",
                        "neostore.propertystore.db.arrays.mapped_memory", "0M"
                ));
        final LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
        try {
            phoneNumberIndex = indexProvider.nodeIndex("Caller", MapUtil.stringMap("type", "exact"));
            phoneNumberIndex.setCacheCapacity("Caller", 1000000);
            long time = System.currentTimeMillis();
            // Sized so that USERS entries fit without rehashing at the default 0.75 load factor.
            Map<String, Long> cache = new HashMap<String, Long>((int) (USERS / 0.75f) + 1);
            for (int call = 0; call < CALLS; call++) {
                if (call % BATCH_SIZE == 0) {
                    System.out.println((System.currentTimeMillis() - time) + " ms: " + String.format("calls %d callers %d", call, cache.size()));
                    time = System.currentTimeMillis();
                }
                final String callerNumber = randomPhoneNumber();
                final int duration = (int) (System.currentTimeMillis() % 3600);
                final String calleeNumber = randomPhoneNumber();
                long caller = getOrCreateCaller(cache, callerNumber);
                long callee = getOrCreateCaller(cache, calleeNumber);
                db.createRelationship(caller, callee, MyRelationshipTypes.CALLED, map("duration", duration));
            }
            System.out.println((System.currentTimeMillis() - time) + " ms: " + String.format("calls %d callers %d", CALLS, cache.size()));
        } finally {
            // Close the index first, and still close the store if closing the index fails.
            try {
                indexProvider.shutdown();
            } finally {
                db.shutdown();
            }
        }
    }

    /**
     * Returns the node id for {@code number}, creating the node on first sight.
     *
     * @param cache  phone number to node id map; updated when a node is created
     * @param number phone number to look up
     * @return the node id of the caller
     */
    private Long getOrCreateCaller(Map<String, Long> cache, String number) {
        final Long callerId = cache.get(number);
        if (callerId != null) {
            return callerId;
        }
        long caller = createCaller(number);
        cache.put(number, caller);
        return caller;
    }

    /**
     * Creates a caller node with a {@code Number} property and adds it to the phone number index.
     *
     * @param number phone number of the new caller
     * @return the id of the created node
     */
    private long createCaller(String number) {
        long caller = db.createNode(map("Number", number));
        phoneNumberIndex.add(caller, map("Number", number));
        // No flush here: nothing queries the index during the import (look-ups go through the
        // in-memory cache), and flushing after every single add is expensive. The index is
        // closed by indexProvider.shutdown() once the import ends.
        return caller;
    }
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
3418 次 |
| 最近记录: |