Abe*_*Abe 6 javascript r igraph dplyr
我正在努力将一些 JavaScript 代码(包括异步和图形功能)移植到 R。请帮助!
这是我尝试移植的内容:
import jsonpFetch from "./jsonpFetch";
import bus from '../bus';
/**
* This function builds a graph from google's auto-suggestions.
*/
export default function buildGraph(entryWord, pattern, MAX_DEPTH, progress) {
entryWord = entryWord && entryWord.trim();
if (!entryWord) return;
entryWord = entryWord.toLocaleLowerCase();
const insertPosition = pattern.indexOf('...');
if (insertPosition < 0) {
throw new Error('Query pattern is missing "..."');
}
const queryPosition = pattern.indexOf('[query]');
if (queryPosition < 0) {
throw new Error('Query pattern is missing "[query]" keyword');
}
if (insertPosition < queryPosition) {
throw new Error('[query] should come before ...');
}
let cancelled = false;
let pendingResponse;
let graph = require('ngraph.graph')();
graph.maxDepth = MAX_DEPTH;
let queue = [];
let requestDelay = 300 + Math.random() * 100;
progress.startDownload();
startQueryConstruction();
return {
dispose,
graph
}
function dispose() {
cancelled = true;
if (pendingResponse) {
pendingResponse.cancel();
pendingResponse = null;
}
}
function startQueryConstruction() {
graph.addNode(entryWord, {depth: 0});
fetchNext(entryWord);
}
function loadSiblings(parent, results) {
let q = fullQuery(parent).toLocaleLowerCase();
var parentNode = graph.getNode(parent);
if (!parentNode) {
throw new Error('Parent is missing for ' + parent);
}
results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
.map(x => x.substring(q.length))
.forEach(other => {
const hasOtherNode = graph.hasNode(other);
const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
if (hasOtherNode) {
if (!hasOtherLink) {
graph.addLink(parent, other);
}
return;
}
let depth = parentNode.data.depth + 1;
graph.addNode(other, {depth});
graph.addLink(parent, other);
if (depth < MAX_DEPTH) queue.push(other);
});
setTimeout(loadNext, requestDelay);
}
function loadNext() {
if (cancelled) return;
if (queue.length === 0) {
bus.fire('graph-ready', graph);
return;
}
let nextWord = queue.shift();
fetchNext(nextWord);
progress.updateLayout(queue.length, nextWord);
}
function fetchNext(query) {
pendingResponse = getResponse(fullQuery(query));
pendingResponse
.then(res => onPendingReady(res, query))
.catch((msg) => {
const err = 'Failed to download ' + query + '; Message: ' + msg;
console.error(err);
progress.downloadError(err)
loadNext();
});
}
function onPendingReady(res, query) {
if (res.length >= 2) {
loadSiblings(query, res[1]);
} else {
console.error(res);
throw new Error('Unexpected response');
}
}
function fullQuery(query) {
return pattern.replace('[query]', query).replace('...', '');
}
function getResponse(query) {
return jsonpFetch('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query));
}
}
Run Code Online (Sandbox Code Playgroud)
这就是我到目前为止在 R 中的内容:
# This function builds a graph from Google's Auto-Suggestions
buildGraph <- function(entryWord, pattern) {
graph <- igraph::make_empty_graph() # setup empty graph
entryWord <- trimws(entryWord) #remove leading/trailing whitespace
entryWord <- tolower(entryWord) # lowercase technology name
requestDelay <- 0.3 + runif(1, 0, 1) * 0.1 # 300 milliseconds (0.3 seconds) + some number between 0 and 1 * 100 milliseconds (0.1 seconds)
startQueryConstruction()
dispose <- function() {
cancelled <- TRUE
if (pendingResponse) {
# pendingResponse.cancel();
# pendingResponse = null;
}
}
startQueryConstruction <- function() {
graph %>% igraph::add.vertices(entryWord)
fetchNext(entryWord)
}
loadSiblings <- function(parent, results) {
q = tolower(fullQuery(parent))
parentNode <- igraph::vertex_attr(graph, parent)
if (!parentNode) {
# throw new Error('Parent is missing for ' + parent);
stderr(paste0('Parent is missing for ', parent))
}
# results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
# .map(x => x.substring(q.length))
# .forEach(other => {
# const hasOtherNode = graph.hasNode(other);
# const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
# if (hasOtherNode) {
# if (!hasOtherLink) {
# graph.addLink(parent, other);
# }
# return;
# }
#
# let depth = parentNode.data.depth + 1;
# graph.addNode(other, {depth});
# graph.addLink(parent, other);
# if (depth < MAX_DEPTH) queue.push(other);
# });
#
# setTimeout(loadNext, requestDelay);
# }
loadNext <- function() {
# if (cancelled) return;
if (length(queue) == 0) {
# bus.fire('graph-ready', graph)
# return;
}
nextWord <- queue.shift() # what is queue.shift in R?????
fetchNext(nextWord)
# progress.updateLayout(queue.length, nextWord) -- I think this is only for Vue UI
}
fetchNext <- function(query) {
pendingResponse = getResponse(query)
pendingResponse %...>%
res = onPendingReady(res, query) %...!%
(function(error) {
print(paste("Failed to download: ", query, "; Message: ", error$message))
loadNext()
})
}
onPendingReady <- function(res, query) {
if (length(res) >= 2) {
loadSiblings(query, res[1])
} else {
# catch and print error
# console.error(res)
# throw error
# throw new Error('Unexpected response');
}
}
fullQuery <- function(query) {
# return pattern.replace('[query]', query).replace('...', '')
}
getResponse <- function(query) {
json_response <- future::future(jsonlite::fromJSON('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query)))
return(json_response)
}
}
Run Code Online (Sandbox Code Playgroud)
请注意,我已经包含了一些注释掉的 JavaScript 代码行,我不确定 R 的等价物是什么。对我来说,大部分晦涩的代码都集中在如何igraph在 R 中执行操作以及如何在 R 中异步执行操作(使用promises和/或futures)。
归属:https : //github.com/anvaka/vs/blob/master/src/lib/buildGraph.js
提前致谢!
我最近一直在研究 igraph 和 API,所以这相当新鲜。我认为下面的代码可以满足您的需求,但它确实遗漏了一些复杂性(例如不会使 API 超时)。它不是很快 - 我怀疑这很大程度上与使用 as_data_frame 接口来跟踪顶点有关。
所以我确信它可以被优化,并且我确信在某个时候 API 会以破坏它的编码返回一些东西,但这只是一个开始。
library(igraph)
api_fetch <- function(query){
result <- jsonlite::fromJSON(paste0('http://suggestqueries.google.com/complete/search?client=firefox&q=', httpuv::encodeURIComponent(query)))
return(result)
}
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
vertex_depth <- distances(graph, v=entry_word)
vertices$depth <- vertex_depth[match(colnames(vertex_depth), vertices$name)]
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# and add an edge from it to this_vertex
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
Run Code Online (Sandbox Code Playgroud)
运行:
> g <- build_query_graph('amazon')
> g
IGRAPH 0ec19b6 DN-- 90 100 --
+ attr: name (v/c), searched (v/l)
+ edges from 0ec19b6 (vertex names):
[1] amazon ->amazon amazon ->amazon prime amazon ->amazon prime video
[4] amazon ->amazon uk amazon ->amazon music amazon ->amazon smile
[7] amazon ->amazon india amazon ->amazon jobs amazon ->amazon video
[10] amazon ->amazon customer service amazon prime ->amazon prime amazon prime ->amazon prime video
[13] amazon prime ->amazon prime movies amazon prime ->amazon prime music amazon prime ->amazon prime now
[16] amazon prime ->amazon prime login amazon prime ->amazon prime uk amazon prime ->amazon prime tv
[19] amazon prime ->amazon prime cost amazon prime ->amazon prime student amazon prime video->amazon prime video
[22] amazon prime video->amazon prime video login amazon prime video->amazon prime video app amazon prime video->amazon prime video uk
+ ... omitted several edges
> plot(g)
Run Code Online (Sandbox Code Playgroud)
编辑:想一想,重复地重新计算所有距离并进行大量排序和匹配。在创建单个顶点时保存它们的深度可能会更快:
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, depth=0, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# add an edge from it to this_vertex and store the depth from the entry word
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, depth=NA, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
graph <- set_vertex_attr(graph, 'depth', daughter,
distances(graph, v=entry_word, to=daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
Run Code Online (Sandbox Code Playgroud)