将 JavaScript 异步和 igraph 代码移植到 R?

Abe*_*Abe 6 javascript r igraph dplyr

我正在努力将一些 JavaScript 代码(包括异步和图形功能)移植到 R。请帮助!

这是我尝试移植的内容:

import jsonpFetch from "./jsonpFetch";
import bus from '../bus';

/**
 * This function builds a graph from google's auto-suggestions.
 */
export default function buildGraph(entryWord, pattern, MAX_DEPTH, progress) {
  entryWord = entryWord && entryWord.trim();
  if (!entryWord) return;

  entryWord = entryWord.toLocaleLowerCase();

  const insertPosition = pattern.indexOf('...');
  if (insertPosition < 0) {
    throw new Error('Query pattern is missing "..."');
  }
  const queryPosition = pattern.indexOf('[query]');
  if (queryPosition < 0) {
    throw new Error('Query pattern is missing "[query]" keyword');
  }

  if (insertPosition < queryPosition) {
    throw new Error('[query] should come before ...');
  }

  let cancelled = false;
  let pendingResponse;
  let graph = require('ngraph.graph')();
  graph.maxDepth = MAX_DEPTH;
  let queue = [];
  let requestDelay = 300 + Math.random() * 100;
  progress.startDownload();

  startQueryConstruction();

  return {
    dispose,
    graph
  }

  function dispose() {
    cancelled = true;
    if (pendingResponse) {
      pendingResponse.cancel();
      pendingResponse = null;
    }
  }

  function startQueryConstruction() {
    graph.addNode(entryWord, {depth: 0});
    fetchNext(entryWord);
  }

  function loadSiblings(parent, results) {
    let q = fullQuery(parent).toLocaleLowerCase();
    var parentNode = graph.getNode(parent);

    if (!parentNode) {
      throw new Error('Parent is missing for ' + parent);
    }

    results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
      .map(x => x.substring(q.length))
      .forEach(other => {
        const hasOtherNode = graph.hasNode(other);
        const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
        if (hasOtherNode) {
          if (!hasOtherLink) {
            graph.addLink(parent, other);
          }
          return;
        }

        let depth = parentNode.data.depth + 1;
        graph.addNode(other, {depth});
        graph.addLink(parent, other);
        if (depth < MAX_DEPTH) queue.push(other);
      });

    setTimeout(loadNext, requestDelay);
  }

  function loadNext() {
    if (cancelled) return;
    if (queue.length === 0) {
      bus.fire('graph-ready', graph);
      return;
    }

    let nextWord = queue.shift();
    fetchNext(nextWord);
    progress.updateLayout(queue.length, nextWord);
  }

  function fetchNext(query) {
    pendingResponse = getResponse(fullQuery(query));
    pendingResponse
      .then(res => onPendingReady(res, query))
      .catch((msg) => {
        const err = 'Failed to download ' + query + '; Message: ' + msg;
        console.error(err);
        progress.downloadError(err)
        loadNext();
      });
  }

  function onPendingReady(res, query) {
    if (res.length >= 2) {
      loadSiblings(query, res[1]);
    } else {
      console.error(res);
      throw new Error('Unexpected response');
    }
  }

  function fullQuery(query) {
    return pattern.replace('[query]', query).replace('...', '');
  }

  function getResponse(query) {
    return jsonpFetch('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query));
  }
}
Run Code Online (Sandbox Code Playgroud)

这就是我到目前为止在 R 中的内容:

# This function builds a graph from Google's Auto-Suggestions

buildGraph <- function(entryWord, pattern) {

  graph <- igraph::make_empty_graph() # setup empty graph

  entryWord <- trimws(entryWord) #remove leading/trailing whitespace
  entryWord <- tolower(entryWord) # lowercase technology name

  requestDelay <- 0.3 + runif(1, 0, 1) * 0.1 # 300 milliseconds (0.3 seconds) + some number between 0 and 1 * 100 milliseconds (0.1 seconds)

  startQueryConstruction()

  dispose <- function() {
    cancelled <- TRUE
    if (pendingResponse) {
      # pendingResponse.cancel();
      # pendingResponse = null;
    }
  }

  startQueryConstruction <- function() {
    graph %>% igraph::add.vertices(entryWord)
    fetchNext(entryWord)
  }

  loadSiblings <- function(parent, results) {
    q = tolower(fullQuery(parent))
    parentNode <- igraph::vertex_attr(graph, parent)

    if (!parentNode) {
      # throw new Error('Parent is missing for ' + parent);
      stderr(paste0('Parent is missing for ', parent))
    }

    # results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
  #     .map(x => x.substring(q.length))
  #     .forEach(other => {
  #       const hasOtherNode = graph.hasNode(other);
  #       const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
  #       if (hasOtherNode) {
  #         if (!hasOtherLink) {
  #           graph.addLink(parent, other);
  #         }
  #         return;
  #       }
  #       
  #       let depth = parentNode.data.depth + 1;
  #       graph.addNode(other, {depth});
  #       graph.addLink(parent, other);
  #       if (depth < MAX_DEPTH) queue.push(other);
  #       });
  #     
  #     setTimeout(loadNext, requestDelay);
  # }

  loadNext <- function() {
    # if (cancelled) return;
    if (length(queue) == 0) {
      # bus.fire('graph-ready', graph)
      # return;
    }

    nextWord <- queue.shift() # what is queue.shift in R?????
    fetchNext(nextWord)
    # progress.updateLayout(queue.length, nextWord) -- I think this is only for Vue UI
  }

  fetchNext <- function(query) {
    pendingResponse = getResponse(query)
    pendingResponse %...>%
       res = onPendingReady(res, query) %...!%
        (function(error) {
          print(paste("Failed to download: ", query, "; Message: ", error$message))
          loadNext()
        })
  }

  onPendingReady <- function(res, query) {
    if (length(res) >= 2) {
      loadSiblings(query, res[1])
    } else {
      # catch and print error
      # console.error(res)
      # throw error
      # throw new Error('Unexpected response');
    }
  }

  fullQuery <- function(query) {
    # return pattern.replace('[query]', query).replace('...', '')
  }

  getResponse <- function(query) {
    json_response <- future::future(jsonlite::fromJSON('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query)))
    return(json_response)
  }


}
Run Code Online (Sandbox Code Playgroud)

请注意,我已经包含了一些注释掉的 JavaScript 代码行,我不确定 R 的等价物是什么。对我来说,大部分晦涩的代码都集中在如何igraph在 R 中执行操作以及如何在 R 中异步执行操作(使用promises和/或futures)。

归属:https : //github.com/anvaka/vs/blob/master/src/lib/buildGraph.js

提前致谢!

Dav*_*d_O 3

我最近一直在研究 igraph 和 API,所以这相当新鲜。我认为下面的代码可以满足您的需求,但它确实遗漏了一些复杂性(例如不会使 API 超时)。它不是很快 - 我怀疑这很大程度上与使用 as_data_frame 接口来跟踪顶点有关。

所以我确信它可以被优化,并且我确信在某个时候 API 会以破坏它的编码返回一些东西,但这只是一个开始。

library(igraph)

api_fetch <- function(query){
    result <- jsonlite::fromJSON(paste0('http://suggestqueries.google.com/complete/search?client=firefox&q=', httpuv::encodeURIComponent(query)))
    return(result)
}


build_query_graph <- function(entry_word, max_depth=2){

    # Create an empty graph
    graph <- make_empty_graph()
    entry_word <- tolower(trimws(entry_word))
    graph <- add_vertices(graph, 1, name=entry_word, searched=FALSE)

    # Keep on doing this until the graph hits the maximum depth from the entry word
    while(TRUE){

        # Look up the current vertices and find their depths from the entry word
        vertices <- as_data_frame(graph, what='vertices')
        vertex_depth <- distances(graph, v=entry_word)
        vertices$depth <- vertex_depth[match(colnames(vertex_depth), vertices$name)]

        # Find vertices at least one step from the maximum depth and that haven't 
        # already been searched and sort to get the shallowest at the top
        live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
        live_vertices <- live_vertices[order(live_vertices$depth),]

        # If there are any vertices meeting these criteria, then query the API
        # otherwise bail from the while loop
        if(nrow(live_vertices)){

            # Get the vertex name and query it
            this_vertex <- live_vertices$name[1]
            res <- api_fetch(this_vertex)

            # For each of the daughter results, check it isn't already a vertex
            # and add an edge from it to this_vertex
            for(daughter in res[[2]]){

                if(! daughter %in% get.vertex.attribute(graph, 'name')){
                    graph <- add_vertices(graph, 1, name=daughter, searched=FALSE)
                }

                graph <- add_edges(graph, c(this_vertex, daughter))
            }

            # Don't search this vertex again
            graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)

        } else {
            break
        }       
    }
    return(graph)
}
Run Code Online (Sandbox Code Playgroud)

运行:

> g <- build_query_graph('amazon')
> g
IGRAPH 0ec19b6 DN-- 90 100 -- 
+ attr: name (v/c), searched (v/l)
+ edges from 0ec19b6 (vertex names):
 [1] amazon            ->amazon                        amazon            ->amazon prime                  amazon            ->amazon prime video           
 [4] amazon            ->amazon uk                     amazon            ->amazon music                  amazon            ->amazon smile                 
 [7] amazon            ->amazon india                  amazon            ->amazon jobs                   amazon            ->amazon video                 
[10] amazon            ->amazon customer service       amazon prime      ->amazon prime                  amazon prime      ->amazon prime video           
[13] amazon prime      ->amazon prime movies           amazon prime      ->amazon prime music            amazon prime      ->amazon prime now             
[16] amazon prime      ->amazon prime login            amazon prime      ->amazon prime uk               amazon prime      ->amazon prime tv              
[19] amazon prime      ->amazon prime cost             amazon prime      ->amazon prime student          amazon prime video->amazon prime video           
[22] amazon prime video->amazon prime video login      amazon prime video->amazon prime video app        amazon prime video->amazon prime video uk        
+ ... omitted several edges
> plot(g)
Run Code Online (Sandbox Code Playgroud)

命令的输出

编辑:想一想,重复地重新计算所有距离并进行大量排序和匹配。在创建单个顶点时保存它们的深度可能会更快:

build_query_graph <- function(entry_word, max_depth=2){

    # Create an empty graph
    graph <- make_empty_graph()
    entry_word <- tolower(trimws(entry_word))
    graph <- add_vertices(graph, 1, name=entry_word, depth=0, searched=FALSE)

    # Keep on doing this until the graph hits the maximum depth from the entry word
    while(TRUE){

        # Look up the current vertices and find their depths from the entry word
        vertices <- as_data_frame(graph, what='vertices')

        # Find vertices at least one step from the maximum depth and that haven't 
        # already been searched and sort to get the shallowest at the top
        live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
        live_vertices <- live_vertices[order(live_vertices$depth),]

        # If there are any vertices meeting these criteria, then query the API
        # otherwise bail from the while loop
        if(nrow(live_vertices)){

            # Get the vertex name and query it
            this_vertex <- live_vertices$name[1]
            res <- api_fetch(this_vertex)

            # For each of the daughter results, check it isn't already a vertex
            # add an edge from it to this_vertex and store the depth from the entry word
            for(daughter in res[[2]]){

                if(! daughter %in% get.vertex.attribute(graph, 'name')){
                    graph <- add_vertices(graph, 1, name=daughter, depth=NA, searched=FALSE)
                }

                graph <- add_edges(graph, c(this_vertex, daughter))
                graph <- set_vertex_attr(graph, 'depth', daughter,
                                         distances(graph, v=entry_word, to=daughter))
            }

            # Don't search this vertex again
            graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)

        } else {
            break
        }       
    }

    return(graph)
}
Run Code Online (Sandbox Code Playgroud)