Cla*_*eri 7 r list unnest tidyr
我通过读取存储视频游戏日志信息的 JSON 获得了一个嵌套列表。列表的时间元素是一个简单的向量,而 inputManagerStates 和syncedProperties 是可能包含 0 个或多个元素的列表。
这是这个问题的后续内容,在一些帮助下,我设法将数据转换为矩形格式。不幸的是,我有很多这样的 JSON 文件,并且unnest_wider运行速度似乎相当慢。
列表:
test_list <-
list(list(time = 9.92405605316162, inputManagerStates = list(),
syncedProperties = list()), list(time = 9.9399995803833,
inputManagerStates = list(list(inputId = "InputY", buttonState = FALSE,
axisValue = 0), list(inputId = "InputX", buttonState = FALSE,
axisValue = 0.0501395985484123), list(inputId = "xPos",
buttonState = FALSE, axisValue = 5), list(inputId = "yPos",
buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.70664256811142),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.707570731639862)),
syncedProperties = list(list(name = "timeStamp", value = "97,2"))),
list(time = 9.95659446716309, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0.0993990004062653),
list(inputId = "xPos", buttonState = FALSE, axisValue = 5),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.705721318721771),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.708489596843719)),
syncedProperties = list(list(name = "timeStamp", value = "97,21667"))),
list(time = 20.0626411437988, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0.601816594600677),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0),
list(inputId = "xPos", buttonState = FALSE, axisValue = -1.31777036190033),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0800001174211502),
list(inputId = "zPos", buttonState = FALSE, axisValue = 6.08214092254639),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.391442984342575),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.920202374458313)),
syncedProperties = list(list(name = "timeStamp", value = "107,3167"),
list(name = "previousGameState", value = "1"), list(
name = "newGameState", value = "2"))))
Run Code Online (Sandbox Code Playgroud)
我用来矩形化列表的代码:
test_list <-
list(list(time = 9.92405605316162, inputManagerStates = list(),
syncedProperties = list()), list(time = 9.9399995803833,
inputManagerStates = list(list(inputId = "InputY", buttonState = FALSE,
axisValue = 0), list(inputId = "InputX", buttonState = FALSE,
axisValue = 0.0501395985484123), list(inputId = "xPos",
buttonState = FALSE, axisValue = 5), list(inputId = "yPos",
buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.70664256811142),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.707570731639862)),
syncedProperties = list(list(name = "timeStamp", value = "97,2"))),
list(time = 9.95659446716309, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0.0993990004062653),
list(inputId = "xPos", buttonState = FALSE, axisValue = 5),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.705721318721771),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.708489596843719)),
syncedProperties = list(list(name = "timeStamp", value = "97,21667"))),
list(time = 20.0626411437988, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0.601816594600677),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0),
list(inputId = "xPos", buttonState = FALSE, axisValue = -1.31777036190033),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0800001174211502),
list(inputId = "zPos", buttonState = FALSE, axisValue = 6.08214092254639),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.391442984342575),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.920202374458313)),
syncedProperties = list(list(name = "timeStamp", value = "107,3167"),
list(name = "previousGameState", value = "1"), list(
name = "newGameState", value = "2"))))
Run Code Online (Sandbox Code Playgroud)
创建于 2022-08-24,使用reprex v2.0.2
因为我的数据unnest相当快,但unnest_wider也很慢。第一个unnest_wider(value)可以很容易地用基础 R 编写 - cbind(., do.call("rbind", .$value))- 并且速度要快得多:
library(tidyverse)
output_df <-
test_list %>%
tibble::enframe(name = "frame", value = "value") %>%
tidyr::unnest_wider(value) %>%
tidyr::unnest(inputManagerStates, keep_empty = TRUE) %>%
tidyr::unnest(syncedProperties, keep_empty = TRUE) %>%
tidyr::unnest_wider(syncedProperties) %>%
tidyr::unnest_wider(inputManagerStates)
output_df
#> # A tibble: 46 x 7
#> frame time inputId buttonState axisValue name value
#> <int> <dbl> <chr> <lgl> <dbl> <chr> <chr>
#> 1 1 9.92 <NA> NA NA <NA> <NA>
#> 2 2 9.94 InputY FALSE 0 timeStamp 97,2
#> 3 2 9.94 InputX FALSE 0.0501 timeStamp 97,2
#> 4 2 9.94 xPos FALSE 5 timeStamp 97,2
#> 5 2 9.94 yPos FALSE 0.0800 timeStamp 97,2
#> 6 2 9.94 zPos FALSE 0 timeStamp 97,2
#> 7 2 9.94 xRot FALSE 0 timeStamp 97,2
#> 8 2 9.94 yRot FALSE -0.707 timeStamp 97,2
#> 9 2 9.94 zRot FALSE 0 timeStamp 97,2
#> 10 2 9.94 wRot FALSE 0.708 timeStamp 97,2
#> # ... with 36 more rows
Run Code Online (Sandbox Code Playgroud)
创建于 2022-08-24,使用reprex v2.0.2
我正在寻找一种%>% tidyr::unnest_wider(syncedProperties) %>% tidyr::unnest_wider(inputManagerStates)用更快的代码替换的方法,但cbind由于行数不同,该解决方案不起作用。
编辑:认为这可能是可能的,unnest::unnest()但无法实现所需的结构(虽然tidytable::unnest_wider.目前仅支持向量)。
您可以尝试在-package 中rrapply使用(重新访问 base )。当取消嵌套复制列表时,此选项非常有效,例如:how = "bind"rrapplyrapplyinputManagerStates
library(rrapply)\n\ngetStates <- function(lst) {\n rrapply(\n lst, \n condition = \\(x, .xparents) "inputManagerStates" %in% .xparents, \n how = "bind", \n options = list(coldepth = 4, namecols = TRUE)\n )\n}\n\ngetStates(test_list) |>\n head()\n#> L1 L2 L3 inputId buttonState axisValue\n#> 1 2 inputManagerStates 1 InputY FALSE 0.0000000\n#> 2 2 inputManagerStates 2 InputX FALSE 0.0501396\n#> 3 2 inputManagerStates 3 xPos FALSE 5.0000000\n#> 4 2 inputManagerStates 4 yPos FALSE 0.0800000\n#> 5 2 inputManagerStates 5 zPos FALSE 0.0000000\n#> 6 2 inputManagerStates 6 xRot FALSE 0.0000000\n\nmicrobenchmark::microbenchmark(\n getStates(test_list)\n)\n#> Unit: microseconds\n#> expr min lq mean median uq max neval\n#> getStates(test_list) 518.573 556.7075 839.9312 587.677 675.444 18465.56 100\nRun Code Online (Sandbox Code Playgroud)\n要获得问题中的矩形格式,一种可能的方法是首先单独取消嵌套time,inputManagerStates和syncedProperties列表,然后将它们合并回单个 data.frame 中。与问题中的方法相比,这已经提供了 \xc2\xb110 倍的加速:
library(tidyr)\nlibrary(tibble)\nlibrary(rrapply)\n\nunnest_rrapply <- function() {\n \n ## bind individual data.frames\n states <- rrapply(\n test_list, \n condition = \\(x, .xparents) "inputManagerStates" %in% .xparents, \n how = "bind", \n options = list(coldepth = 4, namecols = TRUE)\n )\n \n properties <- rrapply(\n test_list, \n condition = \\(x, .xparents) "syncedProperties" %in% .xparents, \n how = "bind", \n options = list(coldepth = 4, namecols = TRUE)\n )\n \n times <- rrapply(\n test_list,\n condition = \\(x, .xname) .xname == "time",\n how = "bind",\n options = list(namecols = TRUE)\n )\n \n ## merge into single data.frame\n out <- merge(times, properties[, -c(2, 3)], all = TRUE, by = "L1") |>\n merge(states[, -c(2, 3)], all = TRUE, by = "L1")\n \n return(out)\n \n}\n\nunnest_rrapply() |>\n tibble::as_tibble()\n#> # A tibble: 46 \xc3\x97 7\n#> L1 time name value inputId buttonState axisValue\n#> <chr> <dbl> <chr> <chr> <chr> <lgl> <dbl>\n#> 1 1 9.92 <NA> <NA> <NA> NA NA \n#> 2 2 9.94 timeStamp 97,2 InputY FALSE 0 \n#> 3 2 9.94 timeStamp 97,2 InputX FALSE 0.0501\n#> 4 2 9.94 timeStamp 97,2 xPos FALSE 5 \n#> 5 2 9.94 timeStamp 97,2 yPos FALSE 0.0800\n#> 6 2 9.94 timeStamp 97,2 zPos FALSE 0 \n#> 7 2 9.94 timeStamp 97,2 xRot FALSE 0 \n#> 8 2 9.94 timeStamp 97,2 yRot FALSE -0.707 \n#> 9 2 9.94 timeStamp 97,2 zRot FALSE 0 \n#> 10 2 9.94 timeStamp 97,2 wRot FALSE 0.708 \n#> # \xe2\x80\xa6 with 36 more rows\n#> # \xe2\x84\xb9 Use `print(n = ...)` to see more rows\n\nunnest_tidyr <- function() {\n \n test_list |>\n enframe(name = "frame", value = "value") |>\n unnest_wider(value) |>\n unnest(inputManagerStates, keep_empty = TRUE) |>\n unnest(syncedProperties, keep_empty = TRUE) |>\n unnest_wider(syncedProperties) |>\n unnest_wider(inputManagerStates)\n \n}\n\nmicrobenchmark::microbenchmark(\n unnest_tidyr(),\n unnest_rrapply()\n)\n#> Unit: milliseconds\n#> expr min lq mean median uq max neval\n#> unnest_tidyr() 17.993309 20.33476 23.13348 22.14034 24.767693 47.35227 100\n#> unnest_rrapply() 2.354847 2.60518 2.90372 2.74223 2.888071 13.38620 100\nRun Code Online (Sandbox Code Playgroud)\n注意:在末尾丢失合并unnest_rrapply()并返回例如 data.frames 列表(这可能足以满足 OP 的目的)可以进一步减少计算时间 \xc2\xb12 倍。