在 map2 中映射 - 如何正确引用参数(purrr)

jak*_*kes 3 nested r purrr

ex <- structure(list(group = c("group B", "group B", "group C", "group B","group C", "group B", "group B", "group A", "group C", "group C", "group C", "group B", "group A", "group A", "group A", "group B", "group A", "group A", "group B", "group C", "group B", "group A", "group C", "group C", "group C", "group C", "group B", "group A", "group A", "group C", "group B", "group A", "group A", "group B", "group C", "group C", "group A", "group C", "group C", "group A", "group B", "group B", "group A", "group B", "group C", "group C","group A", "group B", "group C", "group C"), A1 = c(0.765913072274998, 0.167720616329461, 0.282011203467846, 0.16467465297319, 0.407501850277185, 0.33958561392501, 0.117573569528759, 0.267871993361041, 0.930967768887058, 0.286146199563518, 0.741841563722119, 0.637853658990934, 0.137378493556753, 0.820813736645505, 0.249520575627685, 0.275153698632494, 0.916794545250013, 0.316050065914169, 0.393918378278613, 0.342175736324862, 0.0177193265408278, 0.178873546421528, 0.376545072998852, 0.411527326330543, 0.904074088903144, 0.487975180381909, 0.491365089081228, 0.591370195383206, 0.319207336986437, 0.98943907325156, 0.916014631278813, 0.0347612821497023, 0.323899461887777, 0.155270972754806, 0.436683354899287, 0.316902073565871, 0.734995431266725, 0.584133808733895, 0.515310257440433, 0.921727291075513, 0.0689518100116402, 0.659549278207123, 0.894137248862535, 0.00174906081520021, 0.873320956015959, 0.77207364118658, 0.637504813494161, 0.473099726485088, 0.557896945858374, 0.632965805241838), A2 = c(0.782154354499653, 0.718993512215093, 0.391234505455941, 0.337346265325323, 0.141482090810314, 0.587817938998342, 0.384924706770107, 0.0679492244962603, 0.0509498412720859, 0.786300176288933, 0.00685039279051125, 0.361857839627191, 0.851737944642082, 0.333896369440481, 0.521961389342323, 0.761324436869472, 0.486214824952185, 0.249763275263831, 0.536617708392441, 0.982582966331393, 0.879302836721763, 0.0212801641318947, 0.999207010492682, 
0.661623647902161, 0.514440550701693, 0.748157452791929, 0.609151393873617, 0.581557413795963, 0.495366840157658, 0.595225095050409, 0.694380027009174, 0.419036868494004, 0.618371620541438, 0.406731882831082, 0.947823651600629, 0.182527825701982, 0.365398081485182, 0.307149735512212, 0.905119536910206, 0.657605888554826, 0.706386201782152, 0.461993521312252, 0.637554163113236, 0.280387100065127, 0.454221101710573, 0.0712104975245893, 0.914795317919925, 0.951028517214581, 0.645093881059438, 0.754043457563967), A3 = c(0.590488174697384, 0.876135899219662, 0.349565496202558, 0.365676332963631, 0.709230658365414, 0.584304825868458, 0.391973132034764, 0.464247716590762, 0.00831679091788828, 0.282901889178902, 0.842566592851654, 0.141866789199412, 0.278708242345601, 0.680587171344087, 0.256092368392274, 0.535304376389831, 0.803430012892932, 0.336343225324526, 0.320332229137421, 0.809689761372283, 0.588527292944491, 0.767302295425907, 0.124350237427279, 0.605355758452788, 0.619420127244666, 0.326774680987, 0.917224677512422, 0.710018905811012, 0.892817938234657, 0.149181636283174, 0.65066168922931, 0.433064805110916, 0.167979725869372, 0.809581968234852, 0.803237372776493, 0.703188817715272, 0.507392750121653, 0.372131450567394, 0.0688441153615713, 0.928956841118634, 0.960712827509269, 0.37454927386716, 0.753415656508878, 0.687665716046467, 0.05052674934268, 0.155349446227774, 0.806162646971643, 0.725155076943338, 0.537310504587367, 0.674253351520747), A4 = c(0.426875792676583, 0.168233293108642, 0.38692078506574, 0.673673333134502, 0.221049380488694, 0.142470651771873, 0.505352358799428, 0.579006788786501, 0.809476702939719, 0.343090934911743, 0.136329119792208, 0.881694708252326, 0.142607795307413, 0.658202062360942, 0.0624804550316185, 0.938871977152303, 0.477995269699022, 0.989794839406386, 0.307003591908142, 0.40553830191493, 0.0249065780080855, 0.321581491269171, 0.432656849268824, 0.578710418893024, 0.482647196389735, 0.72430428257212, 0.611029474530369, 
0.748521578731015, 0.939656358910725, 0.803305297158659, 0.339922665851191, 0.919090943178162, 0.0926963407546282, 0.671128012472764, 0.634122629882768, 0.219061656622216, 0.376445228001103, 0.468331813113764, 0.131768246181309, 0.258267979836091, 0.651934198103845, 0.678243630565703, 0.663701833924279, 0.678762876661494, 0.524524878012016, 0.380242201732472, 0.433922954136506, 0.795754680642858, 0.383180371485651, 0.160383063135669)), .Names = c("group", "A1", "A2", "A3", "A4"), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"))
Run Code Online (Sandbox Code Playgroud)

有了上面的示例数据,我想在各组内分别执行 msClustering。这种聚类需要调整参数 h,所以我在 h.cand 列中为它定义了几个候选值。然后我想用 h 的各个候选值依次调用 msClustering,并将输出存储在列表列中。从理论上讲,用 purrr 应该可以做到,但我认为这需要嵌套的 map,确切地说是在 map2 中嵌套 map。这正是我的问题所在:我不确定如何引用不同层级的参数。我的尝试如下:

# Attempt from the question: nest the data by group, derive candidate
# bandwidths per group, then try to cluster each group once per candidate.
# This version fails (see the error reported below it).
ex %>% 
  group_by(group) %>% 
  nest() %>% 
  # NOTE(review): `h.cand = ...` is not wrapped in `mutate()` here; presumably
  # the intent was `mutate(h.cand = ...)` -- confirm against the original post.
  # Candidate h values: the 5%, 10%, ..., 40% quantiles of the pairwise
  # distances between rows.
  h.cand = map(data, ~quantile(dist(.x), seq(0.05, 0.40, by = 0.05))) %>% 
  # NOTE(review): each `~` lambda binds its own `.x`/`.y`, so inside the inner
  # `~` these names do not refer to the outer map2() arguments.
  mutate(cluster = map2(h.cand, data, ~map(.x, ~msClustering(
    .y, # intended: data (second argument of outer map2)
    h = .x # h.cand element (first argument of inner map)
  ))))
Run Code Online (Sandbox Code Playgroud)

并最终出现错误:

错误:无法分配大小为 1681.9 Gb 的向量

我应该如何引用外层和内层 map 的元素,以便对每个 group 针对 h.cand 的每个元素各执行一次聚类(共 8 次,即向量的长度)?

Nat*_*ate 6

对于像这样的复杂匿名函数,最好使用 function(x) 代替 lambda/~ 语法,作为传给 map() 的 .f 参数。

清理数据:

# Inspect the length of every element; they must all match before the
# elements can be combined into a data frame.
map(ex, length)
# Pad element 5 up to the length of the longest element. The shortfall is
# computed rather than hard-coded, so this is a no-op (runif(0) is empty)
# when element 5 is already complete.
# NOTE: the padding values are random placeholders, not real observations.
ex[[5]] <- c(
  ex[[5]],
  runif(max(lengths(ex)) - length(ex[[5]]))
)
# make into data frame
ex <- dplyr::bind_cols(ex)
Run Code Online (Sandbox Code Playgroud)

使用function(x)代替~

# For each group, derive candidate bandwidths and cluster once per candidate.
ex2 <- ex %>%
  group_by(group) %>%
  nest() %>%
  mutate(
    # Candidate h values: the 5%, 10%, ..., 40% quantiles of the pairwise
    # distances within the group's data.
    h.cand = map(
      data,
      function(group_data) {
        quantile(dist(group_data), seq(0.05, 0.40, by = 0.05))
      }
    ),
    # Explicit function(...) arguments keep the outer (per-group) and inner
    # (per-bandwidth) values distinct, avoiding the `.x`/`.y` clash that
    # nested `~` lambdas run into.
    cluster = map2(
      h.cand, data,
      function(bandwidths, group_data) {
        map(bandwidths, function(h) msClustering(group_data, h))
      }
    )
  )
Run Code Online (Sandbox Code Playgroud)

结果检查:

# Expand the `cluster` list-column so each clustering result gets its own row.
unnest(ex2, cluster)
# A tibble: 24 x 2
   group   cluster   
   <chr>   <list>    
 1 group B <list [2]>
 2 group B <list [2]>
 3 group B <list [2]>
 4 group B <list [2]>
 5 group B <list [2]>
 6 group B <list [2]>
 7 group B <list [2]>
 8 group B <list [2]>
 9 group C <list [2]>
10 group C <list [2]>
# ... with 14 more rows
Run Code Online (Sandbox Code Playgroud)