R:具有重复的数据帧上的扩展函数

Ale*_*tov 3 aggregate r spread dplyr tidyr

我有一个数据框,我需要转动,但数据框有重复的标识符,所以spread函数给出一个错误Error: Duplicate identifiers for rows (5, 6)

Dimension = c("A","A","B","B","A","A")   
Date = c("Mon","Tue","Mon","Wed","Fri","Fri")    
Metric = c(23,25,7,9,7,8)
df = data.frame(Dimension,Date,Metric)
df

  Dimension Date Metric
1         A  Mon     23
2         A  Tue     25
3         B  Mon      7
4         B  Wed      9
5         A  Fri      7
6         A  Fri      8

library(tidyr)
df1 = spread(df, Date, Metric, fill = " ")

Error: Duplicate identifiers for rows (5, 6)
Run Code Online (Sandbox Code Playgroud)

然后我合并了行并粘贴了Metric:

dfa = aggregate(df[3], df[-3], 
                FUN = function(X) paste(unique(X), collapse=", "))

  Dimension Date Metric
1         A  Fri   7, 8
2         A  Mon     23
3         B  Mon      7
4         A  Tue     25
5         B  Wed      9
Run Code Online (Sandbox Code Playgroud)

然后重复,当然它现在有效:

df1 = spread(dfa, Date, Metric, fill = " ")
df1
  Dimension  Fri Mon Tue Wed
1         A 7, 8  23  25    
2         B        7       9
Run Code Online (Sandbox Code Playgroud)

问题:是否有一种"更容易"的方法,或者我的方法是否足够有效,所以我不需要失去睡眠?谢谢!

编辑.所有代码 - 我和2 akrun都可以使用这个小数据集.但是,akrun的dplyr版本打破了我的真实数据集.这是dput.

structure(list(Dimension = c(10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437), Date = structure(c(1L, 3L, 5L, 7L, 9L, 10L, 11L, 
12L, 13L, 14L, 16L, 18L, 19L, 20L, 22L, 23L, 24L, 26L, 27L, 28L, 
30L, 32L, 33L, 34L, 40L, 41L, 42L, 47L, 48L, 49L, 51L, 52L, 53L, 
54L, 55L, 58L, 59L, 60L, 61L, 62L, 63L, 65L, 66L, 68L, 69L, 70L, 
74L, 75L, 76L, 2L, 3L, 5L, 7L, 8L, 10L, 11L, 15L, 17L, 20L, 21L, 
24L, 25L, 28L, 30L, 31L, 34L, 36L, 42L, 43L, 46L, 48L, 49L, 53L, 
54L, 56L, 65L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 7L, 24L, 30L, 
38L, 65L, 4L, 6L, 7L, 24L, 28L, 29L, 30L, 35L, 37L, 39L, 44L, 
45L, 50L, 57L, 64L, 65L), .Label = c("16", "analog tuner", "aspect ratio", 
"assembled in country of origin", "backlight technology", "battery type", 
"brand", "brightness", "color class", "component video", "composite video", 
"country of origin", "depth w/ stand", "digital audio output", 
"digital tuner", "display technology", "features", "green compliance certificate/authority", 
"green compliant", "hdmi", "headphone jack", "height w/ stand", 
"limited warranty", "manufacturer", "maximum resolution", "media player", 
"motion interpolation technology", "mpn", "multi pack indicator", 
"name", "native contrast ratio", "number of hdmi ports", "number of usb ports", 
"operating power consumption", "origin of components", "package contents", 
"primary color", "product dimensions", "product in in (l x w x h)", 
"product model", "product series", "product type", "remote control incl", 
"remote included", "resolution", "response time", "rms output power", 
"scan format", "screen size", "shipping weight (in lb)", "sound system", 
"speaker output power (w)", "speakers", "standard refresh rate", 
"standby power consumption", "total number of hdmi ports", "tv definition", 
"tv features", "tv refresh rate (hz)", "tv resolution", "tv screen size (in)", 
"tv screen size range", "tv speakers", "tv technology", "unspsc", 
"usb", "vertical viewing angle", "vesa mount standard", "vga", 
"video signal standard", "viewing angle", "warranty length", 
"wattage", "weight (approx)", "weight w/ stand (approx)", "width w/ stand"
), class = "factor"), Metric = structure(c(40L, 13L, 57L, 69L, 
43L, 72L, 72L, 45L, 38L, 72L, 55L, 44L, 72L, 72L, 15L, 3L, 69L, 
72L, 46L, 26L, 70L, 27L, 1L, 29L, 26L, 54L, 58L, 12L, 39L, 25L, 
42L, 11L, 72L, 37L, 28L, 52L, 36L, 39L, 24L, 19L, 72L, 33L, 72L, 
18L, 72L, 49L, 6L, 10L, 23L, 62L, 13L, 48L, 64L, 31L, 72L, 72L, 
41L, 66L, 72L, 72L, 64L, 16L, 63L, 65L, 4L, 32L, 21L, 58L, 71L, 
35L, 8L, 20L, 72L, 37L, 17L, 33L, 14L, 7L, 72L, 50L, 14L, 2L, 
34L, 59L, 59L, 60L, 5L, 33L, 51L, 47L, 67L, 67L, 53L, 61L, 68L, 
51L, 43L, 30L, 72L, 9L, 22L, 49L, 56L, 33L), .Label = c("1", 
"1-year limited", "1 Year", "1,000:1", "1,140 x 145 x 705 in ; 65.6 lb", 
"10.40 lb", "100 x 100", "1080p", "1080p (HDTV)", "11.20 lb", 
"14", "14 W", "16:9", "178 degrees", "18.30 in", "1920 x 1080", 
"2", "200 x 100", "21", "22 in", "22 in FHD LED TV; Remote Control", 
"25.4", "26.20 in", "29", "29 in", "29L1350U", "3", "300 mW", 
"33.80 W", "36.5 x 6.5 x 23.0", "365 Nit", "50 W", "52161505", 
"6 W", "6.50 ms", "60", "60 Hz", "7.10 in", "720p", "9", "ATSC", 
"Audyssey EQ", "Black", "CEC", "China", "ClearScan 120 Hz", "Does Not Contain a Battery", 
"Edge LED", "HDTV", "HDTV 1080p", "Imported", "Internet Apps", 
"KDL40W600B", "L1350U", "LCD", "LCD, Internet Connected, LED", 
"LED", "LED-LCD TV", "LG", "LG 47LY340C - 47 in - commercial use LED-backlit L", 
"No", "NTSC", "PLED2243A", "ProScan", "PROSCAN PLED2243A 22 in 1080p 60 Hz LED HDTV - PTR", 
"Sleep Timer; Auto Program", "Sony", "Sony KDL40W600B 40 in 1080p 60 Hz Smart LED TV (20", 
"Toshiba", "Toshiba 29L1350U 29 in 720p LED-LCD TV - 16:9 - HD", 
"yes", "Yes"), class = "factor")), .Names = c("Dimension", "Date", 
"Metric"), class = c("data.table", "data.frame"), row.names = c(NA, 
-104L), .internal.selfref = <pointer: 0x00000000003d0788>)
Run Code Online (Sandbox Code Playgroud)

akr*_*run 6

你可以使用dcastdevel版本的data.tableie.v1.9.5.安装说明是here

library(data.table)#v1.9.5+
dcast(setDT(df), Dimension~Date, value.var='Metric', 
               fun.aggregate=function(x) toString(unique(x)))
#   Dimension  Fri Mon Tue Wed
#1:         A 7, 8  23  25    
#2:         B        7       9
Run Code Online (Sandbox Code Playgroud)

要么

library(dplyr)
library(tidyr)
df %>%
   group_by(Dimension, Date) %>% 
   summarise(Metric=toString(unique(Metric))) %>% 
   spread(Date, Metric, fill='')
#   Dimension  Fri Mon Tue Wed
#1         A 7, 8  23  25    
#2         B        7       9
Run Code Online (Sandbox Code Playgroud)

更新

使用来自OP的帖子的新数据集

 setDF(df2)
 df2 %>% 
     group_by(Dimension, Date) %>% 
     summarise(Metric=toString(unique(Metric))) %>%
     spread(Date, Metric, fill='') %>%
     head(2) %>%
     select(1:3)
 #    Dimension 16 analog tuner
 #1 10994030020  9             
 #2 12300245685            NTSC
Run Code Online (Sandbox Code Playgroud)