在 R 的 data.table 中按组计算计数变量的比例

Dan*_*nka 3 r data.table

我有以下数据:

# Example data, shown as the output of dput(mydata): a 40-row data.table with
# three factor columns (groupSize, gender, startYear) and a numeric `count`
# column. The gender factor declares the levels "F", "M" and "U", but only the
# codes 1L ("F") and 2L ("M") occur in the rows below.
dput(mydata)
structure(list(groupSize = structure(c(2L, 1L, 2L, 1L, 4L, 4L, 
3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 4L, 4L, 2L, 2L, 1L, 1L, 3L, 3L, 
4L, 4L, 3L, 3L, 2L, 2L, 1L, 1L, 4L, 4L, 2L, 2L, 4L, 4L, 3L, 3L, 
1L, 1L), .Label = c("small", "intermediate", "large", "huge"), class = "factor"), 
    gender = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
    1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 
    1L), .Label = c("F", "M", "U"), class = "factor"), startYear = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("2014", "2015", 
    "2016", "2017", "2018"), class = "factor"), count = c(7546, 
    3500, 5930, 7668, 18114, 13826, 11943, 10083, 147, 2791, 
    17158, 19389, 2063, 17901, 11007, 1660, 6660, 15198, 496, 
    18716, 17385, 12726, 11409, 4711, 16140, 244, 15251, 6485, 
    5014, 1104, 438, 10930, 15582, 15626, 2121, 6339, 135, 15432, 
    12263, 10607)), row.names = c(NA, -40L), class = c("data.table", 
"data.frame"))
Run Code Online (Sandbox Code Playgroud)

我想计算每一年中、每个 groupSize 分组内男性和女性各自所占的比例。例如,结果应该能告诉我:在 2014 年的 "small" 组中,女性占 45%,男性占 55%。如何在 R 中用 data.table 实现这一点?

Ron*_*hah 5

如果您想得到的是每组内各行计数所占的百分比,可以这样做:

library(data.table)

# A data.table recreated from dput()/structure() text carries no valid internal
# self-reference, so `:=` would warn ("Invalid .internal.selfref detected") and
# fall back to a shallow copy. setDT() re-registers `mydata` as a proper
# data.table in place; it is harmless if `mydata` is already a valid one.
setDT(mydata)

# Within each (startYear, groupSize) group, express every row's `count` as a
# percentage of that group's total count. `:=` adds the `prop` column by
# reference (no copy of `mydata`); the trailing `[]` makes the updated table
# print, since `:=` on its own returns invisibly.
mydata[, prop := count / sum(count) * 100, by = .(startYear, groupSize)][]


#       groupSize gender startYear count       prop
# 1: intermediate      F      2014  7546 55.9958445
# 2:        small      F      2014  3500 31.3395415
# 3: intermediate      M      2014  5930 44.0041555
# 4:        small      M      2014  7668 68.6604585
# 5:         huge      F      2014 18114 56.7125861
# 6:         huge      M      2014 13826 43.2874139
# 7:        large      F      2014 11943 54.2222828
# 8:        large      M      2014 10083 45.7777172
#....
Run Code Online (Sandbox Code Playgroud)