我有以下数据集,其中包含 ID 以及每个 ID 对应的数值(Value)。我需要显示数值最大的前 10 个 ID,并将其余的 ID 合并到“其他”(OTHERS)类别下。
我已经研究过这个问题及其答案,但它针对的是计数而不是求和,所以那种方法不适用于我的情况。
我想要的结果:
ID Value
63366849 42084408
63475529 21879648
6774736 21321888
91274582 17393328
63445915 15215002
1097911044 9926442
1095812405 8971332
1097910557 5388376
37548920 5114854
OTHERS 153587848
Run Code Online (Sandbox Code Playgroud)
我的数据:
# Example data: one row per ID with its numeric Value, stored as a tibble
# (class "tbl_df"); the row.names attribute declares 196 rows.
# IDs are kept as character strings, not numbers.
df <- structure(list(ID = c("63366849", "63475529", "6774736",
"91274582", "63445915", "1097911044", "1095812405", "1097910557",
"37548920", "1005153076", "13513021", "51991938", "1010090155",
"91265898", "91237574", "91344448", "1095956598", "28065538",
"63341531", "63335642", "5637749", "17419836", "5567029", "5651301",
"37839500", "63450446", "28424298", "29264885", "63328148", "63562603",
"51702988", "7416450", "1005234045", "91520220", "91159937",
"13801492", "9260536", "37722978", "63355924", "1098711718",
"63443769", "60306461", "28485149", "2151526", "2127233", "1096063398",
"27948572", "13834945", "27938888", "91179848", "41678031", "37837953",
"91233367", "5557908", "28012681", "33446782", "91243709", "1007790961",
"1095826946", "1095926858", "20189860", "24049724", "27903426",
"28133395", "41446577", "37319481", "2033552", "27950302", "91200190",
"91205127", "9527919", "1095303440", "13826841", "91218008",
"63280472", "24074310", "37805433", "13801074", "28037341", "63353740",
"1095793583", "1102371309", "1116493966", "2127316", "2153230",
"37316948", "51778219", "5503745", "1099735096", "1222254600",
"27786180", "2162536", "27964677", "2020408", "91349686", "37514200",
"30208806", "1102351224", "28354311", "27929976", "63523710",
"13838438", "28296329", "28410262", "28494063", "5721778", "91275248",
"91353706", "37751608", "1097610166", "1098080648", "13810090",
"63444976", "1221464992", "1097497475", "91100475", "1095312933",
"28131889", "63506738", "7922341", "28238547", "27985819", "27958501",
"23912889", "5707630", "1102388693", "24028654", "28128637",
"5554160", "63369230", "36521093", "91525097", "27929902", "63441041",
"1097919703", "40984557", "1097129811", "13843871", "63301433",
"28236969", "37831774", "1000942168", "1005220322", "1098661759",
"1095843775", "28399609", "37211106", "5683927", "28254346",
"91268076", "28295325", "28496274", "13820294", "28354968", "37793321",
"63495244", "63299126", "13849979", "63357743", "60262321", "1098753025",
"91435400", "2195064", "63345546", "63251135", "13827671", "2007126",
"40378506", "1097121371", "37828831", "5707059", "63324477",
"1102644458", "37796841", "28334234", "28010824", "28495363",
"27929493", "5558783", "51974763", "28296333", "63489129", "1102385538",
"13814150", "13829893", "28377189", "91342964", "164497637",
"52326169", "63286143", "1099738639", "63503298", "1097913686",
"91070753", "37652636", "60352132"), Value = c(42084408,
21879648, 21321888, 17393328, 15215002, 9926442, 8971332, 5388376,
5114854, 5093730, 5004621, 3696756, 3424872, 3347432, 3337895,
3105759, 3104409, 3056238, 2748870, 2738808, 2662730, 2555784,
2268480, 2126700, 2051811, 2035200, 2025495, 1962000, 1926012,
1829736, 1822980, 1657659, 1574901, 1546320, 1539355, 1480530,
1456140, 1372302, 1372302, 1367235, 1350330, 1330560, 1327761,
1289520, 1265556, 1258350, 1245060, 1228770, 1228545, 1209705,
1196100, 1180533, 1175274, 1170286, 1144792, 1106820, 1106820,
1080621, 1080621, 1080621, 1080621, 1080621, 1080621, 1080621,
1080621, 1080264, 1067400, 1067400, 1003000, 993510, 992520,
988560, 957408, 957408, 922350, 889893, 888300, 883800, 855150,
835920, 833481, 833481, 833481, 833481, 833481, 833481, 833481,
833481, 823458, 775300, 741420, 734760, 734760, 729882, 672660,
666740, 631020, 621000, 621000, 600651, 598050, 553410, 553410,
553410, 553410, 553410, 553410, 553410, 552690, 503340, 468120,
439800, 416040, 415136, 403235, 398700, 383629, 383629, 383629,
367380, 364941, 360207, 360088, 346680, 339582, 339201, 339201,
339201, 339201, 303120, 300450, 289080, 282042, 238080, 234060,
219531, 211300, 195447, 195447, 192960, 192960, 169000, 169000,
169000, 163868, 163868, 156540, 143460, 136080, 132048, 127818,
127818, 124920, 124920, 124920, 124920, 105120, 93690, 93690,
77580, 66024, 64650, 62460, 62460, 62376, 58800, 58338, 57240,
50524, 49260, 49260, 48240, 46528, 44100, 43110, 42930, 41640,
40755, 38715, 37050, 35685, 33012, 30687, 28620, 28620, 24678,
24570, 15189, 15120, 15120, 14504, 14310, 11424, 6000, 5940,
5428)), row.names = c(NA, -196L), class = c("tbl_df", "tbl",
"data.frame"))
Run Code Online (Sandbox Code Playgroud)
使用 forcats 包的一个更简洁的方案:
# Keep the 9 IDs with the largest total Value and pool every other ID into a
# single "OTHERS" row, then print the result with "OTHERS" last.
library(dplyr)
library(forcats)

df %>%
  # `w = Value` makes fct_lump_n() rank levels by their summed Value rather
  # than by row count, so lumping is driven by totals, not frequencies.
  mutate(
    ID = fct_lump_n(factor(ID), n = 9, w = Value, other_level = "OTHERS")
  ) %>%
  group_by(ID) %>%
  summarize(Value = sum(Value)) %>%
  # Sorting on `ID == "OTHERS"` first (FALSE sorts before TRUE) pushes the
  # pooled row to the bottom; the remaining rows are ordered by Value.
  arrange(ID == "OTHERS", desc(Value))

# A tibble: 10 × 2
#   ID              Value
#   <fct>           <dbl>
#   63366849     42084408
#   63475529     21879648
#   6774736      21321888
#   91274582     17393328
#   63445915     15215002
#   1097911044    9926442
#   1095812405    8971332
#   1097910557    5388376
#   37548920      5114854
#   OTHERS      153587848