我有这个数据:
merged_dat1
# A tibble: 4 x 35
# Groups: Product.Name [4]
Product.Name also apps battery better big camera case cheap day definitely enough even however
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 BLU Studio ~ 498 545 1552 465 306 839 406 161 413 225 156 590 178
2 iphone 4s 71 37 380 43 19 13 21 38 43 25 16 128 52
3 Motorola Mo~ 160 221 253 123 69 428 39 125 101 49 157 133 79
4 Samsung Gal~ 76 70 122 49 25 69 22 17 31 15 53 71 31
# ... with 21 more variables: issues <int>, life <int>, little <int>, long <int>, lot <int>, low <int>,
# many <int>, memory <int>, much <int>, overall <int>, phones <int>, pictures <int>, pretty <int>,
# quality <int>, right <int>, screen <int>, size <int>, still <int>, use <int>, way <int>, well <int>
Run Code Online (Sandbox Code Playgroud)
我想将一行的每个值除以它的行总和。
例如,第一行的行总和为 15044。第一行的每个值都应该除以这个值。这可能是重复的,但我找不到关于此的主题。如果有人有dplyr解决方案,那就太好了。
# Reproducible copy of the example data (appears to be dput() output; the
# question refers to it as merged_dat1). It is a 4-row grouped tibble
# (class "grouped_df", grouped by Product.Name) with one character column,
# Product.Name, followed by 34 integer columns.
structure(list(Product.Name = c("BLU Studio 5.0", "iphone 4s",
"Motorola Moto E", "Samsung Galaxy II"), also = c(498L, 71L,
160L, 76L), apps = c(545L, 37L, 221L, 70L), battery = c(1552L,
380L, 253L, 122L), better = c(465L, 43L, 123L, 49L), big = c(306L,
19L, 69L, 25L), camera = c(839L, 13L, 428L, 69L), case = c(406L,
21L, 39L, 22L), cheap = c(161L, 38L, 125L, 17L), day = c(413L,
43L, 101L, 31L), definitely = c(225L, 25L, 49L, 15L), enough = c(156L,
16L, 157L, 53L), even = c(590L, 128L, 133L, 71L), however = c(178L,
52L, 79L, 31L), issues = c(334L, 49L, 60L, 23L), life = c(649L,
60L, 136L, 25L), little = c(283L, 45L, 156L, 44L), long = c(197L,
49L, 65L, 25L), lot = c(316L, 35L, 107L, 39L), low = c(203L,
25L, 116L, 24L), many = c(207L, 32L, 77L, 51L), memory = c(200L,
10L, 148L, 48L), much = c(421L, 79L, 165L, 53L), overall = c(206L,
35L, 77L, 8L), phones = c(749L, 84L, 214L, 63L), pictures = c(263L,
12L, 94L, 32L), pretty = c(332L, 25L, 97L, 31L), quality = c(669L,
40L, 186L, 49L), right = c(189L, 49L, 45L, 33L), screen = c(1359L,
71L, 252L, 82L), size = c(244L, 7L, 93L, 55L), still = c(416L,
48L, 107L, 28L), use = c(650L, 126L, 256L, 140L), way = c(218L,
40L, 44L, 12L), well = c(605L, 103L, 205L, 114L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), vars = "Product.Name", drop = TRUE, indices = list(
0L, 1L, 2L, 3L), group_sizes = c(1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Product.Name = c("BLU Studio 5.0", "iphone 4s", "Motorola Moto E",
"Samsung Galaxy II")), class = "data.frame", row.names = c(NA,
-4L), vars = "Product.Name", drop = TRUE))
Run Code Online (Sandbox Code Playgroud)
我们可以先创建一个rowSums列,然后执行mutate_if
library(tidyverse)
# Convert to a plain data frame, add a temporary row-total column (sum of all
# numeric columns), divide every numeric column by that total, then drop the
# temporary column again.
merged_dat1 %>%
  as.data.frame() %>%
  mutate(row_total = reduce(select_if(., is.numeric), `+`)) %>%
  mutate_if(is.numeric, ~ . / row_total) %>%
  select(-row_total)
# Product.Name also apps battery better big camera case cheap
#1 BLU Studio 5.0 0.03310290 0.03622707 0.10316405 0.03090933 0.020340335 0.055769742 0.02698750 0.01070194
#2 iphone 4s 0.03717277 0.01937173 0.19895288 0.02251309 0.009947644 0.006806283 0.01099476 0.01989529
#3 Motorola Moto E 0.03450507 0.04766013 0.05456114 0.02652577 0.014880311 0.092301057 0.00841061 0.02695708
#4 Samsung Galaxy II 0.04662577 0.04294479 0.07484663 0.03006135 0.015337423 0.042331288 0.01349693 0.01042945
# day definitely enough even however issues life little long lot
#1 0.02745281 0.014956129 0.010369583 0.03921829 0.01183196 0.02220154 0.04314012 0.01881149 0.01309492 0.02100505
#2 0.02251309 0.013089005 0.008376963 0.06701571 0.02722513 0.02565445 0.03141361 0.02356021 0.02565445 0.01832461
#3 0.02178132 0.010567177 0.033858098 0.02868234 0.01703688 0.01293940 0.02932931 0.03364244 0.01401768 0.02307526
#4 0.01901840 0.009202454 0.032515337 0.04355828 0.01901840 0.01411043 0.01533742 0.02699387 0.01533742 0.02392638
# low many memory much overall phones pictures pretty quality right
#1 0.01349375 0.01375964 0.013294337 0.02798458 0.013693167 0.04978729 0.017482053 0.02206860 0.04446956 0.01256315
#2 0.01308901 0.01675393 0.005235602 0.04136126 0.018324607 0.04397906 0.006282723 0.01308901 0.02094241 0.02565445
#3 0.02501617 0.01660556 0.031917188 0.03558335 0.016605564 0.04615053 0.020271727 0.02091870 0.04011214 0.00970455
#4 0.01472393 0.03128834 0.029447853 0.03251534 0.004907975 0.03865031 0.019631902 0.01901840 0.03006135 0.02024540
# screen size still use way well
#1 0.09033502 0.016219091 0.02765222 0.04320659 0.014490827 0.04021537
#2 0.03717277 0.003664921 0.02513089 0.06596859 0.020942408 0.05392670
#3 0.05434548 0.020056071 0.02307526 0.05520811 0.009488894 0.04420962
#4 0.05030675 0.033742331 0.01717791 0.08588957 0.007361963 0.06993865
Run Code Online (Sandbox Code Playgroud)
注意:由于 mutate_if(is.numeric, ...) 只作用于数值列,这种写法同时完成了类型检查。
此外,如果我们使用 base R,可以更紧凑地写成:
# Replace every column except the first (Product.Name) with its value divided
# by the row sum taken over those same columns.
merged_dat1[-1] <- merged_dat1[-1]/rowSums(merged_dat1[-1])
Run Code Online (Sandbox Code Playgroud)
使用dplyr,您还可以尝试:
df %>%
  ungroup() %>%
  mutate(across(-1)/rowSums(across(-1)))

  Product.Name also apps battery better big camera case cheap day
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 BLU Studio … 0.0331 0.0362 0.103 0.0309 0.0203 0.0558 0.0270 0.0107 0.0275
2 iphone 4s 0.0372 0.0194 0.199 0.0225 0.00995 0.00681 0.0110 0.0199 0.0225
3 Motorola Mo… 0.0345 0.0477 0.0546 0.0265 0.0149 0.0923 0.00841 0.0270 0.0218
4 Samsung Gal… 0.0466 0.0429 0.0748 0.0301 0.0153 0.0423 0.0135 0.0104 0.0190
Run Code Online (Sandbox Code Playgroud)
或者按列类型求和:

df %>%
  ungroup() %>%
  mutate(across(where(is.numeric))/rowSums(across(where(is.numeric))))
Run Code Online (Sandbox Code Playgroud)