将每一行除以其总和

Ban*_*njo 4 r dplyr

我有这个数据:

 merged_dat1
# A tibble: 4 x 35
# Groups:   Product.Name [4]
  Product.Name  also  apps battery better   big camera  case cheap   day definitely enough  even however
  <chr>        <int> <int>   <int>  <int> <int>  <int> <int> <int> <int>      <int>  <int> <int>   <int>
1 BLU Studio ~   498   545    1552    465   306    839   406   161   413        225    156   590     178
2 iphone 4s       71    37     380     43    19     13    21    38    43         25     16   128      52
3 Motorola Mo~   160   221     253    123    69    428    39   125   101         49    157   133      79
4 Samsung Gal~    76    70     122     49    25     69    22    17    31         15     53    71      31
# ... with 21 more variables: issues <int>, life <int>, little <int>, long <int>, lot <int>, low <int>,
#   many <int>, memory <int>, much <int>, overall <int>, phones <int>, pictures <int>, pretty <int>,
#   quality <int>, right <int>, screen <int>, size <int>, still <int>, use <int>, way <int>, well <int>
Run Code Online (Sandbox Code Playgroud)

我想将一行的每个值除以它的行总和。

例如,第一行的行总和为 15044。第一行的每个值都应该除以这个值。这可能是重复的,但我找不到关于此的主题。如果有人有dplyr解决方案,那就太好了。

# Reproducible definition of the data printed above (looks like dput() output):
# 4 rows, a character Product.Name column followed by 34 integer columns, with
# class "grouped_df" and Product.Name as the grouping variable.
# The expression is not assigned to a name here; assign it (e.g. to
# `merged_dat1`) before running code against it.
structure(list(Product.Name = c("BLU Studio 5.0", "iphone 4s", 
"Motorola Moto E", "Samsung Galaxy II"), also = c(498L, 71L, 
160L, 76L), apps = c(545L, 37L, 221L, 70L), battery = c(1552L, 
380L, 253L, 122L), better = c(465L, 43L, 123L, 49L), big = c(306L, 
19L, 69L, 25L), camera = c(839L, 13L, 428L, 69L), case = c(406L, 
21L, 39L, 22L), cheap = c(161L, 38L, 125L, 17L), day = c(413L, 
43L, 101L, 31L), definitely = c(225L, 25L, 49L, 15L), enough = c(156L, 
16L, 157L, 53L), even = c(590L, 128L, 133L, 71L), however = c(178L, 
52L, 79L, 31L), issues = c(334L, 49L, 60L, 23L), life = c(649L, 
60L, 136L, 25L), little = c(283L, 45L, 156L, 44L), long = c(197L, 
49L, 65L, 25L), lot = c(316L, 35L, 107L, 39L), low = c(203L, 
25L, 116L, 24L), many = c(207L, 32L, 77L, 51L), memory = c(200L, 
10L, 148L, 48L), much = c(421L, 79L, 165L, 53L), overall = c(206L, 
35L, 77L, 8L), phones = c(749L, 84L, 214L, 63L), pictures = c(263L, 
12L, 94L, 32L), pretty = c(332L, 25L, 97L, 31L), quality = c(669L, 
40L, 186L, 49L), right = c(189L, 49L, 45L, 33L), screen = c(1359L, 
71L, 252L, 82L), size = c(244L, 7L, 93L, 55L), still = c(416L, 
48L, 107L, 28L), use = c(650L, 126L, 256L, 140L), way = c(218L, 
40L, 44L, 12L), well = c(605L, 103L, 205L, 114L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), vars = "Product.Name", drop = TRUE, indices = list(
    0L, 1L, 2L, 3L), group_sizes = c(1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
    Product.Name = c("BLU Studio 5.0", "iphone 4s", "Motorola Moto E", 
    "Samsung Galaxy II")), class = "data.frame", row.names = c(NA, 
-4L), vars = "Product.Name", drop = TRUE))
Run Code Online (Sandbox Code Playgroud)

akr*_*run 6

我们可以先创建一个rowSums列,然后执行mutate_if

library(tidyverse)

# Convert to a plain data.frame (which drops the grouped_df class), add a
# helper column `newSum` holding each row's total over the numeric columns,
# divide every numeric column by that total, then remove the helper column.
merged_dat1 %>%
  as.data.frame() %>%
  mutate(newSum = reduce(select_if(., is.numeric), `+`)) %>%
  mutate_if(is.numeric, list(~ . / newSum)) %>%
  select(-newSum)
#  Product.Name       also       apps    battery     better         big      camera       case      cheap
#1    BLU Studio 5.0 0.03310290 0.03622707 0.10316405 0.03090933 0.020340335 0.055769742 0.02698750 0.01070194
#2         iphone 4s 0.03717277 0.01937173 0.19895288 0.02251309 0.009947644 0.006806283 0.01099476 0.01989529
#3   Motorola Moto E 0.03450507 0.04766013 0.05456114 0.02652577 0.014880311 0.092301057 0.00841061 0.02695708
#4 Samsung Galaxy II 0.04662577 0.04294479 0.07484663 0.03006135 0.015337423 0.042331288 0.01349693 0.01042945
#         day  definitely      enough       even    however     issues       life     little       long        lot
#1 0.02745281 0.014956129 0.010369583 0.03921829 0.01183196 0.02220154 0.04314012 0.01881149 0.01309492 0.02100505
#2 0.02251309 0.013089005 0.008376963 0.06701571 0.02722513 0.02565445 0.03141361 0.02356021 0.02565445 0.01832461
#3 0.02178132 0.010567177 0.033858098 0.02868234 0.01703688 0.01293940 0.02932931 0.03364244 0.01401768 0.02307526
#4 0.01901840 0.009202454 0.032515337 0.04355828 0.01901840 0.01411043 0.01533742 0.02699387 0.01533742 0.02392638
#         low       many      memory       much     overall     phones    pictures     pretty    quality      right
#1 0.01349375 0.01375964 0.013294337 0.02798458 0.013693167 0.04978729 0.017482053 0.02206860 0.04446956 0.01256315
#2 0.01308901 0.01675393 0.005235602 0.04136126 0.018324607 0.04397906 0.006282723 0.01308901 0.02094241 0.02565445
#3 0.02501617 0.01660556 0.031917188 0.03558335 0.016605564 0.04615053 0.020271727 0.02091870 0.04011214 0.00970455
#4 0.01472393 0.03128834 0.029447853 0.03251534 0.004907975 0.03865031 0.019631902 0.01901840 0.03006135 0.02024540
#      screen        size      still        use         way       well
#1 0.09033502 0.016219091 0.02765222 0.04320659 0.014490827 0.04021537
#2 0.03717277 0.003664921 0.02513089 0.06596859 0.020942408 0.05392670
#3 0.05434548 0.020056071 0.02307526 0.05520811 0.009488894 0.04420962
#4 0.05030675 0.033742331 0.01717791 0.08588957 0.007361963 0.06993865
Run Code Online (Sandbox Code Playgroud)

注意:由于使用了 `is.numeric`,这种方法只会对数值列求和并做除法,相当于同时完成了列类型检查


此外,如果我们使用 base R,这可以更紧凑地写成

# Column 1 is Product.Name; every other column is divided by the row total of
# those same columns. The vector of row totals is recycled down each column,
# so row i is divided by its own sum. The result overwrites columns 2..n of
# merged_dat1 in place.
merged_dat1[-1] <- merged_dat1[-1]/rowSums(merged_dat1[-1])
Run Code Online (Sandbox Code Playgroud)

  • 代替 `reduce("+")`,你可以使用 `rowSums()`,它更具可读性,虽然不太通用(使用 `reduce` 你可以使用任意函数)。 (2认同)

tmf*_*mnk 4

使用dplyr,您还可以尝试:

# Remove the grouping, then divide every column except the first by the row
# total of those same columns. mutate() receives an unnamed data frame, so the
# existing columns are overwritten in place.
# Assumes `df` is the question's data with Product.Name as column 1.
# NOTE(review): dplyr >= 1.1.0 provides pick() for selecting columns without
# applying a function; across() without `.fns` is kept here as originally
# posted.
df %>%
  ungroup() %>%
  mutate(across(-1) / rowSums(across(-1)))

#   Product.Name   also   apps battery better     big  camera    case  cheap    day
#   <chr>         <dbl>  <dbl>   <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>
# 1 BLU Studio … 0.0331 0.0362  0.103  0.0309 0.0203  0.0558  0.0270  0.0107 0.0275
# 2 iphone 4s    0.0372 0.0194  0.199  0.0225 0.00995 0.00681 0.0110  0.0199 0.0225
# 3 Motorola Mo… 0.0345 0.0477  0.0546 0.0265 0.0149  0.0923  0.00841 0.0270 0.0218
# 4 Samsung Gal… 0.0466 0.0429  0.0748 0.0301 0.0153  0.0423  0.0135  0.0104 0.0190
Run Code Online (Sandbox Code Playgroud)

或者按列类型求和:

# Same idea, but the columns are chosen by type rather than by position:
# every numeric column is divided by the row total over the numeric columns.
# Assumes `df` is the question's data.
# NOTE(review): dplyr >= 1.1.0 provides pick() for selecting columns without
# applying a function; across() without `.fns` is kept here as originally
# posted.
df %>%
  ungroup() %>%
  mutate(across(where(is.numeric)) / rowSums(across(where(is.numeric))))
Run Code Online (Sandbox Code Playgroud)