累积计数粘贴

use*_*037 10 r dplyr

我有这个数据集:

  ID Set Type Count
1  1   1    A    NA
2  2   1    R    NA
3  3   1    R    NA
4  4   1    U    NA
5  5   1    U    NA
6  6   1    U    NA
7  7   2    A    NA
8  8   3    R    NA
9  9   3    R    NA
Run Code Online (Sandbox Code Playgroud)

对应的 dput 输出:

# Input data: nine observations in three sets; Count is still empty (all NA).
mystart <- data.frame(
  ID = 1:9,
  Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L),
  Type = factor(
    c("A", "R", "R", "U", "U", "U", "A", "R", "R"),
    levels = c("A", "R", "U")
  ),
  Count = rep(NA, 9L)
)
Run Code Online (Sandbox Code Playgroud)

通过使用dplyr包我怎么能得到这个:

  ID Set Type  Count
1  1   1    A     A1
2  2   1    R   A1R1
3  3   1    R   A1R2
4  4   1    U A1R2U1
5  5   1    U A1R2U2
6  6   1    U A1R2U3
7  7   2    A     A1
8  8   3    R     R1
9  9   3    R     R2
Run Code Online (Sandbox Code Playgroud)

同样给出 dput 输出:

# Desired result: Count holds the running per-type tally within each Set,
# pasted together as text (e.g. "A1R2U1").
myend <- data.frame(
  ID = 1:9,
  Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L),
  Type = factor(
    c("A", "R", "R", "U", "U", "U", "A", "R", "R"),
    levels = c("A", "R", "U")
  ),
  Count = factor(
    c("A1", "A1R1", "A1R2", "A1R2U1", "A1R2U2", "A1R2U3", "A1", "R1", "R2"),
    levels = c("A1", "A1R1", "A1R2", "A1R2U1", "A1R2U2", "A1R2U3", "R1", "R2")
  )
)
Run Code Online (Sandbox Code Playgroud)


简而言之,我想在每个"Set"分组内,对"Type"列中各取值的出现次数做累积计数,并把这些计数以文本形式累积拼接后写入"Count"列.

参考类似的帖子后,我已经得到了比较接近的结果:

# Result obtained so far: Count is only the within-Set/Type row number.
myend <- data.frame(
  ID = 1:9,
  Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L),
  Type = factor(
    c("A", "R", "R", "U", "U", "U", "A", "R", "R"),
    levels = c("A", "R", "U")
  ),
  Count = c(1L, 1L, 2L, 1L, 2L, 3L, 1L, 1L, 2L)
)
Run Code Online (Sandbox Code Playgroud)

使用的代码如下:

library(dplyr)

# Read the raw table from disk (header row present, short rows padded).
myend <- read.table("mydata.txt", header = TRUE, fill = TRUE)

# Number the rows within each Set/Type combination (1, 2, 3, ...).
myend %>%
  group_by(Set, Type) %>%
  mutate(Count = seq_len(n())) %>%
  # The pipe already supplies the data; writing ungroup(myend) would pass the
  # data frame as an extra "grouping variable to drop" argument.
  ungroup()
Run Code Online (Sandbox Code Playgroud)

非常感谢您的帮助,

dig*_*All 7

Base R版本:

# Build the cumulative "label + count" string for every position of x.
#
# x is coerced to character. Walking through it once, a named integer tally
# is kept (one entry per distinct label, in order of first appearance); after
# each element the whole tally is pasted together, e.g. "A1R2U1".
#
# Returns an unnamed character vector of the same length as x.
aggregateGroup <- function(x) {
  labels <- as.character(x)
  result <- character(length(labels))
  tally <- integer(0)

  for (pos in seq_along(labels)) {
    key <- labels[pos]
    # A label not seen yet indexes to NA; na.rm = TRUE turns that into 0 + 1.
    tally[key] <- sum(tally[key], 1L, na.rm = TRUE)
    result[pos] <- paste0(names(tally), tally, collapse = "")
  }

  result
}

# Apply aggregateGroup() to the Type values of each Set and write the
# per-set results back into Count at the matching row positions.
mystart$Count <- ave(as.character(mystart$Type), mystart$Set,
                     FUN = aggregateGroup)

> mystart
  ID Set Type  Count
1  1   1    A     A1
2  2   1    R   A1R1
3  3   1    R   A1R2
4  4   1    U A1R2U1
5  5   1    U A1R2U2
6  6   1    U A1R2U3
7  7   2    A     A1
8  8   3    R     R1
9  9   3    R     R2
Run Code Online (Sandbox Code Playgroud)


Jan*_*Jan 6

一个dplyr版本:

# Within each Set, keep a running count of every type and format all three
# counts (zeros included) into one label such as "A1R2U0".
mystart %>%
  group_by(Set) %>%
  mutate(
    Count = sprintf(
      "A%dR%dU%d",
      cumsum(Type == "A"),
      cumsum(Type == "R"),
      cumsum(Type == "U")
    )
  ) %>%
  ungroup()
Run Code Online (Sandbox Code Playgroud)

其输出为:

# A tibble: 9 x 4
     ID   Set Type  Count 
  <int> <int> <chr> <chr> 
1     1     1 A     A1R0U0
2     2     1 R     A1R1U0
3     3     1 R     A1R2U0
4     4     1 U     A1R2U1
5     5     1 U     A1R2U2
6     6     1 U     A1R2U3
7     7     2 A     A1R0U0
8     8     3 R     A0R1U0
9     9     3 R     A0R2U0
Run Code Online (Sandbox Code Playgroud)


如果想省略计数为零的类型,需要把拼接逻辑封装成如下的函数:

# Paste several running-count vectors into one label per position, leaving
# out any entry whose count is zero.
#
# lst: a named list of equally long count vectors; each name is used as the
#      prefix for the counts of that element.
# Returns a character vector, e.g. "A1R2" where A = 1, R = 2 and U = 0.
mygroup <- function(lst) {
  prefixes <- names(lst)
  pieces <- vector("list", length(lst))

  for (k in seq_along(lst)) {
    counts <- lst[[k]]
    # Zero counts contribute an empty string, so they vanish when pasted.
    pieces[[k]] <- ifelse(counts == 0, "", paste0(prefixes[k], counts))
  }

  do.call(paste0, pieces)
}

# Within each Set, compute the running count of every type and let mygroup()
# paste them together, leaving out types whose count is still zero.
mystart %>%
  group_by(Set) %>%
  mutate(
    Count = mygroup(
      lapply(
        c(A = "A", R = "R", U = "U"),
        function(level) cumsum(Type == level)
      )
    )
  ) %>%
  ungroup()
Run Code Online (Sandbox Code Playgroud)

这产生了

# A tibble: 9 x 4
     ID   Set Type  Count 
  <int> <int> <chr> <chr> 
1     1     1 A     A1    
2     2     1 R     A1R1  
3     3     1 R     A1R2  
4     4     1 U     A1R2U1
5     5     1 U     A1R2U2
6     6     1 U     A1R2U3
7     7     2 A     A1    
8     8     3 R     R1    
9     9     3 R     R2  
Run Code Online (Sandbox Code Playgroud)