为什么 rle() 不接受因子(factor)作为输入?

Cha*_*aff 5 r r-factor

我无法对这个 data.frame 应用 rle 函数,而该函数在另一组数据上运行良好:

fgroup <- aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(x)$values)
Run Code Online (Sandbox Code Playgroud)

产生错误:

Error in rle(x) : 'x' must be an atomic vector
Run Code Online (Sandbox Code Playgroud)

样本数据:

> dput(fevents2[1:20,])
structure(list(weeks = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), A1M.Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("2012-05-09", "2012-05-10", "2012-05-11",
"2012-05-14", "2012-05-15", "2012-05-17", "2012-05-18", "2012-05-21",
"2012-05-22", "2012-05-24", "2012-05-25", "2012-05-28", "2012-05-29",
"2012-05-30", "2012-05-31", "2012-06-04", "2012-06-05", "2012-06-07",
"2012-06-08", "2012-06-11", "2012-06-12", "2012-06-14", "2012-06-15",
"2012-06-18", "2012-06-19", "2012-06-21", "2012-06-22"), class = "factor"),
    vv = structure(c(8L, 8L, 8L, 20L, 24L, 24L, 24L, 1L, 13L,
    13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 24L), .Label = c("C AA",
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "nil"), class = "factor"),
    rv = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
    10L, 10L, 22L, 22L, 22L, 25L, 10L, 22L, 22L, 22L, 22L, 25L
    ), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
    "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
    "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
    "G VR", "G VV", "nil"), class = "factor"), ja = structure(c(12L,
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 25L, 25L,
    12L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("C AA", "C AJ",
    "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
    "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
    aa = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 25L, 25L,
    25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c("C AA",
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
    ), class = "factor"), bv = structure(c(25L, 11L, 11L, 11L,
    23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
    23L, 23L, 23L, 23L), .Label = c("C AA", "C AJ", "C BB", "C BV",
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
    "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
    aj = structure(c(7L, 7L, 7L, 25L, 25L, 25L, 25L, 25L, 9L,
    9L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 25L, 25L), .Label = c("C AA",
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
    ), class = "factor"), vb = structure(c(1L, 1L, 1L, 25L, 25L,
    25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 2L,
    25L, 2L, 2L), .Label = c("C AA", "C AJ", "C BB", "C BV",
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
    "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
    rj = structure(c(5L, 5L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
    16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("C AA",
    "C AJ", "C BB", "C BV", "C JR", "C RJ", "C RR", "C RV", "C VB",
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JR", "G RJ",
    "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
    rr = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("C AA",
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
    ), class = "factor"), vr = structure(c(5L, 5L, 5L, 25L, 25L,
    7L, 7L, 7L, 7L, 7L, 25L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
    7L), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
    "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
    "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
    "G VR", "G VV", "nil"), class = "factor"), bb = structure(c(4L,
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
    4L, 4L, 4L, 4L), .Label = c("C AA", "C AJ", "C BB", "C BV",
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G RJ", "G RR", "G RV",
    "G VB", "G VR", "G VV", "nil"), class = "factor"), jr = structure(c(25L,
    25L, 10L, 10L, 22L, 22L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
    25L, 25L, 25L, 5L, 5L, 5L, 5L), .Label = c("C AA", "C AJ",
    "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
    "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor")),
.Names = c("weeks",
"A1M.Date", "vv", "rv", "ja", "aa", "bv", "aj", "vb", "rj", "rr",
"vr", "bb", "jr"), row.names = c(NA, 20L), class = "data.frame")
Run Code Online (Sandbox Code Playgroud)

数据结构:

str(fevents2)
'data.frame':   1430 obs. of  14 variables:
 $ weeks   : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ A1M.Date: Factor w/ 27 levels "2012-05-09","2012-05-10",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ vv      : Factor w/ 24 levels "C AA","C AJ",..: 8 8 8 20 24 24 24 1 13 13 ..
 $ rv      : Factor w/ 25 levels "C AA","C AJ",..: 25 25 25 25 25 25 25 25 10 10 ...
 $ ja      : Factor w/ 25 levels "C AA","C AJ",..: 12 12 12 12 12 12 12 12 12 12 ...
 $ aa      : Factor w/ 25 levels "C AA","C AJ",..: 2 2 2 2 2 2 2 2 25 25 ...
 $ bv      : Factor w/ 25 levels "C AA","C AJ",..: 25 11 11 11 23 23 23 23 23 23 ...
 $ aj      : Factor w/ 25 levels "C AA","C AJ",..: 7 7 7 25 25 25 25 25 9 9 ...
 $ vb      : Factor w/ 25 levels "C AA","C AJ",..: 1 1 1 25 25 25 25 25 25 25 ...
 $ rj      : Factor w/ 23 levels "C AA","C AJ",..: 5 5 16 16 16 16 16 16 16 16 ...
 $ rr      : Factor w/ 25 levels "C AA","C AJ",..: 3 3 3 3 3 3 3 3 3 3 ...
 $ vr      : Factor w/ 25 levels "C AA","C AJ",..: 5 5 5 25 25 7 7 7 7 7 ...
 $ bb      : Factor w/ 24 levels "C AA","C AJ",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ jr      : Factor w/ 25 levels "C AA","C AJ",..: 25 25 10 10 22 22 25 25 25 25 ...
NULL
Run Code Online (Sandbox Code Playgroud)

我明白我的数据是因子(factor),但把因子转换为 numeric:

as.numeric(as.character(fevents2))
Run Code Online (Sandbox Code Playgroud)

或者:

sapply(fevents2, function(x) as.numeric(as.character(x)))
Run Code Online (Sandbox Code Playgroud)

无法解决我的问题:

Error in fevents3[, 3:14] : incorrect number of dimensions
In addition: Warning message:
In eval.with.vis(expr, envir, enclos) : NAs introduced by coercion
Run Code Online (Sandbox Code Playgroud)

下面是一个 rle 函数可以正常工作的示例 data.frame:

    dput(fevents[1:20,]
structure(list(weeks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), A1M.Date = c("2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09"), vv = c("C RR", "C RR",
"C RR", "G RR", "nil", "nil", "nil", "C AA", "G AA", "G AA",
"G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA",
"G AA", "nil"), rv = c("nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "C VB", "C VB", "G VB", "G VB", "G VB", "nil",
"G VB", "G VB", "G VB", "G VB", "G VB", "nil"), ja = c("C VV",
"C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV",
"C VV", "C VV", "nil", "nil", "G VV", "G VV", "G VV", "G VV",
"G VV", "G VV", "G VV"), aa = c("C AJ", "C AJ", "C AJ", "C AJ",
"C AJ", "C AJ", "C AJ", "C AJ", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "nil"), bv = c("nil",
"C VR", "C VR", "C VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR"), aj = c("C RJ", "C RJ", "C RJ", "nil",
"nil", "nil", "nil", "nil", "C RV", "C RV", "G RV", "G RV", "G RV",
"G RV", "G RV", "G RV", "G RV", "G RV", "nil", "nil"), vb = c("C AA",
"C AA", "C AA", "nil", "nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "C AJ", "nil", "C AJ",
"C AJ"), rj = c("C JR", "C JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR"), rr = c("C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB"), vr = c("C JA", "C JA", "C JA", "nil",
"nil", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "nil", "C RJ",
"C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ"
), bb = c("C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV"), jr = c("nil", "nil",
"C VB", "C VB", "G VB", "G VB", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "C JA", "C JA", "C JA", "C JA"
)), .Names = c("weeks", "A1M.Date", "vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), row.names = c(NA,
20L), class = "data.frame")

str(fevents)
'data.frame':   1430 obs. of  14 variables:
 $ weeks   : num  1 1 1 1 1 1 1 1 1 1 ...
 $ A1M.Date: chr  "2012-05-09" "2012-05-09" "2012-05-09" "2012-05-09" ...
 $ vv      : chr  "C RR" "C RR" "C RR" "G RR" ...
 $ rv      : chr  "nil" "nil" "nil" "nil" ...
 $ ja      : chr  "C VV" "C VV" "C VV" "C VV" ...
 $ aa      : chr  "C AJ" "C AJ" "C AJ" "C AJ" ...
 $ bv      : chr  "nil" "C VR" "C VR" "C VR" ...
 $ aj      : chr  "C RJ" "C RJ" "C RJ" "nil" ...
 $ vb      : chr  "C AA" "C AA" "C AA" "nil" ...
 $ rj      : chr  "C JR" "C JR" "G JR" "G JR" ...
 $ rr      : chr  "C BB" "C BB" "C BB" "C BB" ...
 $ vr      : chr  "C JA" "C JA" "C JA" "nil" ...
 $ bb      : chr  "C BV" "C BV" "C BV" "C BV" ...
 $ jr      : chr  "nil" "nil" "C VB" "C VB" ...
Run Code Online (Sandbox Code Playgroud)

我找到了一个确实“不太优雅”的变通办法:把 data.frame 以 CSV 格式写入文件,再用 stringsAsFactors = FALSE 重新导入。但我不想在代码里这样写……一定有更简单的方法来调整 data.frame 的结构,使其能被 rle 接受。

Rei*_*son 5

问题在于,因子*不是*原子向量,正如错误信息所明确指出的那样。要么先把所有因子转换为字符(而不是把它们强制转换为数值!),要么在您所应用的匿名函数内部进行转换。

因此,下面这段实现了第二种思路的代码可以正常工作:

aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
          function(x) rle(as.character(x))$values)
Run Code Online (Sandbox Code Playgroud)

运行结果如下:

> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+           function(x) rle(as.character(x))$values)
  weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1     1 C RR G RR  nil C AA G AA  nil  nil C VB G VB  nil C VB G VB  nil C VV
  ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1  nil C VV G VV C AJ  nil  nil C VR G VR C RJ  nil C RV G RV  nil C AA  nil
  vb.3 vb.4 vb.5 rj.1 rj.2   rr vr.1 vr.2 vr.3 vr.4 vr.5   bb jr.1 jr.2 jr.3
1 C AJ  nil C AJ C JR G JR C BB C JA  nil C RJ  nil C RJ C BV  nil C VB G VB
  jr.4 jr.5
1  nil C JA
Run Code Online (Sandbox Code Playgroud)

不过我不确定您期望得到什么样的结果——这里只有一个星期的数据,aggregate 和 rle 把所有的值都拼接到了一起。您是否想要针对 fevents2 中的每个变量分别得到 $values?

另一件事:

as.numeric(as.character(fevents2)) 不可能奏效,因为这些数据根本不是数字!而且,您不能把这些函数直接应用于整个数据框并指望得到预期的结果——即便它们能够运行的话。

sapply() 的做法应该可行。下面这个版本会检查每个变量是否为因子,如果是,则将其强制转换为字符:

fevents3 <- sapply(fevents2,
                   function(x) if(is.factor(x)) { as.character(x) } else { x })
Run Code Online (Sandbox Code Playgroud)

但请注意,sapply() 会把结果简化为一个矩阵,这会改变 aggregate() 所分派(dispatch)的方法:

> class(fevents3)
[1] "matrix"
Run Code Online (Sandbox Code Playgroud)

相反,也许可以这样做:

fevents3 <- lapply(fevents2,
                   function(x) if(is.factor(x)) { as.character(x) } else { x })
fevents3 <- data.frame(fevents3, stringsAsFactors = FALSE)
Run Code Online (Sandbox Code Playgroud)

现在,如果您想对拆分后数据的每一列分别应用 rle(),并让各列的结果保持独立:

spl <- split(fevents3, list(weeks = fevents3[, 1]))
res <- lapply(spl, function(x) lapply(x[, 3:14], function(y) rle(y)$values))
Run Code Online (Sandbox Code Playgroud)

结果为:

> res
$`1`
$`1`$vv
[1] "C RR" "G RR" "nil"  "C AA" "G AA" "nil" 

$`1`$rv
[1] "nil"  "C VB" "G VB" "nil"  "C VB" "G VB" "nil" 

$`1`$ja
[1] "C VV" "nil"  "C VV" "G VV"

$`1`$aa
[1] "C AJ" "nil" 

$`1`$bv
[1] "nil"  "C VR" "G VR"

$`1`$aj
[1] "C RJ" "nil"  "C RV" "G RV" "nil" 

$`1`$vb
[1] "C AA" "nil"  "C AJ" "nil"  "C AJ"

$`1`$rj
[1] "C JR" "G JR"

$`1`$rr
[1] "C BB"

$`1`$vr
[1] "C JA" "nil"  "C RJ" "nil"  "C RJ"

$`1`$bb
[1] "C BV"

$`1`$jr
[1] "nil"  "C VB" "G VB" "nil"  "C JA"
Run Code Online (Sandbox Code Playgroud)

这与上面 aggregate() 给出的结果相同,只是每个 rle() 的输出保持独立:

> unlist(res)
 1.vv1  1.vv2  1.vv3  1.vv4  1.vv5  1.vv6  1.rv1  1.rv2  1.rv3  1.rv4  1.rv5 
"C RR" "G RR"  "nil" "C AA" "G AA"  "nil"  "nil" "C VB" "G VB"  "nil" "C VB" 
 1.rv6  1.rv7  1.ja1  1.ja2  1.ja3  1.ja4  1.aa1  1.aa2  1.bv1  1.bv2  1.bv3 
"G VB"  "nil" "C VV"  "nil" "C VV" "G VV" "C AJ"  "nil"  "nil" "C VR" "G VR" 
 1.aj1  1.aj2  1.aj3  1.aj4  1.aj5  1.vb1  1.vb2  1.vb3  1.vb4  1.vb5  1.rj1 
"C RJ"  "nil" "C RV" "G RV"  "nil" "C AA"  "nil" "C AJ"  "nil" "C AJ" "C JR" 
 1.rj2   1.rr  1.vr1  1.vr2  1.vr3  1.vr4  1.vr5   1.bb  1.jr1  1.jr2  1.jr3 
"G JR" "C BB" "C JA"  "nil" "C RJ"  "nil" "C RJ" "C BV"  "nil" "C VB" "G VB" 
 1.jr4  1.jr5 
 "nil" "C JA" 
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]),
+           function(x) rle(as.character(x))$values)
  weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1
1     1 C RR G RR  nil C AA G AA  nil  nil C VB G VB  nil C VB G VB  nil C VV
  ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2
1  nil C VV G VV C AJ  nil  nil C VR G VR C RJ  nil C RV G RV  nil C AA  nil
  vb.3 vb.4 vb.5 rj.1 rj.2   rr vr.1 vr.2 vr.3 vr.4 vr.5   bb jr.1 jr.2 jr.3
1 C AJ  nil C AJ C JR G JR C BB C JA  nil C RJ  nil C RJ C BV  nil C VB G VB
  jr.4 jr.5
1  nil C JA
Run Code Online (Sandbox Code Playgroud)

[注意:这种对应关系之所以在这里成立,只是因为您给出的数据片段中只有一个星期。如果有多个星期,我不记得 unlist(res) 的结果会是什么样子。]