我有一个庞大的数据框,最后30行如下:
# Load data.table, which provides data.table() and the := operator.
library(data.table)
Run Code Online (Sandbox Code Playgroud)
dput(p) 的输出:
# dput() representation of a 30-row data.frame (row.names = c(NA, -30L)).
# Column types as written here: DATE is POSIXct, LPAR is a factor,
# USR_SYS_CPU_PCT and PROC_QUE are integer (L-suffixed literals), and
# ENT, ENT_PCT, PHYSICAL_CPU_USED, RELATIVE_CORES, USED_CORES are double.
structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075,
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255,
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435,
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615,
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795,
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002",
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002",
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L,
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L,
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8,
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6,
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7,
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6,
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21,
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45,
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94,
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L,
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L,
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53,
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39,
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44,
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887,
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054,
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488,
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702,
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT",
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE",
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA,
-30L))
Run Code Online (Sandbox Code Playgroud)
当我想用 data.table 计算一些值时,如下所示:
# Convert the data.frame p to a data.table.
p<-data.table(p)
# Add RELATIVE_PERCENT within each (DATE, LPAR) group: (USED_CORES/ENT)*100
# when ENT_PCT > 100, otherwise USR_SYS_CPU_PCT.
# NOTE(review): this appears to be the call that raises the
# "Type of RHS ('integer') must match LHS ('double')" error quoted in this
# post; it is left unchanged so the problem stays reproducible.
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]
Run Code Online (Sandbox Code Playgroud)
我收到此错误:
Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100, :
Type of RHS ('integer') must match LHS ('double'). To check and coerce would
impact performance too much for the fastest cases. Either change the type of
the target column, or coerce the RHS of := yourself (e.g. by using 1L instead
of 1)
Run Code Online (Sandbox Code Playgroud)
这个错误是什么意思?我怎样才能解决这个错误?
Aru*_*run 10
问题在于,你的 ifelse 语句对某些值返回 integer 类型,而对另一些条目返回 numeric(double)类型。data.table 因此报告列类型不匹配,因为它期望用户自行进行强制类型转换(出于性能原因,如错误信息中所述)。因此,只需用 as.numeric 将其包裹起来,即可把所有值转换为 double。
# Wrap the ifelse() result in as.numeric() so every (DATE, LPAR) group
# produces a double, whichever branch supplied the values.
p <- p[
  ,
  RELATIVE_PERCENT := as.numeric(
    ifelse(ENT_PCT > 100, (USED_CORES / ENT) * 100, USR_SYS_CPU_PCT)
  ),
  by = list(DATE, LPAR)
]
Run Code Online (Sandbox Code Playgroud)