给定矩阵m:
# [,1] [,2] [,3] [,4]
# [1,] 2 1 3 4
# [2,] 4 3 2 1
# [3,] 2 3 1 4
# [4,] 1 2 3 4
# [5,] 4 2 3 1
# [6,] 4 3 1 2
# [7,] 2 4 3 1
# [8,] 4 3 2 1
# [9,] 3 2 1 4
# [10,] 1 2 3 4
# [11,] 3 2 4 1
# [12,] 4 3 2 1
# [13,] 2 1 3 4
# [14,] 2 1 3 4
# [15,] 1 2 3 4
# [16,] 4 3 2 1
# [17,] 2 1 3 4
# [18,] 1 4 3 2
# [19,] 3 2 1 4
# [20,] 1 2 3 4
# Toy data: a 20 x 4 integer matrix, entered one matrix row per source line.
m <- matrix(
  c(
    2L, 1L, 3L, 4L,
    4L, 3L, 2L, 1L,
    2L, 3L, 1L, 4L,
    1L, 2L, 3L, 4L,
    4L, 2L, 3L, 1L,
    4L, 3L, 1L, 2L,
    2L, 4L, 3L, 1L,
    4L, 3L, 2L, 1L,
    3L, 2L, 1L, 4L,
    1L, 2L, 3L, 4L,
    3L, 2L, 4L, 1L,
    4L, 3L, 2L, 1L,
    2L, 1L, 3L, 4L,
    2L, 1L, 3L, 4L,
    1L, 2L, 3L, 4L,
    4L, 3L, 2L, 1L,
    2L, 1L, 3L, 4L,
    1L, 4L, 3L, 2L,
    3L, 2L, 1L, 4L,
    1L, 2L, 3L, 4L
  ),
  ncol = 4L,
  byrow = TRUE
)
Run Code Online (Sandbox Code Playgroud)
我们可以用这种方式判断哪些行是已排序的(升序或降序):
# Flag each row of m that is sorted in either direction: a row is rejected
# only when it is unsorted both as given and when reversed.
apply(m, 1, function(v) !(is.unsorted(v) & is.unsorted(rev(v))))
#[1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
Run Code Online (Sandbox Code Playgroud)
矩阵不大时这样做没问题,但我要处理的是有数百万行的矩阵。我们可以做得更好吗?能否用向量化的方式来实现?矩阵 m 仅作为示例数据给出,我正在寻找一个通用的解决方案。
这种写法不太优雅,但你可以通过检查每一行(即 t(m) 的每一列)中相邻元素的差值是否全为正或全为负来达到目的。
# Add up the signs of the successive differences along each row of m; the
# absolute total reaches 3 only when all three steps share one non-zero sign.
abs(colSums(sign(diff(t(m))))) %in% 3
# [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#[13] FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
Run Code Online (Sandbox Code Playgroud)
我的快速测试表明它的执行速度要快得多.
您可以通过检查矩阵的大小来概括它m:
# Same sign-sum test, with the target taken from the matrix width: a row of
# ncol(m) values has ncol(m) - 1 successive differences.
abs(colSums(sign(diff(t(m))))) %in% (ncol(m) - 1)
Run Code Online (Sandbox Code Playgroud)
如果存在像 c(1,1,2,3) 这样含有重复值的已排序行,则可以使用稍微冗长一些的方法:
# Successive differences down each column of t(m), i.e. along each row of m.
sdm <- diff(t(m))
# Number of adjacent pairs in a row.
nc <- ncol(m) - 1
# A row counts as sorted when every step is >= 0 or every step is <= 0, so
# ties (zero differences) are accepted in either direction.
(colSums(sdm >= 0) == nc) | (colSums(sdm <= 0) == nc)
# [1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
#[13] FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
Run Code Online (Sandbox Code Playgroud)
一些快速基准测试(记住这些在处理重复值方面并不完全相同):
set.seed(1)
# Build a 1e6-row benchmark matrix by resampling the rows of m with
# replacement. TRUE is spelled out because T is an ordinary, reassignable
# variable, and seq_len() replaces the 1:n idiom.
m2 <- m[sample(seq_len(nrow(m)), 1e6, replace = TRUE), ]
## original apply code
system.time({
  apply(m2, 1, function(x) !is.unsorted(x) | !is.unsorted(rev(x)))
})
# user system elapsed
# 14.888 0.272 15.153
Run Code Online (Sandbox Code Playgroud)
比较运行:
# Compares every row of m2 against the ascending and descending ordering of
# the values found in its first row, so it only recognises rows that hold
# exactly those values.
system.time({
  n <- t(m2)
  # The length-ncol(m2) reference vector is recycled down each column of n.
  forwards <- colSums(n == sort(m2[1, ])) == nrow(n)
  backwards <- colSums(n == sort(m2[1, ], decreasing = TRUE)) == nrow(n)
  vec <- backwards | forwards
})
# user system elapsed
# 0.104 0.020 0.123
# Times the tie-tolerant successive-difference check on the benchmark matrix.
system.time({
  # Successive differences along each row of m2 (columns of its transpose).
  sdm <- diff(t(m2))
  # Number of adjacent pairs per row, taken from m2 itself rather than from
  # the toy matrix m so the check stays correct for any benchmarked matrix.
  nc <- ncol(m2) - 1
  colSums(sdm <= 0) == nc | colSums(sdm >= 0) == nc
})
# user system elapsed
# 0.248 0.032 0.279
# Times a row-wise apply() over the matrix of adjacent differences.
system.time({
  # drop = FALSE keeps both operands as matrices; without it a two-column m2
  # collapses to a vector and apply() fails because there are no dimensions.
  steps <- m2[, -1, drop = FALSE] - m2[, -ncol(m2), drop = FALSE]
  apply(steps, 1, function(x) all(x >= 0) || all(x <= 0))
})
# user system elapsed
# 3.724 0.004 3.731
library(matrixStats)
# NOTE(review): presumably rowVarDiffs() returns, per row, a variance estimate
# of the successive differences, so `== 0` would flag rows with a constant
# step rather than sorted rows in general -- confirm against the matrixStats
# documentation before relying on this outside the toy data.
system.time({
  matrixStats::rowVarDiffs(m2) == 0
})
# user system elapsed
# 40.176 1.156 42.071
Run Code Online (Sandbox Code Playgroud)