wen*_*wen 13 replace numbers r sequence
我有一串数字:
n1 = c(1, 1, 0, 6, 0, 0, 10, 10, 11, 12, 0, 0, 19, 23, 0, 0)
Run Code Online (Sandbox Code Playgroud)
我需要用它前面的相应数字替换0来得到:
n2 = c(1, 1, 1, 6, 6, 6, 10, 10, 11, 12, 12, 12, 19, 23, 23, 23)
Run Code Online (Sandbox Code Playgroud)
我如何从n1到n2?
提前致谢!
flo*_*del 12
n2 <- n1[cummax(seq_along(n1) * (n1 != 0))]
Run Code Online (Sandbox Code Playgroud)
gag*_*ews 10
试试na.locf()包zoo:
library(zoo)
n1 <- c(1, 1, 0, 6, 0, 0, 10, 10, 11, 12, 0, 0, 19, 23, 0, 0)
n1[n1 == 0] <- NA
na.locf(n1)
## [1] 1 1 1 6 6 6 10 10 11 12 12 12 19 23 23 23
Run Code Online (Sandbox Code Playgroud)
此函数将每个函数替换NA为最新的非函数NA.这就是为什么我在应用它之前替换了所有0s NA.
这里有一个讨论的一个类似(但不相同)的问题.
编辑:如果n1最终由NAs 组成,请尝试例如
n1 <- c(1, 1, 0, 6, 0, 0, 10, NA, 11, 12, 0, 0, 19, NA, 0, 0)
wh_na <- which(is.na(n1))
n1[n1 == 0] <- NA
n2 <- na.locf(n1)
n2[wh_na] <- NA
n2
## [1] 1 1 1 6 6 6 10 NA 11 12 12 12 19 NA 19 19
Run Code Online (Sandbox Code Playgroud)
EDIT2:这种c(1,NA,0)退货方式c(1,NA,1).另外两个玩家给出了c(1,NA,NA).换句话说,这里我们用最后一个非缺失的非零值替换0.选择您喜欢的选项.
编辑3:受@ Thell的Rcpp解决方案的启发,我想添加另一个 - 这次使用"纯"R/C API.
library('inline')
sexp0 <- cfunction(signature(x="numeric"), "
x = Rf_coerceVector(x, INTSXP); // will not work for factors
R_len_t n = LENGTH(x);
SEXP ret;
PROTECT(ret = Rf_allocVector(INTSXP, n));
int lval = NA_INTEGER;
int* xin = INTEGER(x);
int* rin = INTEGER(ret);
for (R_len_t i=0; i<n; ++i, ++xin, ++rin) {
if (*xin == 0)
*rin = lval;
else {
lval = *xin;
*rin = lval;
}
}
UNPROTECT(1);
return ret;
", language="C++")
Run Code Online (Sandbox Code Playgroud)
在这种情况下,我们会得到c(1,NA,NA)的c(1,NA,0).一些基准:
library(microbenchmark)
set.seed(1L)
n1 <- sample(c(0:10), 1e6, TRUE)
microbenchmark(sexp0(n1), rollValue(n1), n1[cummax(seq_along(n1) * (n1 != 0))])
## Unit: milliseconds
## expr min lq median uq max neval
## sexp0(n1) 2.468588 2.494233 3.198711 4.216908 63.21236 100
## rollValue(n1) 8.151000 9.359731 10.603078 12.760594 75.88901 100
## n1[cummax(seq_along(n1) * (n1 != 0))] 32.899420 36.956711 39.673726 45.419449 106.48180 100
Run Code Online (Sandbox Code Playgroud)
这是一个解决方案data.table:
require(data.table) ## >= 1.9.2
idx = which(!n1 %in% 0L)
DT <- data.table(val=n1[idx], idx=idx)
setattr(DT, 'sorted', "idx")
n1 = DT[J(seq_along(n1)), roll=Inf]$val
# [1] 1 1 1 6 6 6 10 10 11 12 12 12 19 23 23 23
Run Code Online (Sandbox Code Playgroud)
require(zoo)
require(data.table)
set.seed(1L)
n1 = sample(c(0:10), 1e6, TRUE)
## data.table
dt_fun <- function(n1) {
idx = which(!n1 %in% 0L)
DT <- data.table(val=n1[idx], idx=idx)
setattr(DT, 'sorted', "idx")
DT[J(seq_along(n1)), roll=Inf]$val
}
# na.locf from zoo - gagolews
zoo_fun <- function(n1) {
wh_na <- which(is.na(n1))
n1[n1 == 0] <- NA
n2 <- na.locf(n1)
n2[wh_na] <- NA
n2
}
## rle - thelatemail
rle_fun <- function(n1) {
r <- rle(n1)
r$values[which(r$values==0)] <- r$values[which(r$values==0)-1]
inverse.rle(r)
}
flodel_fun <- function(n1) n1[cummax(seq_along(n1) * (n1 != 0))]
require(microbenchmark)
microbenchmark(a1 <- dt_fun(n1),
a2 <- zoo_fun(n1),
a3 <- rle_fun(n1),
a4 <- flodel_fun(n1), times=10L)
Run Code Online (Sandbox Code Playgroud)
这是基准测试结果:
# Unit: milliseconds
# expr min lq median uq max neval
# a1 <- dt_fun(n1) 155.49495 164.04133 199.39133 243.22995 289.80908 10
# a2 <- zoo_fun(n1) 596.33039 632.07841 671.51439 682.85950 697.33500 10
# a3 <- rle_fun(n1) 356.95103 377.61284 383.63109 406.79794 495.09942 10
# a4 <- flodel_fun(n1) 51.52259 55.54499 56.20325 56.39517 60.15248 10
Run Code Online (Sandbox Code Playgroud)
因为rle是一切的答案:
#make an example including an NA value
n1 <- c(1, 1, 0, 6, NA, 0, 10, 10, 11, 12, 0, 0, 19, 23, 0, 0)
r <- rle(n1)
r$values[which(r$values==0)] <- r$values[which(r$values==0)-1]
inverse.rle(r)
# [1] 1 1 1 6 NA NA 10 10 11 12 12 12 19 23 23 23
Run Code Online (Sandbox Code Playgroud)
跳过NAs 的版本是:
n1 <- c(1, 1, 0, 6, NA, 0, 10, 10, 11, 12, 0, 0, 19, 23, 0, 0)
r <- rle(n1[!is.na(n1)])
r$values[which(r$values==0)] <- r$values[which(r$values==0)-1]
n1[!is.na(n1)] <- inverse.rle(r)
n1
# [1] 1 1 1 6 NA 6 10 10 11 12 12 12 19 23 23 23
Run Code Online (Sandbox Code Playgroud)
不要忘记Rcpp的简单性和性能提升......
使用Arun的样本量我得到......
Unit: milliseconds
expr min lq median uq max neval
rollValue(n1) 3.998953 4.105954 5.803294 8.774286 36.52492 100
n1[cummax(seq_along(n1) * (n1 != 0))] 17.634569 18.295344 20.698524 23.104847 74.72795 100
Run Code Online (Sandbox Code Playgroud)
.cpp来源的文件很简单......
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::plugins("cpp11")]]
// [[Rcpp::export]]
NumericVector rollValue(const NumericVector v) {
auto out = clone(v);
auto tmp = v[0];
for( auto & e : out) {
if( e == 0 ) {
e = tmp;
continue;
}
tmp = e;
}
return out;
}
Run Code Online (Sandbox Code Playgroud)