是否有一个R函数可以在文本文件中搜索s字符串?像unix grep这样的东西?
我想替代方法是逐行读取文件但是想知道是否可以通过这样的函数绕过它?
我正在尝试向ggplot两个直方图中的一个添加图例,这些图可能会重叠,因此希望它们稍微透明:
library(ggplot2)
set.seed(1)
plot.df <- data.frame(x=c(rnorm(1000,30,1),rnorm(10000,40,5)),
group=c(rep("a",1000),rep("b",10000)))
Run Code Online (Sandbox Code Playgroud)
使用:
ggplot(plot.df,aes(x=x,fill=factor(group)))+
geom_histogram(data=subset(plot.df,group=='a'),fill="red",alpha=0.5)+
geom_histogram(data=subset(plot.df,group=='b'),fill="darkgray",alpha=0.5)+
scale_colour_manual(name="group",values=c("red","darkgray"),labels=c("a","b"))+scale_fill_manual(name="group",values=c("red","darkgray"),labels=c("a","b"))
Run Code Online (Sandbox Code Playgroud)
但我得到的是:
少了什么东西?
假设我有以下rle对象:
r = rle(c(rep("M",28),rep("N",4265),rep("M",16),rep("S",2),rep("N",400),rep("M",10)));
Run Code Online (Sandbox Code Playgroud)
我想将其分解为以下字符串向量:
a = c("28M","4265N","16M2S","400N","10M");
Run Code Online (Sandbox Code Playgroud)
含义我将"N"值和非"N"值及其对应的长度分隔为向量中的单独元素.
请注意,所有非Ns都是粘贴在一起,这就是结果为"16M2S"而不是"16M""2S"分开的原因.
最有效的方法是什么?
我有这个data.frame:
my.df = data.frame(mean = c(0.045729661,0.030416531,0.043202944,0.025600973,0.040526913,0.046167044,0.029352414,0.021477789,0.027580529,0.017614864,0.020324659,0.027547972,0.0268722,0.030804717,0.021502093,0.008342398,0.02295506,0.022386184,0.030849534,0.017291356,0.030957321,0.01871551,0.016945678,0.014143042,0.026686185,0.020877973,0.028612298,0.013227244,0.010710895,0.024460647,0.03704981,0.019832982,0.031858501,0.022194059,0.030575241,0.024632496,0.040815748,0.025595652,0.023839083,0.026474704,0.033000706,0.044125751,0.02714219,0.025724641,0.020767752,0.026480009,0.016794441,0.00709195), std.dev = c(0.007455271,0.006120299,0.008243454,0.005552582,0.006871527,0.008920899,0.007137174,0.00582671,0.007439398,0.005265133,0.006180637,0.008312494,0.006628951,0.005956211,0.008532386,0.00613411,0.005741645,0.005876588,0.006640122,0.005339993,0.008842722,0.006246828,0.005532832,0.005594483,0.007268493,0.006634795,0.008287031,0.00588119,0.004479003,0.006333063,0.00803285,0.006226441,0.009681048,0.006457784,0.006045368,0.006293256,0.008062195,0.00857954,0.008160441,0.006830088,0.008095485,0.006665062,0.007437581,0.008599525,0.008242957,0.006379928,0.007168385,0.004643819), parent.origin = c("paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal"), group = c("F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F"), replicate = c(1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6))
Run Code Online (Sandbox Code Playgroud)
为此,我使用以下代码生成多面ggplot:
p1 = ggplot(data = my.df, aes(factor(replicate), color = factor(parent.origin)))
p1 = p1 + geom_boxplot(aes(fill = factor(parent.origin),lower = mean - std.dev, upper = mean + std.dev, middle = mean, ymin = mean - 3*std.dev, ymax = mean + 3*std.dev), position = position_dodge(width = 0), width = 0.5, alpha = 0.5, stat="identity") + facet_wrap(~group, ncol = 4)+scale_fill_manual(values = c("red","blue"),labels = c("maternal","paternal"),name = "parental …Run Code Online (Sandbox Code Playgroud) 我得到了这个 Rcpp实现到包的rmvnorm功能mvtnorm,我想知道我需要添加什么才能使用openmp,因此它可以利用多个内核.
我虽然应该这样做:
library(Rcpp)
library(RcppArmadillo)
library(inline)
settings <- getPlugin("RcppArmadillo")
settings$env$PKG_CXXFLAGS <- paste('-fopenmp', settings$env$PKG_CXXFLAGS)
settings$env$PKG_LIBS <- paste('-fopenmp -lgomp', settings$env$PKG_LIBS)
code <- '
#include <omp.h>
using namespace Rcpp;
int cores = 1;
cores = as<int>(cores_);
omp_set_num_threads(cores);
int n = as<int>(n_);
arma::vec mu = as<arma::vec>(mu_);
arma::mat sigma = as<arma::mat>(sigma_);
int ncols = sigma.n_cols;
#pragma omp parallel for schedule(static)
arma::mat Y = arma::randn(n, ncols);
return wrap(arma::repmat(mu, 1, n).t() + Y * arma::chol(sigma));
'
rmvnorm.rcpp <- cxxfunction(signature(n_="integer", mu_="numeric", sigma_="matrix", …Run Code Online (Sandbox Code Playgroud) 假设我想绘制我的数据:
my.df <- data.frame(mean = c(0.045729661,0.030416531,0.043202944,0.025600973,0.040526913,0.046167044,0.029352414,0.021477789,0.027580529,0.017614864,0.020324659,0.027547972,0.0268722,0.030804717,0.021502093,0.008342398,0.02295506,0.022386184,0.030849534,0.017291356,0.030957321,0.01871551,0.016945678,0.014143042,0.026686185,0.020877973,0.028612298,0.013227244,0.010710895,0.024460647,0.03704981,0.019832982,0.031858501,0.022194059,0.030575241,0.024632496,0.040815748,0.025595652,0.023839083,0.026474704,0.033000706,0.044125751,0.02714219,0.025724641,0.020767752,0.026480009,0.016794441,0.00709195), std.dev = c(0.007455271,0.006120299,0.008243454,0.005552582,0.006871527,0.008920899,0.007137174,0.00582671,0.007439398,0.005265133,0.006180637,0.008312494,0.006628951,0.005956211,0.008532386,0.00613411,0.005741645,0.005876588,0.006640122,0.005339993,0.008842722,0.006246828,0.005532832,0.005594483,0.007268493,0.006634795,0.008287031,0.00588119,0.004479003,0.006333063,0.00803285,0.006226441,0.009681048,0.006457784,0.006045368,0.006293256,0.008062195,0.00857954,0.008160441,0.006830088,0.008095485,0.006665062,0.007437581,0.008599525,0.008242957,0.006379928,0.007168385,0.004643819), parent.origin = c("paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal","maternal","maternal","maternal","maternal","maternal","maternal","paternal","paternal","paternal","paternal","paternal","paternal"), group = c("F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:M","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1r:F","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:M","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F","F1i:F"), replicate = c(1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6))
Run Code Online (Sandbox Code Playgroud)
如下:
library(ggplot2)
p1 <- ggplot(data = my.df, aes(factor(replicate), color = factor(parent.origin)))
p1 <- p1 + geom_boxplot(aes(fill = factor(parent.origin),lower = mean - std.dev, upper = mean + std.dev, middle = mean, ymin = mean - 3*std.dev, ymax = mean + 3*std.dev), position = position_dodge(width = 0), width = 0.5, alpha = 0.5, stat="identity") + facet_wrap(~group, ncol = 4)+scale_fill_manual(values = c("red","blue"),labels = c("maternal","paternal"),name = …Run Code Online (Sandbox Code Playgroud) 我有这个例子data.frame:
df <- data.frame(id=c("a","a,b,c","d,e","d","h","e","i","b","c"), start=c(100,100,400,400,800,500,900,200,300), end=c(150,350,550,450,850,550,950,250,350), level = c(1,5,2,3,6,4,2,1,1))
> df
id start end level
1 a 100 150 1
2 a,b,c 100 350 5
3 d,e 400 550 2
4 d 400 450 3
5 h 800 850 6
6 e 500 550 4
7 i 900 950 2
8 b 200 250 1
9 c 300 350 1
Run Code Online (Sandbox Code Playgroud)
其中每一行是一个线性区间.如此示例所示,某些行是合并的间隔(第2行和第3行).
我想要做的是,每个合并的间隔要么消除所有单独的部分,df如果df$level合并的间隔大于其所有部分的部分,或者df$level合并的间隔小于其至少一个部分消除合并的间隔.
因此,对于此示例,输出应为:
> res.df
id start end level
1 a,b,c 100 …Run Code Online (Sandbox Code Playgroud) 假设我生成了这个散点图:
plot(x = runif(20,-10,10), y = runif(20,-10,10), xlim = c(-10,10), ylim = c(-10,10))
abline(h = 0, col = "black")
abline(v = 0, col = "black")
Run Code Online (Sandbox Code Playgroud)
所以abline's 将平面划分为四个笛卡尔象限。我想用不同的颜色为每个象限的背景着色。分别为 1-4 象限说蓝色、红色、绿色和黄色。
任何的想法?
假设我有一个数字向量,我想要舍入到"更漂亮"的数字,例如:
vec <- c(1.739362e-08,8.782537e-08,0.5339712)
Run Code Online (Sandbox Code Playgroud)
我希望它是:
pretty.vec <- c(1.74e-08,8.78e-08,0.53)
Run Code Online (Sandbox Code Playgroud)
我如何实现这一目标?使用round并没有真正帮助,因为它将前两个元素舍入为0:
> round(vec,2)
[1] 0.00 0.00 0.53
Run Code Online (Sandbox Code Playgroud) 我正在尝试组合一系列R plotly绘图 - 垂直,但由于每个单独绘图的 y 轴标题是水平的,因此组合绘图变得混乱。
这是我的例子:
数据:
set.seed(1)
df <- data.frame(cluster=unlist(lapply(letters[1:10],function(i) rep(paste0("cluster:",i),200))),
group=rep(c(rep("A",100),rep("B",100)),10),val=rnorm(2000))
df$group <- factor(df$group,levels=c("A","B"))
Run Code Online (Sandbox Code Playgroud)
绘制一个图列表density,每个df$cluster:
library(plotly)
plot.list <- lapply(unique(df$cluster),function(i){
density.df <- do.call(rbind,lapply(c("A","B"),function(b){
dens <- density(dplyr::filter(df,cluster == i,group == b)$val,adjust=1)
return(data.frame(x=dens$x,y=dens$y,group=b,stringsAsFactors=F))
}))
density.df$group <- factor(density.df$group,levels=c("A","B"))
if(i == unique(df$cluster)[1]){
cluster.plot <- plot_ly(x=~density.df$x,y=~density.df$y,type='scatter',mode='lines',color=~density.df$group,line=list(width=3),showlegend=T) %>%
layout(xaxis=list(title="Val",zeroline=F),yaxis=list(title=i,zeroline=F,showticklabels=F),legend=list(orientation="h",xanchor="center",x=0,y=1))
} else{
cluster.plot <- plot_ly(x=~density.df$x,y=~density.df$y,type='scatter',mode='lines',color=~density.df$group,line=list(width=3),showlegend=F) %>%
layout(xaxis=list(title="Val",zeroline=F),yaxis=list(title=i,zeroline=F,showticklabels=F))
}
return(cluster.plot)
})
Run Code Online (Sandbox Code Playgroud)
然后,使用以下方法组合它们:
subplot(plot.list,nrows=length(plot.list),shareX=T,shareY=T,titleX=T,titleY=T)
Run Code Online (Sandbox Code Playgroud)
如何将每个图的 y 轴标题旋转plot.list为水平而不是垂直?