小编dan*_*dan的帖子

覆盖geom_histogram或stat_bin上的geom_points

我想使用ggplot绘制直方图(或使用的步骤图stat_bin)并使用它覆盖几个点geom_point.

这是一个base实现:

library(plotrix)
set.seed(10)
df <- data.frame(id=LETTERS,val=rnorm(length(LETTERS)))
selected.ids <- sample(LETTERS,3,replace=F)
h <- hist(df$val,plot=F,breaks=10)
cols <- sapply(rainbow(length(selected.ids)),function(x) color.id(x)[1])
selected.df <- data.frame(id=selected.ids,col=cols,stringsAsFactors=F)
selected.df$x <- df$val[which(df$id %in% selected.ids)]
selected.df <- selected.df[order(selected.df$x),]
selected.df$y <- h$counts[findInterval(selected.df$x,h$breaks)]
selected.df$col <- factor(selected.df$col,levels=cols)
plot(h)
segments(x0=selected.df$x,x1=selected.df$x,y0=selected.df$y,y1=selected.df$y,cex=18,lwd=8,col=selected.df$col)
Run Code Online (Sandbox Code Playgroud)

这使:

在此输入图像描述

但是,当我尝试ggplot:

ggplot(df,aes(x=val))+geom_histogram(bins=10,colour="black",alpha=0,fill="#FF6666")+geom_point(data=selected.df,aes(x=x,y=y,colour=factor(col)),size=2)+scale_fill_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")+scale_colour_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")
Run Code Online (Sandbox Code Playgroud)

点和直方图未对齐: 在此输入图像描述

理想情况下,我想使用步骤图绘制它:

ggplot(df,aes(x=val))+stat_bin(geom="step",bins=10)+geom_point(data=selected.df,aes(x=x,y=y,colour=factor(col)),size=2)+scale_fill_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")+scale_colour_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")
Run Code Online (Sandbox Code Playgroud)

这看起来非常像 geom_histogram

在此输入图像描述

而且我也想让线的末端触及y = 0线.

所以我使用stat_bin在步骤图中得到正确的结果?

r ggplot2

5
推荐指数
1
解决办法
944
查看次数

使用ggnet在点布局中绘制DAG

有没有办法在dot布局中绘制DAG (a-la Rgraphviz)ggnet

我的例子是基因本体图,取自topGOvignette:

  library(topGO)
  library(ALL)
  data(ALL)
  data(geneList)
  affyLib <- paste(annotation(ALL),"db",sep= ".")
  library(package=affyLib,character.only=TRUE)
  topgo.obj <- new("topGOdata",description="Simple session",ontology="BP",allGenes=geneList,geneSel=topDiffGenes,nodeSize=10,annot=annFUN.db,affyLib=affyLib)
  res.fisher <- runTest(topgo.obj,algorithm="classic",statistic="fisher")
  res.df <- GenTable(topgo.obj,classicFisher=res.fisher,orderBy="classicFisher",topNodes=length(score(res.fisher)))
Run Code Online (Sandbox Code Playgroud)

为方便起见,我设置:

res.df$p.value <- as.numeric(res.df$classicFisher)
Run Code Online (Sandbox Code Playgroud)

我保留了topGO graphNELp值低于0.05 的唯一节点及其祖先:

topgo.graph <- graph(topgo.obj)
sig.cutoff <- 0.05
sig.node.names <- dplyr::filter(res.df,p.value < sig.cutoff)$GO.ID
topgo.graph <- reverseArch(inducedGraph(topgo.graph,sig.node.names))
Run Code Online (Sandbox Code Playgroud)

要使用ggnet我将graphNEL图形转换为igraph:

  library(ggnet)
  library(network)
  library(sna)
  library(scales)
  library(igraph)
  library(intergraph)

  topgo.igraph <- graph_from_graphnel(topgo.graph,name=TRUE,weight=TRUE,unlist.attrs=TRUE)
  topgo.network <- asNetwork(topgo.igraph,amap=attrmap())
  ggnet2(net=topgo.network,size=10,arrow.size=12,arrow.gap=0.025)
Run Code Online (Sandbox Code Playgroud)

这给了我这个: 在此输入图像描述

使用Rgraphvizdot …

layout r graph ggplot2

5
推荐指数
0
解决办法
508
查看次数

在绘图热图中使用离散的自定义颜色

我正在尝试生成一个plotly heatmap,我希望颜色由离散比例指定。

这就是我的意思:

生成具有 2 个集群的数据并按层次对它们进行聚类:

require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
             cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
Run Code Online (Sandbox Code Playgroud)

将值放入mat间隔并为每个间隔设置颜色:

require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
Run Code Online (Sandbox Code Playgroud)

使用ggplot2我以这种heatmap方式绘制(也legend指定了离散颜色和各自的范围):

require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
require(ggplot2)
ggplot(interval.df,aes(x=sample,y=gene,fill=expr))+
  geom_tile(color=NA)+theme_bw()+
  theme(strip.text.x=element_text(angle=90,vjust=1,hjust=0.5,size=6),panel.spacing=unit(0.025,"cm"),legend.key=element_blank(),plot.margin=unit(c(1,1,1,1),"cm"),legend.key.size=unit(0.25,"cm"),panel.border=element_blank(),strip.background=element_blank(),axis.ticks.y=element_line(size=0.25))+
  scale_color_manual(drop=FALSE,values=interval.cols,labels=names(interval.cols),name="expr")+
  scale_fill_manual(drop=FALSE,values=interval.cols,labels=names(interval.cols),name="expr")
Run Code Online (Sandbox Code Playgroud)

这使: 在此处输入图片说明

这是我尝试用以下方法生成它plotly

plot_ly(z=mat,x=colnames(mat),y=rownames(mat),type="heatmap",colors=interval.cols)
Run Code Online (Sandbox Code Playgroud)

这使: …

r heatmap ggplot2 plotly

5
推荐指数
2
解决办法
3571
查看次数

分面绘制热图

我希望能够将R plotly heatmap.

我的意思是:我有一个分层聚类的基因表达数据集:

require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
             cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
Run Code Online (Sandbox Code Playgroud)

然后,我将其离散化为特定的表达范围,因为这恰好有助于解决我的案例的颜色问题。我还创建了其他结构来帮助我按照colorbar我想要的方式绘制:

require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
interval.cols2 <- rep(interval.cols, each=ncol(mat))
color.df <- data.frame(range=c(0:(2*length(interval.cols)-1)),colors=c(0:(2*length(interval.cols)-1)))
color.df <- setNames(data.frame(color.df$range,color.df$colors),NULL)
for (i in 1:(2*length(interval.cols))) {
  color.df[[2]][[i]] <- interval.cols[[(i + 1) / …
Run Code Online (Sandbox Code Playgroud)

r plotly

5
推荐指数
1
解决办法
1364
查看次数

自定义绘图热图的悬停文本

我有一个matrix(来自几个条件的基因表达):

set.seed(1)
mat <- matrix(rnorm(50*10),nrow=50,ncol=10,dimnames=list(paste("C",1:50,sep="."),paste("G",1:10,sep=".")))
Run Code Online (Sandbox Code Playgroud)

我想将其绘制为heatmap使用plotlyin R

require(plotly)
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",colors=colorRamp(c("darkblue","white","darkred")),colorbar=list(title="Score",len=0.4)) %>%
  layout(yaxis=list(title="Condition"),xaxis=list(title="Gene"))
Run Code Online (Sandbox Code Playgroud)

工作正常。

但是,我想添加仅在悬停时才能看到的文本。

我认为这会奏效:

conditions.text <- paste(paste("C",1:50,sep="."),rep(paste(LETTERS[sample(26,10,replace=T)],collapse=""),50),sep=":")
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",colors=colorRamp(c("darkblue","white","darkred")),colorbar=list(title="Score",len=0.4),hoverinfo='text',text=~conditions.text) %>%
  layout(yaxis=list(title="Condition"),xaxis=list(title="Gene"))
Run Code Online (Sandbox Code Playgroud)

但事实并非如此。将鼠标悬停在情节上时,我实际上看不到任何文字。

请注意,我正在使用 amatrix而不是melted data.frame.

r hover heatmap plotly

5
推荐指数
1
解决办法
3713
查看次数

尝试打印树形图时出现节点堆栈溢出错误

我试图得到一个内部节点的高度dendrogramBFS顺序.

utils::str函数dendrogramBFS顺序打印.所以我认为我会使用它(将输出重定向到文件并对其进行一些解析以获取我需要的信息).

我的'dendrogram'2个分支机构和5902个成员总共可下载RDS文件链接:dendro.RDS.

当我尝试:

utils::str(dendro)
Run Code Online (Sandbox Code Playgroud)

我收到此错误:

Error in getOption("OutDec") : node stack overflow
Error during wrapup: node stack overflow
Run Code Online (Sandbox Code Playgroud)

我尝试使用一个简单的递归函数:

nodeHeights <- function(dendro){
  if(is.leaf(dendro))
    0
  else{
    cat(attr(dendro,"height"),"\n")
    max(nodeHeights(dendro[[1]]),nodeHeights(dendro[[2]]))+1
  }
}
Run Code Online (Sandbox Code Playgroud)

但是:nodeHeights(dendro)

抛出此错误:

Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
Run Code Online (Sandbox Code Playgroud)

任何的想法?或任何建议如何获得的节点高度dendrogramBFS订单?

r dendrogram

5
推荐指数
1
解决办法
3762
查看次数

chol.default(K) 中出现错误:5 阶的前导小数对于 betareg 不是正定的

我正在尝试使用这些数据来拟合beta regression模型:betareg functionbetareg package

df <- data.frame(category=c("c1","c1","c1","c1","c1","c1","c2","c2","c2","c2","c2","c2","c3","c3","c3","c3","c3","c3","c4","c4","c4","c4","c4","c4","c5","c5","c5","c5","c5","c5"),
                 value=c(6.6e-18,0.0061,0.015,1.1e-17,4.7e-17,0.0032,0.29,0.77,0.64,0.59,0.39,0.72,0.097,0.074,0.073,0.08,0.06,0.11,0.034,0.01,0.031,0.041,4.7e-17,0.025,0.58,0.14,0.24,0.29,0.55,0.15),stringsAsFactors = F)

df$category <- factor(df$category,levels=c("c1","c2","c3","c4","c5"))
Run Code Online (Sandbox Code Playgroud)

使用此命令:

library(betareg)
fit <- betareg(value ~ category, data = df)
Run Code Online (Sandbox Code Playgroud)

我得到这个error

Error in chol.default(K) : 
  the leading minor of order 5 is not positive definite
In addition: Warning message:
In sqrt(wpp) : NaNs produced
Error in chol.default(K) : 
  the leading minor of order 5 is not positive definite
In addition: Warning messages:
1: In betareg.fit(X, Y, Z, weights, offset, link, link.phi, …
Run Code Online (Sandbox Code Playgroud)

beta regression r lm

5
推荐指数
1
解决办法
5891
查看次数

散点图中的颜色编码误差线

我正在尝试创建一个森林图R plotly在其中我要通过相应的p值对效果大小(点)及其误差线进行颜色编码。

以下是玩具数据:

set.seed(1)

factors <- paste0(1:25,":age")
effect.sizes <- rnorm(25,0,1)
effect.errors <- abs(rnorm(25,0,1))
p.values <- runif(25,0,1)
Run Code Online (Sandbox Code Playgroud)

这是我正在尝试的:

library(dplyr)
plotly::plot_ly(type='scatter',mode="markers",y=~factors,x=~effect.sizes,color=~p.values,colors=grDevices::colorRamp(c("darkred","gray"))) %>%
      plotly::add_trace(error_x=list(array=effect.errors),marker=list(color=~p.values,colors=grDevices::colorRamp(c("darkred","gray")))) %>%
      plotly::colorbar(limits=c(0,1),len=0.4,title="P-Value") %>%
      plotly::layout(xaxis=list(title="Effect Size",zeroline=T,showticklabels=T),yaxis=list(title="Factor",zeroline=F,showticklabels=T))
Run Code Online (Sandbox Code Playgroud)

这给了我:

在此处输入图片说明

除了以下几点,这与我想要的非常接近:

  1. 我希望误差条的颜色类似于效果大小(通过相应的p值)。
  2. 删除trace下面的两个图例colorbar
  3. y轴上的标签顺序为 factors

任何的想法?

r plotly r-forestplot

5
推荐指数
1
解决办法
294
查看次数

向聚类散点图 (tSNE) 添加文本注释

我有我想要的XY数据(tSNE高维数据的 2D嵌入)scatter plot。数据被分配给几个clusters,所以我想对点进行颜色编码cluster,然后为每个点添加一个标签cluster,该标签与clusters具有相同的颜色编码,并且位于(尽可能)从cluster的点。

任何想法如何做到这一点使用R在任ggplot2ggrepelplotly

这是示例数据(XY坐标和cluster分配在 中df,标签在 中label.df)及其ggplot2一部分:

library(dplyr)
library(ggplot2)
set.seed(1)
df <- do.call(rbind,lapply(seq(1,20,4),function(i) data.frame(x=rnorm(50,mean=i,sd=1),y=rnorm(50,mean=i,sd=1),cluster=i)))
df$cluster <- factor(df$cluster)

label.df <- data.frame(cluster=levels(df$cluster),label=paste0("cluster: ",levels(df$cluster)))

ggplot(df,aes(x=x,y=y,color=cluster))+geom_point()+theme_minimal()+theme(legend.position="none")
Run Code Online (Sandbox Code Playgroud)

在此处输入图片说明

r scatter-plot ggplot2 plotly ggrepel

5
推荐指数
1
解决办法
1556
查看次数

使用 tidyr unite 将列值与列名组合

我有data.frame几列:

set.seed(1)
df <- data.frame(cluster=LETTERS[1:4],group=c(rep("m",2),rep("f",2)),point=rnorm(4),err=runif(4,0.1,0.3))
Run Code Online (Sandbox Code Playgroud)

并且我将添加另一列"\n" concatenates,其中列名在值之前,其各自行的所有列。

我知道这个:

library(tidyr)
library(dplyr)
tidyr::unite(df,text,sep="\n")
Run Code Online (Sandbox Code Playgroud)

给我这个tibble

                                         text
1  A\nm\n0.487429052428485\n0.286941046221182
2  B\nm\n0.738324705129217\n0.142428504256532
3  C\nf\n0.575781351653492\n0.230334753217176
4 D\nf\n-0.305388387156356\n0.125111019192263
Run Code Online (Sandbox Code Playgroud)

但我想要的是tibble

                                         text
1  cluster: A\ngroup: m\npoint: 0.487429052428485\nerr: 0.286941046221182
2  cluster: B\ngroup: m\npoint: 0.738324705129217\nerr: 0.142428504256532
3  cluster: C\ngroup: f\npoint: 0.575781351653492\nerr: 0.230334753217176
4 cluster: D\ngroup: f\npoint: -0.305388387156356\nerr: 0.125111019192263
Run Code Online (Sandbox Code Playgroud)

任何的想法?

r tidyr tibble

5
推荐指数
1
解决办法
524
查看次数