我想使用ggplot绘制直方图(或使用的步骤图stat_bin)并使用它覆盖几个点geom_point.
这是一个base实现:
library(plotrix)
set.seed(10)
df <- data.frame(id=LETTERS,val=rnorm(length(LETTERS)))
selected.ids <- sample(LETTERS,3,replace=F)
h <- hist(df$val,plot=F,breaks=10)
cols <- sapply(rainbow(length(selected.ids)),function(x) color.id(x)[1])
selected.df <- data.frame(id=selected.ids,col=cols,stringsAsFactors=F)
selected.df$x <- df$val[which(df$id %in% selected.ids)]
selected.df <- selected.df[order(selected.df$x),]
selected.df$y <- h$counts[findInterval(selected.df$x,h$breaks)]
selected.df$col <- factor(selected.df$col,levels=cols)
plot(h)
segments(x0=selected.df$x,x1=selected.df$x,y0=selected.df$y,y1=selected.df$y,cex=18,lwd=8,col=selected.df$col)
Run Code Online (Sandbox Code Playgroud)
这使:
但是,当我尝试ggplot:
ggplot(df,aes(x=val))+geom_histogram(bins=10,colour="black",alpha=0,fill="#FF6666")+geom_point(data=selected.df,aes(x=x,y=y,colour=factor(col)),size=2)+scale_fill_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")+scale_colour_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")
Run Code Online (Sandbox Code Playgroud)
理想情况下,我想使用步骤图绘制它:
ggplot(df,aes(x=val))+stat_bin(geom="step",bins=10)+geom_point(data=selected.df,aes(x=x,y=y,colour=factor(col)),size=2)+scale_fill_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")+scale_colour_manual(values=levels(selected.df$col),labels=selected.df$id,name="id")
Run Code Online (Sandbox Code Playgroud)
这看起来非常像 geom_histogram
而且我也想让线的末端触及y = 0线.
所以我使用stat_bin在步骤图中得到正确的结果?
有没有办法在dot布局中绘制DAG (a-la Rgraphviz)ggnet?
我的例子是基因本体图,取自topGO的vignette:
library(topGO)
library(ALL)
data(ALL)
data(geneList)
affyLib <- paste(annotation(ALL),"db",sep= ".")
library(package=affyLib,character.only=TRUE)
topgo.obj <- new("topGOdata",description="Simple session",ontology="BP",allGenes=geneList,geneSel=topDiffGenes,nodeSize=10,annot=annFUN.db,affyLib=affyLib)
res.fisher <- runTest(topgo.obj,algorithm="classic",statistic="fisher")
res.df <- GenTable(topgo.obj,classicFisher=res.fisher,orderBy="classicFisher",topNodes=length(score(res.fisher)))
Run Code Online (Sandbox Code Playgroud)
为方便起见,我设置:
res.df$p.value <- as.numeric(res.df$classicFisher)
Run Code Online (Sandbox Code Playgroud)
我保留了topGO graphNELp值低于0.05 的唯一节点及其祖先:
topgo.graph <- graph(topgo.obj)
sig.cutoff <- 0.05
sig.node.names <- dplyr::filter(res.df,p.value < sig.cutoff)$GO.ID
topgo.graph <- reverseArch(inducedGraph(topgo.graph,sig.node.names))
Run Code Online (Sandbox Code Playgroud)
要使用ggnet我将graphNEL图形转换为igraph:
library(ggnet)
library(network)
library(sna)
library(scales)
library(igraph)
library(intergraph)
topgo.igraph <- graph_from_graphnel(topgo.graph,name=TRUE,weight=TRUE,unlist.attrs=TRUE)
topgo.network <- asNetwork(topgo.igraph,amap=attrmap())
ggnet2(net=topgo.network,size=10,arrow.size=12,arrow.gap=0.025)
Run Code Online (Sandbox Code Playgroud)
使用Rgraphviz的dot …
我正在尝试生成一个plotly heatmap,我希望颜色由离散比例指定。
这就是我的意思:
生成具有 2 个集群的数据并按层次对它们进行聚类:
require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
Run Code Online (Sandbox Code Playgroud)
将值放入mat间隔并为每个间隔设置颜色:
require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
Run Code Online (Sandbox Code Playgroud)
使用ggplot2我以这种heatmap方式绘制(也legend指定了离散颜色和各自的范围):
require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
require(ggplot2)
ggplot(interval.df,aes(x=sample,y=gene,fill=expr))+
geom_tile(color=NA)+theme_bw()+
theme(strip.text.x=element_text(angle=90,vjust=1,hjust=0.5,size=6),panel.spacing=unit(0.025,"cm"),legend.key=element_blank(),plot.margin=unit(c(1,1,1,1),"cm"),legend.key.size=unit(0.25,"cm"),panel.border=element_blank(),strip.background=element_blank(),axis.ticks.y=element_line(size=0.25))+
scale_color_manual(drop=FALSE,values=interval.cols,labels=names(interval.cols),name="expr")+
scale_fill_manual(drop=FALSE,values=interval.cols,labels=names(interval.cols),name="expr")
Run Code Online (Sandbox Code Playgroud)
这是我尝试用以下方法生成它plotly:
plot_ly(z=mat,x=colnames(mat),y=rownames(mat),type="heatmap",colors=interval.cols)
Run Code Online (Sandbox Code Playgroud)
这使: …
我希望能够将R plotly heatmap.
我的意思是:我有一个分层聚类的基因表达数据集:
require(permute)
set.seed(1)
mat <- rbind(cbind(matrix(rnorm(2500,2,1),nrow=25,ncol=500),matrix(rnorm(2500,-2,1),nrow=25,ncol=500)),
cbind(matrix(rnorm(2500,-2,1),nrow=25,ncol=500),matrix(rnorm(2500,2,1),nrow=25,ncol=500)))
rownames(mat) <- paste("g",1:50,sep=".")
colnames(mat) <- paste("s",1:1000,sep=".")
hc.col <- hclust(dist(t(mat)))
dd.col <- as.dendrogram(hc.col)
col.order <- order.dendrogram(dd.col)
hc.row <- hclust(dist(mat))
dd.row <- as.dendrogram(hc.row)
row.order <- order.dendrogram(dd.row)
mat <- mat[row.order,col.order]
Run Code Online (Sandbox Code Playgroud)
然后,我将其离散化为特定的表达范围,因为这恰好有助于解决我的案例的颜色问题。我还创建了其他结构来帮助我按照colorbar我想要的方式绘制:
require(RColorBrewer)
mat.intervals <- cut(mat,breaks=6)
interval.mat <- matrix(mat.intervals,nrow=50,ncol=1000,dimnames=list(rownames(mat),colnames(mat)))
interval.cols <- brewer.pal(6,"Set2")
names(interval.cols) <- levels(mat.intervals)
require(reshape2)
interval.df <- reshape2::melt(interval.mat,varnames=c("gene","sample"),value.name="expr")
interval.cols2 <- rep(interval.cols, each=ncol(mat))
color.df <- data.frame(range=c(0:(2*length(interval.cols)-1)),colors=c(0:(2*length(interval.cols)-1)))
color.df <- setNames(data.frame(color.df$range,color.df$colors),NULL)
for (i in 1:(2*length(interval.cols))) {
color.df[[2]][[i]] <- interval.cols[[(i + 1) / …Run Code Online (Sandbox Code Playgroud) 我有一个matrix(来自几个条件的基因表达):
set.seed(1)
mat <- matrix(rnorm(50*10),nrow=50,ncol=10,dimnames=list(paste("C",1:50,sep="."),paste("G",1:10,sep=".")))
Run Code Online (Sandbox Code Playgroud)
我想将其绘制为heatmap使用plotlyin R。
require(plotly)
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",colors=colorRamp(c("darkblue","white","darkred")),colorbar=list(title="Score",len=0.4)) %>%
layout(yaxis=list(title="Condition"),xaxis=list(title="Gene"))
Run Code Online (Sandbox Code Playgroud)
工作正常。
但是,我想添加仅在悬停时才能看到的文本。
我认为这会奏效:
conditions.text <- paste(paste("C",1:50,sep="."),rep(paste(LETTERS[sample(26,10,replace=T)],collapse=""),50),sep=":")
heatmap.plotly <- plot_ly(x=colnames(mat),y=rownames(mat),z=mat,type="heatmap",colors=colorRamp(c("darkblue","white","darkred")),colorbar=list(title="Score",len=0.4),hoverinfo='text',text=~conditions.text) %>%
layout(yaxis=list(title="Condition"),xaxis=list(title="Gene"))
Run Code Online (Sandbox Code Playgroud)
但事实并非如此。将鼠标悬停在情节上时,我实际上看不到任何文字。
请注意,我正在使用 amatrix而不是melted data.frame.
我试图得到一个内部节点的高度dendrogram的BFS顺序.
该utils::str函数dendrogram按BFS顺序打印.所以我认为我会使用它(将输出重定向到文件并对其进行一些解析以获取我需要的信息).
我的'dendrogram'2个分支机构和5902个成员总共可下载RDS文件链接:dendro.RDS.
当我尝试:
utils::str(dendro)
Run Code Online (Sandbox Code Playgroud)
我收到此错误:
Error in getOption("OutDec") : node stack overflow
Error during wrapup: node stack overflow
Run Code Online (Sandbox Code Playgroud)
我尝试使用一个简单的递归函数:
nodeHeights <- function(dendro){
if(is.leaf(dendro))
0
else{
cat(attr(dendro,"height"),"\n")
max(nodeHeights(dendro[[1]]),nodeHeights(dendro[[2]]))+1
}
}
Run Code Online (Sandbox Code Playgroud)
但是:nodeHeights(dendro)
抛出此错误:
Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
Run Code Online (Sandbox Code Playgroud)
任何的想法?或任何建议如何获得的节点高度dendrogram的BFS订单?
我正在尝试使用这些数据来拟合beta regression模型:betareg functionbetareg package
df <- data.frame(category=c("c1","c1","c1","c1","c1","c1","c2","c2","c2","c2","c2","c2","c3","c3","c3","c3","c3","c3","c4","c4","c4","c4","c4","c4","c5","c5","c5","c5","c5","c5"),
value=c(6.6e-18,0.0061,0.015,1.1e-17,4.7e-17,0.0032,0.29,0.77,0.64,0.59,0.39,0.72,0.097,0.074,0.073,0.08,0.06,0.11,0.034,0.01,0.031,0.041,4.7e-17,0.025,0.58,0.14,0.24,0.29,0.55,0.15),stringsAsFactors = F)
df$category <- factor(df$category,levels=c("c1","c2","c3","c4","c5"))
Run Code Online (Sandbox Code Playgroud)
使用此命令:
library(betareg)
fit <- betareg(value ~ category, data = df)
Run Code Online (Sandbox Code Playgroud)
我得到这个error:
Error in chol.default(K) :
the leading minor of order 5 is not positive definite
In addition: Warning message:
In sqrt(wpp) : NaNs produced
Error in chol.default(K) :
the leading minor of order 5 is not positive definite
In addition: Warning messages:
1: In betareg.fit(X, Y, Z, weights, offset, link, link.phi, …Run Code Online (Sandbox Code Playgroud) 我正在尝试创建一个森林图,R plotly在其中我要通过相应的p值对效果大小(点)及其误差线进行颜色编码。
以下是玩具数据:
set.seed(1)
factors <- paste0(1:25,":age")
effect.sizes <- rnorm(25,0,1)
effect.errors <- abs(rnorm(25,0,1))
p.values <- runif(25,0,1)
Run Code Online (Sandbox Code Playgroud)
这是我正在尝试的:
library(dplyr)
plotly::plot_ly(type='scatter',mode="markers",y=~factors,x=~effect.sizes,color=~p.values,colors=grDevices::colorRamp(c("darkred","gray"))) %>%
plotly::add_trace(error_x=list(array=effect.errors),marker=list(color=~p.values,colors=grDevices::colorRamp(c("darkred","gray")))) %>%
plotly::colorbar(limits=c(0,1),len=0.4,title="P-Value") %>%
plotly::layout(xaxis=list(title="Effect Size",zeroline=T,showticklabels=T),yaxis=list(title="Factor",zeroline=F,showticklabels=T))
Run Code Online (Sandbox Code Playgroud)
这给了我:
除了以下几点,这与我想要的非常接近:
trace下面的两个图例colorbarfactors任何的想法?
我有我想要的XY数据(tSNE高维数据的 2D嵌入)scatter plot。数据被分配给几个clusters,所以我想对点进行颜色编码cluster,然后为每个点添加一个标签cluster,该标签与clusters具有相同的颜色编码,并且位于(尽可能)从cluster的点。
任何想法如何做到这一点使用R在任ggplot2和ggrepel或plotly?
这是示例数据(XY坐标和cluster分配在 中df,标签在 中label.df)及其ggplot2一部分:
library(dplyr)
library(ggplot2)
set.seed(1)
df <- do.call(rbind,lapply(seq(1,20,4),function(i) data.frame(x=rnorm(50,mean=i,sd=1),y=rnorm(50,mean=i,sd=1),cluster=i)))
df$cluster <- factor(df$cluster)
label.df <- data.frame(cluster=levels(df$cluster),label=paste0("cluster: ",levels(df$cluster)))
ggplot(df,aes(x=x,y=y,color=cluster))+geom_point()+theme_minimal()+theme(legend.position="none")
Run Code Online (Sandbox Code Playgroud)
我有data.frame几列:
set.seed(1)
df <- data.frame(cluster=LETTERS[1:4],group=c(rep("m",2),rep("f",2)),point=rnorm(4),err=runif(4,0.1,0.3))
Run Code Online (Sandbox Code Playgroud)
并且我将添加另一列"\n" concatenates,其中列名在值之前,其各自行的所有列。
我知道这个:
library(tidyr)
library(dplyr)
tidyr::unite(df,text,sep="\n")
Run Code Online (Sandbox Code Playgroud)
给我这个tibble:
text
1 A\nm\n0.487429052428485\n0.286941046221182
2 B\nm\n0.738324705129217\n0.142428504256532
3 C\nf\n0.575781351653492\n0.230334753217176
4 D\nf\n-0.305388387156356\n0.125111019192263
Run Code Online (Sandbox Code Playgroud)
但我想要的是tibble:
text
1 cluster: A\ngroup: m\npoint: 0.487429052428485\nerr: 0.286941046221182
2 cluster: B\ngroup: m\npoint: 0.738324705129217\nerr: 0.142428504256532
3 cluster: C\ngroup: f\npoint: 0.575781351653492\nerr: 0.230334753217176
4 cluster: D\ngroup: f\npoint: -0.305388387156356\nerr: 0.125111019192263
Run Code Online (Sandbox Code Playgroud)
任何的想法?
r ×10
plotly ×5
ggplot2 ×4
heatmap ×2
beta ×1
dendrogram ×1
ggrepel ×1
graph ×1
hover ×1
layout ×1
lm ×1
r-forestplot ×1
regression ×1
scatter-plot ×1
tibble ×1
tidyr ×1