我正在尝试并排绘制两个图表,并使用一个常见的图例,该图例包含两个图表之间的所有变量(图表之间的某些变量是不同的)。
这是我一直在尝试的模拟示例:
#make relative abundance values for n rows
makeData <- function(n){
n <- n
x <- runif(n, 0, 1)
y <- x / sum(x)
}
#make random matrices filled with relative abundance values
makeDF <- function(col, rw){
df <- matrix(ncol=col, nrow=rw)
for(i in 1:ncol(df)){
df[,i] <- makeData(nrow(df))
}
return(df)
}
#create df1 and assign col names
df1 <- makeDF(4, 5)
colSums(df1) #verify relative abundance values = 1
df1 <- as.data.frame(df1)
colnames(df1) <- c("taxa","s1", "s2", "s3")
df1$taxa <- c("ASV1", "ASV2", "ASV3", "ASV4", "ASV5")
#repeat for df2
df2 <- makeDF(4,5)
df2 <- as.data.frame(df2)
colnames(df2) <- c("taxa","s1", "s2", "s3")
df2$taxa <- c("ASV1", "ASV5", "ASV6", "ASV7", "ASV8")
# convert wide data format to long format -- for plotting
library(reshape2)
makeLong <- function(df){
df.long <- melt(df, id.vars="taxa",
measure.vars=grep("s\\d+", names(df), val=T),
variable.name="sample",
value.name="value")
return(df.long)
}
df1 <- makeLong(df1)
df2 <- makeLong(df2)
#generate distinct colours for each asv
taxas <- union(df1$taxa, df2$taxa)
library("RColorBrewer")
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
colpals <- qual_col_pals[c("Set1", "Dark2", "Set3"),] #select colour palettes
col_vector = unlist(mapply(brewer.pal, colpals$maxcolors, rownames(colpals)))
taxa.col=sample(col_vector, length(taxas))
names(taxa.col) <- taxas
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")
Run Code Online (Sandbox Code Playgroud)
(如果您对如何生成更好的模拟数据有建议,请务必!)
当我运行代码时,我能够在一张图中获得两个图表,但图例并未包含两个图中的所有变量:
根据我在网上搜索的内容,图例仅在图形之间的变量相同时才起作用,但就我而言,我有相似和不同的变量。
谢谢你的帮助!
也许这就是您正在寻找的:
将变量转换taxa为水平等于taxas变量的因子,即包括两个数据集中的所有水平。
向两个scale_fill_manual添加参数drop=FALSE以防止删除未使用的因子级别。
注意:我只添加了代码的相关部分,并在脚本开头将种子设置为42。
set.seed(42)
df1$taxa <- factor(df1$taxa, taxas)
df2$taxa <- factor(df2$taxa, taxas)
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity") +
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")
Run Code Online (Sandbox Code Playgroud)