我每天进行回归分析.在我的情况下,这通常意味着估计连续和分类预测因子对各种结果的影响.生存分析可能是我执行的最常见的分析.这种分析通常在期刊中以非常方便的方式呈现.这是一个例子:
我想知道是否有人遇到过任何可以公开使用的功能或包:
直接使用回归对象(coxph,lm,lmer,glm或你拥有的任何对象)
绘制每个预测变量对森林图的影响,或者甚至允许绘制预测变量的选择.
对于分类预测变量,还会显示参考类别
显示因子变量的每个类别中的事件数(参见上图).显示p值.
最好使用ggplot
提供某种定制
我知道sjPlot包允许绘制lme4,glm和lm结果.但是没有包允许上面提到的coxph结果和coxph是最常用的回归方法之一.我试图自己创建这样的功能,但没有任何成功.我已经读过这篇伟大的帖子:从日记中重现表和情节,但无法弄清楚如何"概括"代码.
任何建议都非常受欢迎.
Nic*_*edy 21
编辑我现在将它们放在github上的一个包中.我已经使用从输出测试它coxph
,lm
和glm
.
例:
devtools::install_github("NikNakk/forestmodel")
library("forestmodel")
example(forest_model)
Run Code Online (Sandbox Code Playgroud)
在SO上发布的原始代码(由github包取代):
我专门针对coxph
模型进行了研究,尽管相同的技术可以扩展到其他回归模型,特别是因为它使用broom
包来提取系数.提供的forest_cox
函数将输出作为其参数coxph
.(数据model.frame
用于计算每组中的个体数量并查找因子的参考水平.)它还需要许多格式化参数.返回值是ggplot
可以打印,保存等的值.
输出建模在问题中显示的NEJM图上.
library("survival")
library("broom")
library("ggplot2")
library("dplyr")
forest_cox <- function(cox, widths = c(0.10, 0.07, 0.05, 0.04, 0.54, 0.03, 0.17),
colour = "black", shape = 15, banded = TRUE) {
data <- model.frame(cox)
forest_terms <- data.frame(variable = names(attr(cox$terms, "dataClasses"))[-1],
term_label = attr(cox$terms, "term.labels"),
class = attr(cox$terms, "dataClasses")[-1], stringsAsFactors = FALSE,
row.names = NULL) %>%
group_by(term_no = row_number()) %>% do({
if (.$class == "factor") {
tab <- table(eval(parse(text = .$term_label), data, parent.frame()))
data.frame(.,
level = names(tab),
level_no = 1:length(tab),
n = as.integer(tab),
stringsAsFactors = FALSE, row.names = NULL)
} else {
data.frame(., n = sum(!is.na(eval(parse(text = .$term_label), data, parent.frame()))),
stringsAsFactors = FALSE)
}
}) %>%
ungroup %>%
mutate(term = paste0(term_label, replace(level, is.na(level), "")),
y = n():1) %>%
left_join(tidy(cox), by = "term")
rel_x <- cumsum(c(0, widths / sum(widths)))
panes_x <- numeric(length(rel_x))
forest_panes <- 5:6
before_after_forest <- c(forest_panes[1] - 1, length(panes_x) - forest_panes[2])
panes_x[forest_panes] <- with(forest_terms, c(min(conf.low, na.rm = TRUE), max(conf.high, na.rm = TRUE)))
panes_x[-forest_panes] <-
panes_x[rep(forest_panes, before_after_forest)] +
diff(panes_x[forest_panes]) / diff(rel_x[forest_panes]) *
(rel_x[-(forest_panes)] - rel_x[rep(forest_panes, before_after_forest)])
forest_terms <- forest_terms %>%
mutate(variable_x = panes_x[1],
level_x = panes_x[2],
n_x = panes_x[3],
conf_int = ifelse(is.na(level_no) | level_no > 1,
sprintf("%0.2f (%0.2f-%0.2f)", exp(estimate), exp(conf.low), exp(conf.high)),
"Reference"),
p = ifelse(is.na(level_no) | level_no > 1,
sprintf("%0.3f", p.value),
""),
estimate = ifelse(is.na(level_no) | level_no > 1, estimate, 0),
conf_int_x = panes_x[forest_panes[2] + 1],
p_x = panes_x[forest_panes[2] + 2]
)
forest_lines <- data.frame(x = c(rep(c(0, mean(panes_x[forest_panes + 1]), mean(panes_x[forest_panes - 1])), each = 2),
panes_x[1], panes_x[length(panes_x)]),
y = c(rep(c(0.5, max(forest_terms$y) + 1.5), 3),
rep(max(forest_terms$y) + 0.5, 2)),
linetype = rep(c("dashed", "solid"), c(2, 6)),
group = rep(1:4, each = 2))
forest_headings <- data.frame(term = factor("Variable", levels = levels(forest_terms$term)),
x = c(panes_x[1],
panes_x[3],
mean(panes_x[forest_panes]),
panes_x[forest_panes[2] + 1],
panes_x[forest_panes[2] + 2]),
y = nrow(forest_terms) + 1,
label = c("Variable", "N", "Hazard Ratio", "", "p"),
hjust = c(0, 0, 0.5, 0, 1)
)
forest_rectangles <- data.frame(xmin = panes_x[1],
xmax = panes_x[forest_panes[2] + 2],
y = seq(max(forest_terms$y), 1, -2)) %>%
mutate(ymin = y - 0.5, ymax = y + 0.5)
forest_theme <- function() {
theme_minimal() +
theme(axis.ticks.x = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
strip.text = element_blank(),
panel.margin = unit(rep(2, 4), "mm")
)
}
forest_range <- exp(panes_x[forest_panes])
forest_breaks <- c(
if (forest_range[1] < 0.1) seq(max(0.02, ceiling(forest_range[1] / 0.02) * 0.02), 0.1, 0.02),
if (forest_range[1] < 0.8) seq(max(0.2, ceiling(forest_range[1] / 0.2) * 0.2), 0.8, 0.2),
1,
if (forest_range[2] > 2) seq(2, min(10, floor(forest_range[2] / 2) * 2), 2),
if (forest_range[2] > 20) seq(20, min(100, floor(forest_range[2] / 20) * 20), 20)
)
main_plot <- ggplot(forest_terms, aes(y = y))
if (banded) {
main_plot <- main_plot +
geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
forest_rectangles, fill = "#EFEFEF")
}
main_plot <- main_plot +
geom_point(aes(estimate, y), size = 5, shape = shape, colour = colour) +
geom_errorbarh(aes(estimate,
xmin = conf.low,
xmax = conf.high,
y = y),
height = 0.15, colour = colour) +
geom_line(aes(x = x, y = y, linetype = linetype, group = group),
forest_lines) +
scale_linetype_identity() +
scale_alpha_identity() +
scale_x_continuous(breaks = log(forest_breaks),
labels = sprintf("%g", forest_breaks),
expand = c(0, 0)) +
geom_text(aes(x = x, label = label, hjust = hjust),
forest_headings,
fontface = "bold") +
geom_text(aes(x = variable_x, label = variable),
subset(forest_terms, is.na(level_no) | level_no == 1),
fontface = "bold",
hjust = 0) +
geom_text(aes(x = level_x, label = level), hjust = 0, na.rm = TRUE) +
geom_text(aes(x = n_x, label = n), hjust = 0) +
geom_text(aes(x = conf_int_x, label = conf_int), hjust = 0) +
geom_text(aes(x = p_x, label = p), hjust = 1) +
forest_theme()
main_plot
}
Run Code Online (Sandbox Code Playgroud)
样本数据和图表
pretty_lung <- lung %>%
transmute(time,
status,
Age = age,
Sex = factor(sex, labels = c("Male", "Female")),
ECOG = factor(lung$ph.ecog),
`Meal Cal` = meal.cal)
lung_cox <- coxph(Surv(time, status) ~ ., pretty_lung)
print(forest_cox(lung_cox))
Run Code Online (Sandbox Code Playgroud)
对于“为我编写此代码”的问题,您肯定有很多具体要求。这不符合您的标准,但也许有人会发现它在基本图形中很有用
中心面板中的图可以是任何东西,只要每条线有一个图并且每条线都适合。(实际上这不是真的,如果您愿意,任何类型的绘图都可以进入该面板,因为它只是一个普通的绘图窗口)。这段代码中有三个例子:点、箱线图、线。
这是输入数据。只是“标题”的通用列表和索引,比“直接使用回归对象”要好得多。
## indices of headers
idx <- c(1,5,7,22)
l <- list('Make/model' = rownames(mtcars),
'No. of\ncycles' = mtcars$cyl,
MPG = mtcars$mpg)
l[] <- lapply(seq_along(l), function(x)
ifelse(seq_along(l[[x]]) %in% idx, l[[x]], paste0(' ', l[[x]])))
# List of 3
# $ Make/model : chr [1:32] "Mazda RX4" " Mazda RX4 Wag" " Datsun 710" " Hornet 4 Drive" ...
# $ No. of
# cycles: chr [1:32] "6" " 6" " 4" " 6" ...
# $ MPG : chr [1:32] "21" " 21" " 22.8" " 21.4" ...
Run Code Online (Sandbox Code Playgroud)
我意识到这段代码生成了一个pdf。我不想把它改成图片上传,所以我用imagemagick转换了
## choose the type of plot you want
pl <- c('point','box','line')[1]
## extra (or less) c(bottom, left, top, right) spacing for additions in margins
pad <- c(0,0,0,0)
## default padding
oma <- c(1,1,2,1)
## proportional size of c(left, middle, right) panels
xfig = c(.25,.45,.3)
## proportional size of c(caption, main plot)
yfig = c(.15, .85)
cairo_pdf('~/desktop/pl.pdf', height = 9, width = 8)
x <- l[-3]
lx <- seq_along(x[[1]])
nx <- length(lx)
xcf <- cumsum(xfig)[-length(xfig)]
ycf <- cumsum(yfig)[-length(yfig)]
plot.new()
par(oma = oma, mar = c(0,0,0,0), family = 'serif')
plot.window(range(seq_along(x)), range(lx))
## bars -- see helper fn below
par(fig = c(0,1,ycf,1), oma = par('oma') + pad)
bars(lx)
## caption
par(fig = c(0,1,0,ycf), mar = c(0,0,3,0), oma = oma + pad)
p <- par('usr')
box('plot')
rect(p[1], p[3], p[2], p[4], col = adjustcolor('cornsilk', .5))
mtext('\tFigure I: Some fancy statistical model results.',
adj = 0, font = 2, line = -1)
mtext(paste('\tHere we discuss the fancy graphic that you are currently reading',
'about. We worked really hard on it, and you\n\tshould appreciate',
'our hard work by citing this paper in your next manuscript.'),
adj = 0, line = -3)
## left panel -- select two columns
lp <- l[1:2]
par(fig = c(0,xcf[1],ycf,1), oma = oma + vec(pad, 0, 4))
plot_text(lp, c(1,2),
adj = rep(0:1, c(nx, nx)),
font = vec(1, 3, idx, nx),
col = c(rep(1, nx), vec(1, 'transparent', idx, nx))
) -> at
vtext(unique(at$x), max(at$y) + c(1,1.5), names(lp),
font = 2, xpd = NA, adj = c(0,1))
## right panel -- select three columns
rp <- l[c(2:3,3)]
par(fig = c(tail(xcf, -1),1,ycf,1), oma = oma + vec(pad, 0, 2))
plot_text(rp, c(1,2),
col = c(rep(vec(1, 'transparent', idx, nx), 2),
vec('transparent', 2, idx, nx)),
font = vec(1, 3, idx, nx),
adj = rep(c(NA,NA,1), each = nx)
) -> at
vtext(unique(at$x), max(at$y) + c(1.5,1,1), names(rp),
font = 2, xpd = NA, adj = c(NA, NA, 1), col = c(1,1,2))
## middle panel -- some generic plot
par(new = TRUE, fig = c(xcf[1], xcf[2], ycf, 1),
mar = c(0,2,0,2), oma = oma + vec(pad, 0, c(2,4)))
set.seed(1)
xx <- rev(rnorm(length(lx)))
yy <- rev(lx)
plot(xx, yy, ann = FALSE, axes = FALSE, type = 'n',
panel.first = {
segments(0, 0, 0, nx, lty = 'dashed')
},
panel.last = {
## option 1: points, confidence intervals
if (pl == 'point') {
points(xx, yy, pch = 15, col = vec(1, 2, idx, nx))
segments(xx * .5, yy, xx * 1.5, yy, col = vec(1, 2, idx, nx))
}
## option 2: boxplot, distributions
if (pl == 'box')
boxplot(rnorm(200) ~ rep_len(1:nx, 200), at = nx:1,
col = vec(par('bg'), 2, idx, nx),
horizontal = TRUE, axes = FALSE, add = TRUE)
## option 3: trend lines
if (pl == 'line') {
for (ii in 1:nx) {
n <- sample(40, 1)
wh <- which(nx:1 %in% ii)
lines(cumsum(rep(.1, n)) - 2, wh + cumsum(runif(n, -.2, .2)), xpd = NA,
col = (ii %in% idx) + 1L, lwd = c(1,3)[(ii %in% idx) + 1L])
}
}
## final touches
mtext('HR (95% confidence interval)', font = 2, line = -.5)
axis(1, at = -3:2, tcl = 0.2, mgp = c(0,0,0))
mtext(c('Worse','Better'), side = 1, line = 1, at = c(-4, 3))
try(silent = TRUE, {
## can just replace this with graphics::arrows with minor changes
## i just like the filled ones
rawr::arrows2(-.1, -1.5, -3, size = .5, width = .5)
rawr::arrows2(0.1, -1.5, 2, size = .5, width = .5)
})
}
)
box('outer')
dev.off()
Run Code Online (Sandbox Code Playgroud)
使用这四个辅助函数(参见正文中的示例使用)
vec <- function(default, replacement, idx, n) {
# vec(1, 0, 2:3, 5); vec(1:5, 0, 2:3)
out <- if (missing(n))
default else rep(default, n)
out[idx] <- replacement
out
}
bars <- function(x, cols = c(NA, grey(.9)), horiz = TRUE) {
# plot(1:10, type = 'n'); bars(1:10)
p <- par('usr')
cols <- vec(cols[1], cols[2], which(!x %% 2), length(x))
x <- rev(x) + 0.5
if (horiz)
rect(p[1], x - 1L, p[2], x, border = NA, col = rev(cols), xpd = NA) else
rect(x - 1L, p[3], x, p[4], border = NA, col = rev(cols), xpd = NA)
invisible()
}
vtext <- function(...) {Vectorize(text.default)(...); invisible()}
plot_text <- function(x, width = range(seq_along(x)), ...) {
# plot(col(mtcars), row(mtcars), type = 'n'); plot_text(mtcars)
lx <- lengths(x)[1]
rn <- range(seq_along(x))
sx <- (seq_along(x) - 1) / diff(rn) * diff(width) + width[1]
xx <- rep(sx, each = lx)
yy <- rep(rev(seq.int(lx)), length(x))
vtext(xx, yy, unlist(x), ..., xpd = NA)
invisible(list(x = sx, y = rev(seq.int(lx))))
}
Run Code Online (Sandbox Code Playgroud)