Ahs*_*hsk 1 plot regression r ggplot2 mixed-models
我想获得模型交互效果的预测(例如使用sjPlot::plot_model或ggeffects打包,然后将它们提供给 ggplot2 以可视化 ggplot2 中的交互。有人可以帮助编写代码来做到这一点吗?我的问题与堆栈上的其他问题不同,因为它首先使用标准包进行预测,然后绘制它们。
我当前的型号:
\nmodel <- glmmTMB(total_count ~ mean_temp*lwd_duration + (1|year), family = nbinom1, data=df)\n\nsummary(model)\n\nFamily: nbinom1 ( log )\nFormula: total_count ~ mean_temp * lwd_duration + (1 | year)\nData: df\n\n AIC BIC logLik deviance df.resid \n 260.6 270.7 -124.3 248.6 34 \n\nRandom effects:\n\nConditional model:\n Groups Name Variance Std.Dev. \n year (Intercept) 3.683e-09 6.069e-05\nNumber of obs: 40, groups: year, 4\n\nDispersion parameter for nbinom1 family (): 178 \n\nConditional model:\n Estimate Std. Error z value Pr(>|z|) \n(Intercept) 9.609769 3.276724 2.933 0.00336 **\nmean_temp -0.341435 0.177426 -1.924 0.05431 . \nlwd_duration -0.105528 0.039145 -2.696 0.00702 **\nmean_temp:lwd_duration 0.005819 0.002078 2.801 0.00510 **\n---\nSignif. codes: 0 \xe2\x80\x98***\xe2\x80\x99 0.001 \xe2\x80\x98**\xe2\x80\x99 0.01 \xe2\x80\x98*\xe2\x80\x99 0.05 \xe2\x80\x98.\xe2\x80\x99 0.1 \xe2\x80\x98 \xe2\x80\x99 1\nRun Code Online (Sandbox Code Playgroud)\n可重现的例子:
\ndf <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, \n 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, \n 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, \n 4L), levels = c("2017", "2016", "2015", "2014"), class = "factor"), \n mean_temp = c(10.31, 10.31, 11.35, 11.35, 14.05, 14.05, 15.96, \n 15.96, 16.73, 16.73, 20.92, 20.92, 21.89, 21.89, 21.48, 21.48, \n 25.82, 25.82, 21.06, 21.06, 16.3, 16.3, 20.16, 20.16, 19.85, \n 19.85, 25.45, 25.45, 24.32, 24.32, 20.59, 20.78, 20.78, 20.78, \n 22.23, 22.23, 22.23, 19.71, 19.71, 19.71), lwd_duration = c(116.53, \n 116.53, 146.18, 146.18, 184.48, 184.48, 67.3, 67.3, 70.08, \n 70.08, 71.43, 71.43, 68.58, 68.58, 91.6, 91.6, 72.07, 72.07, \n 59.57, 59.57, 13.02, 13.02, 75.33, 75.33, 60.07, 60.07, 98.08, \n 98.08, 71.78, 71.78, 40.25, 54.5, 54.5, 54.5, 32.47, 32.47, \n 32.47, 61.73, 61.73, 61.73), total_count = c(0L, 0L, 0L, \n 0L, 0L, 0L, 0L, 11L, 0L, 0L, 4L, 2L, 6L, 6L, 11L, 1L, 12L, \n 0L, 1L, 2L, 2L, 2L, 18L, 362L, 135L, 684L, 34L, 123L, 21L, \n 6L, 0L, 3L, 0L, 0L, 0L, 0L, 4L, 4L, 3L, 13L)), class = c("grouped_df", \n "tbl_df", "tbl", "data.frame"), row.names = c(NA, -40L), groups = structure(list(\n year = structure(1:4, levels = c("2017", "2016", "2015", \n "2014"), class = "factor"), .rows = structure(list(1:10, \n 11:20, 21:30, 31:40), ptype = integer(0), class = c("vctrs_list_of", \n "vctrs_vctr", "list"))), row.names = c(NA, -4L), .drop = TRUE, class = c("tbl_df", \n "tbl", "data. Frame")))\nRun Code Online (Sandbox Code Playgroud)\n
从头开始执行此操作的典型方法是创建预测变量所有组合的数据框,然后在predict其上运行:
library(glmmTMB)
library(ggplot2)
model <- glmmTMB(total_count ~ mean_temp*lwd_duration + (1|year),
family = nbinom1, data = df)
model
pred_df <- expand.grid(mean_temp = seq(10, 25, len = 100),
lwd_duration = seq(10, 200, 0.1),
year = factor(2017, 2017:2014))
pred_df$total_count <- predict(model, newdata = pred_df, type = "response")
Run Code Online (Sandbox Code Playgroud)
绘制结果的方法有多种,但选择一个变量作为色标,一个变量作为 x 轴,可以得到美观且相当直观的输出。您可以在 x 轴上显示温度:
ggplot(pred_df, aes(mean_temp, total_count, color = lwd_duration,
group = lwd_duration)) +
geom_line(alpha = 0.1) +
scale_color_distiller(palette = "Spectral") +
coord_cartesian(ylim = range(df$total_count), xlim = range(df$mean_temp)) +
theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)
或者使用温度来表示颜色:
ggplot(pred_df, aes(lwd_duration, total_count, color = mean_temp,
group = mean_temp)) +
geom_line() +
scale_color_distiller(palette = "RdBu") +
coord_cartesian(ylim = range(df$total_count), xlim = range(df$lwd_duration)) +
theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)
尽管这些图看起来不错,但如果将实际数据与它们一起绘制,我怀疑它们有点过度拟合。
如果你想使用ggpredict,你可以这样做:
ggpredict(model, terms = c("mean_temp[10:27]", "lwd_duration[10:200]"),
type = "random") %>%
as.data.frame() %>%
rename(mean_temp = x, lwd_duration = group, total_count = predicted) %>%
mutate(lwd_duration = as.numeric(as.character(lwd_duration))) %>%
ggplot(aes(mean_temp, total_count, color = lwd_duration,
group = lwd_duration)) +
geom_line() +
geom_point(data = df, shape = 21, aes(fill = lwd_duration), color = "black",
size = 2.5) +
scale_color_distiller(palette = "Spectral") +
scale_fill_distiller(palette = "Spectral", guide = "none") +
coord_cartesian(ylim = range(df$total_count), xlim = range(df$mean_temp)) +
scale_y_continuous(trans = "log1p") +
theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)
请注意,您可以在术语名称后面的方括号中编辑术语的范围和密度(请参阅帮助文件中的详细信息)。您可能只能使用每个预测变量的几个水平,并且最多只能绘制三个变量 - 其他变量将保持稳定。