如何在ggplot2中绘制混合模型的交互效果?

Ahs*_*hsk 1 plot regression r ggplot2 mixed-models

我想获得模型交互效果的预测(例如使用sjPlot::plot_modelggeffects打包,然后将它们提供给 ggplot2 以可视化 ggplot2 中的交互。有人可以帮助编写代码来做到这一点吗?我的问题与堆栈上的其他问题不同,因为它首先使用标准包进行预测,然后绘制它们。

\n

我当前的型号:

\n
model <- glmmTMB(total_count ~ mean_temp*lwd_duration + (1|year), family = nbinom1, data=df)\n\nsummary(model)\n\nFamily: nbinom1  ( log )\nFormula:          total_count ~ mean_temp * lwd_duration + (1 | year)\nData: df\n\n     AIC      BIC   logLik deviance df.resid \n   260.6    270.7   -124.3    248.6       34 \n\nRandom effects:\n\nConditional model:\n Groups Name        Variance  Std.Dev. \n year   (Intercept) 3.683e-09 6.069e-05\nNumber of obs: 40, groups:  year, 4\n\nDispersion parameter for nbinom1 family ():  178 \n\nConditional model:\n                        Estimate Std. Error z value Pr(>|z|)   \n(Intercept)             9.609769   3.276724   2.933  0.00336 **\nmean_temp              -0.341435   0.177426  -1.924  0.05431 . \nlwd_duration           -0.105528   0.039145  -2.696  0.00702 **\nmean_temp:lwd_duration  0.005819   0.002078   2.801  0.00510 **\n---\nSignif. codes:  0 \xe2\x80\x98***\xe2\x80\x99 0.001 \xe2\x80\x98**\xe2\x80\x99 0.01 \xe2\x80\x98*\xe2\x80\x99 0.05 \xe2\x80\x98.\xe2\x80\x99 0.1 \xe2\x80\x98 \xe2\x80\x99 1\n
Run Code Online (Sandbox Code Playgroud)\n

可重现的例子:

\n
df <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, \n                                        1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, \n                                        3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, \n                                        4L), levels = c("2017", "2016", "2015", "2014"), class = "factor"), \n                     mean_temp = c(10.31, 10.31, 11.35, 11.35, 14.05, 14.05, 15.96, \n                                   15.96, 16.73, 16.73, 20.92, 20.92, 21.89, 21.89, 21.48, 21.48, \n                                   25.82, 25.82, 21.06, 21.06, 16.3, 16.3, 20.16, 20.16, 19.85, \n                                   19.85, 25.45, 25.45, 24.32, 24.32, 20.59, 20.78, 20.78, 20.78, \n                                   22.23, 22.23, 22.23, 19.71, 19.71, 19.71), lwd_duration = c(116.53, \n                                                                                               116.53, 146.18, 146.18, 184.48, 184.48, 67.3, 67.3, 70.08, \n                                                                                               70.08, 71.43, 71.43, 68.58, 68.58, 91.6, 91.6, 72.07, 72.07, \n                                                                                               59.57, 59.57, 13.02, 13.02, 75.33, 75.33, 60.07, 60.07, 98.08, \n                                                                                               98.08, 71.78, 71.78, 40.25, 54.5, 54.5, 54.5, 32.47, 32.47, \n                                                                                               32.47, 61.73, 61.73, 61.73), total_count = c(0L, 0L, 0L, \n                                                                                                                                            0L, 0L, 0L, 0L, 11L, 0L, 0L, 4L, 2L, 6L, 6L, 11L, 1L, 12L, \n                                                                                                                                            0L, 1L, 2L, 2L, 2L, 18L, 362L, 135L, 684L, 34L, 123L, 21L, \n                                                                                                                                            6L, 0L, 3L, 0L, 0L, 0L, 0L, 4L, 4L, 3L, 13L)), class = c("grouped_df", \n                                                                                                                                                                                                     "tbl_df", "tbl", "data.frame"), row.names = c(NA, -40L), groups = structure(list(\n                                                                                                                                                                                                       year = structure(1:4, levels = c("2017", "2016", "2015", \n                                                                                                                                                                                                                                        "2014"), class = "factor"), .rows = structure(list(1:10, \n                                                                                                                                                                                                                                                                                           11:20, 21:30, 31:40), ptype = integer(0), class = c("vctrs_list_of", \n                                                                                                                                                                                                                                                                                                                                               "vctrs_vctr", "list"))), row.names = c(NA, -4L), .drop = TRUE, class = c("tbl_df", \n                                                                                                                                                                                                                                                                                                                                                                                                                        "tbl", "data. Frame")))\n
Run Code Online (Sandbox Code Playgroud)\n

All*_*ron 6

从头开始执行此操作的典型方法是创建预测变量所有组合的数据框,然后在predict其上运行:

library(glmmTMB)
library(ggplot2)

model <- glmmTMB(total_count ~ mean_temp*lwd_duration + (1|year),
                 family = nbinom1, data = df)
model

pred_df <- expand.grid(mean_temp = seq(10, 25, len = 100),
                       lwd_duration = seq(10, 200, 0.1),
                       year = factor(2017, 2017:2014))

pred_df$total_count <- predict(model, newdata = pred_df, type = "response")
Run Code Online (Sandbox Code Playgroud)

绘制结果的方法有多种,但选择一个变量作为色标,一个变量作为 x 轴,可以得到美观且相当直观的输出。您可以在 x 轴上显示温度:

ggplot(pred_df, aes(mean_temp, total_count, color = lwd_duration,
                    group = lwd_duration)) +
  geom_line(alpha = 0.1) +
  scale_color_distiller(palette = "Spectral") +
  coord_cartesian(ylim = range(df$total_count), xlim = range(df$mean_temp)) +
  theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

或者使用温度来表示颜色:

ggplot(pred_df, aes(lwd_duration, total_count, color = mean_temp,
                    group = mean_temp)) +
  geom_line() +
  scale_color_distiller(palette = "RdBu") +
  coord_cartesian(ylim = range(df$total_count), xlim = range(df$lwd_duration)) +
  theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

尽管这些图看起来不错,但如果将实际数据与它们一起绘制,我怀疑它们有点过度拟合。


如果你想使用ggpredict,你可以这样做:

ggpredict(model, terms = c("mean_temp[10:27]", "lwd_duration[10:200]"),
          type = "random") %>%
  as.data.frame() %>%
  rename(mean_temp = x, lwd_duration = group, total_count = predicted) %>%
  mutate(lwd_duration = as.numeric(as.character(lwd_duration))) %>%
  ggplot(aes(mean_temp, total_count, color = lwd_duration, 
             group = lwd_duration)) + 
  geom_line() +
  geom_point(data = df, shape = 21, aes(fill = lwd_duration), color = "black",
             size = 2.5) +
  scale_color_distiller(palette = "Spectral") +
  scale_fill_distiller(palette = "Spectral", guide = "none") +
  coord_cartesian(ylim = range(df$total_count), xlim = range(df$mean_temp)) +
  scale_y_continuous(trans = "log1p") +
  theme_minimal(base_size = 22)
Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

请注意,您可以在术语名称后面的方括号中编辑术语的范围和密度(请参阅帮助文件中的详细信息)。您可能只能使用每个预测变量的几个水平,并且最多只能绘制三个变量 - 其他变量将保持稳定。