强制 GAM 模型拟合为单调并通过 R mgcv 的固定点 (x0, y0)

Question

强制 GAM 模型拟合为单调并通过 R mgcv 的固定点 (x0, y0)

我想在两个约束，以适应GAM模型数据simultatenously：（1）拟合单调（增加），（2）配合经过一个固定的点，比如说，(x0,y0)。

到目前为止，我设法让这两个约束分开工作：

对于 (1)，基于mgcv::pcls() 文档示例，通过使用mgcv::mono.con()来获得足以满足单调性的线性约束，并通过mgcv::pcls()使用约束来估计模型系数。
对于（2），基于这篇文章，通过使用模型公式中的偏移项将节点位置 x0 处的样条值设置为 0 +。

但是，我很难同时结合这两个约束。我想一种方法是mgcv::pcls()，但我既不能解决 (a) 使用偏移将节点位置 x0 处的样条值设置为 0 + 的类似技巧，也不能 (b) 设置相等约束（我认为）可以产生我的（2）约束设置）。

我还注意到，对于我的约束条件 (2)，将结点位置 x0 处的样条值设置为 0 的方法产生了奇怪的摆动结果（与不受约束的 GAM 拟合相比）——如下所示。

到目前为止的尝试：分别为两个约束下的数据拟合平滑函数

模拟一些数据

library(mgcv) set.seed(1) x <- sort(runif(100) * 4 - 1) f <- exp(4*x)/(1+exp(4*x)) y <- f + rnorm(100) * 0.1 dat <- data.frame(x=x, y=y)
Run Code Online (Sandbox Code Playgroud)
GAM 无约束（用于比较）

k <- 13 fit0 <- gam(y ~ s(x, k = k, bs = "cr"), data = dat) # predict from unconstrained GAM fit newdata <- data.frame(x = seq(-1, 3, length.out = 1000)) newdata$y_pred_fit0 <- predict(fit0, newdata = newdata)
Run Code Online (Sandbox Code Playgroud)
GAM 约束：（1）拟合是单调的（递增）

k <- 13 # Show regular spline fit (and save fitted object) f.ug <- gam(y~s(x,k=k,bs="cr")) # explicitly construct smooth term's design matrix sm <- smoothCon(s(x,k=k,bs="cr"),dat,knots=NULL)[[1]] # find linear constraints sufficient for monotonicity of a cubic regression spline # it assumes "cr" is the basis and its knots are provided as input F <- mono.con(sm$xp) G <- list( X=sm$X, C=matrix(0,0,0), # [0 x 0] matrix (no equality constraints) sp=f.ug$sp, # smoothing parameter estimates (taken from unconstrained model) p=sm$xp, # array of feasible initial parameter estimates y=y, w= dat$y * 0 + 1 # weights for data ) G$Ain <- F$A # matrix for the inequality constraints G$bin <- F$b # vector for the inequality constraints G$S <- sm$S # list of penalty matrices; The first parameter it penalizes is given by off[i]+1 G$off <- 0 # Offset values locating the elements of M$S in the correct location within each penalty coefficient matrix. (Zero offset implies starting in first location) p <- pcls(G); # fit spline (using smoothing parameter estimates from unconstrained fit) # predict newdata$y_pred_fit2 <- Predict.matrix(sm, data.frame(x = newdata$x)) %*% p # plot plot(y ~ x, data = dat) lines(y_pred_fit0 ~ x, data = newdata, col = 2, lwd = 2) lines(y_pred_fit2 ~ x, data = newdata, col = 4, lwd = 2)
Run Code Online (Sandbox Code Playgroud)
蓝线：受限；红线：无约束

GAM 约束：（2）拟合通过(x0,y0)=(-1, -0.1)

k <- 13 ## Create a spline basis and penalty ## Make sure there is a knot at the constraint point (here: -1) knots <- data.frame(x = seq(-1,3,length=k)) # explicit construction of a smooth term in a GAM sm <- smoothCon(s(x,k=k,bs="cr"), dat, knots=knots)[[1]] ## 1st parameter is value of spline at knot location -1, set it to 0 by dropping knot_which <- which(knots$x == -1) X <- sm$X[, -knot_which] ## spline basis S <- sm$S[[1]][-knot_which, -knot_which] ## spline penalty off <- dat$y * 0 + (-0.1) ## offset term to force curve through (x0, y0) ## fit spline constrained through (x0, y0) gam_1 <- gam(y ~ X - 1 + offset(off), paraPen = list(X = list(S))) # predict (add offset of -0.1) newdata_tmp <- Predict.matrix(sm, data.frame(x = newdata$x)) newdata_tmp <- newdata_tmp[, -knot_which] newdata$y_pred_fit1 <- (newdata_tmp %*% coef(gam_1))[, 1] + (-0.1) # plot plot(y ~ x, data = dat) lines(y_pred_fit0 ~ x, data = newdata, col = 2, lwd = 2) lines(y_pred_fit1 ~ x, data = newdata, col = 3, lwd = 2) # lines at cross of which the plot should go throught abline(v=-1, col = 3); abline(h=-0.1, col = 3)
Run Code Online (Sandbox Code Playgroud)
绿线：受限；红线：无约束

Answer 1

Gi_*_*_F. 5

我想你可以增加数据向量x和y用(x0, y0)，然后把一个（真的），高权重的第一观察（即增加权重向量的G列表）。

除了简单的加权策略，我们可以从初步平滑的结果开始编写二次规划问题。这在下面的第二个 R 代码中进行了说明（在这种情况下，我使用了 p 样条平滑器，参见 Eilers 和 Marx 1991）。

希望这会有所帮助（此处讨论了类似的问题）。

Rcode 示例 1（权重策略）

set.seed(123)
N = 100
x <- sort(runif(N) * 4 - 1)
f <- exp(4*x)/(1+exp(4*x))
y <- f + rnorm(N) * 0.1
x = c(-1, x)
y = c(-0.1, y)
dat = data.frame(x = x, y= y)

k <- 13
fit0 <- gam(y ~ s(x, k = k, bs = "cr"), data = dat)
# predict from unconstrained GAM fit
newdata <- data.frame(x = seq(-1, 3, length.out = 1000))
newdata$y_pred_fit0 <- predict(fit0, newdata = newdata)

k <- 13

# Show regular spline fit (and save fitted object)
f.ug <- gam(y~s(x,k=k,bs="cr"))

# explicitly construct smooth term's design matrix
sm <- smoothCon(s(x,k=k,bs="cr"),dat,knots=NULL)[[1]]
# find linear constraints sufficient for monotonicity of a cubic regression spline
# it assumes "cr" is the basis and its knots are provided as input
F <- mono.con(sm$xp)

G <- list(
X=sm$X,
C=matrix(0,0,0),  # [0 x 0] matrix (no equality constraints)
sp=f.ug$sp,       # smoothing parameter estimates (taken from unconstrained model)
p=sm$xp,          # array of feasible initial parameter estimates
y=y,           
w= c(1e8, 1:N * 0 + 1)  # weights for data    
)
G$Ain <- F$A        # matrix for the inequality constraints
G$bin <- F$b        # vector for the inequality constraints
G$S <- sm$S         # list of penalty matrices; The first parameter it penalizes is given by off[i]+1
G$off <- 0          # Offset values locating the elements of M$S in the correct location within each penalty coefficient matrix.  (Zero offset implies starting in first location)

p <- pcls(G);       # fit spline (using smoothing parameter estimates from unconstrained fit)

# predict 
newdata$y_pred_fit2 <- Predict.matrix(sm, data.frame(x = newdata$x)) %*% p
# plot
plot(y ~ x, data = dat)
lines(y_pred_fit0 ~ x, data = newdata, col = 2, lwd = 2)
lines(y_pred_fit2 ~ x, data = newdata, col = 4, lwd = 2)
abline(v = -1)
abline(h = -0.1)

Run Code Online (Sandbox Code Playgroud)

rm(list = ls())
library(mgcv)
library(pracma)
library(colorout)

set.seed(123)
N   = 100
x   = sort(runif(N) * 4 - 1)
f   = exp(4*x)/(1+exp(4*x))
y   = f + rnorm(N) * 0.1
x0  = -1
y0  = -0.1 
dat = data.frame(x = x, y= y)

k = 50
# Show regular spline fit (and save fitted object)
f.ug = gam(y~s(x,k=k,bs="ps"))

# explicitly construct smooth term's design matrix
sm = smoothCon(s(x,k=k,bs="ps"), dat,knots=NULL)[[1]]

# Build quadprog to estimate the coefficients 
scf = sapply(f.ug$smooth, '[[', 'S.scale')
lam = f.ug$sp / scf
Xp  = rbind(sm$X, sqrt(lam) * f.ug$smooth[[1]]$D)  
yp  = c(dat$y, rep(0, k - 2))
X0  = Predict.matrix(sm, data.frame(x = x0))
sm$deriv = 1
X1 = Predict.matrix(sm, data.frame(x = dat$x))
coef_mono = pracma::lsqlincon(Xp, yp, Aeq = X0, beq = y0, A = -X1, b = rep(0, N))

# fitted values
fit = sm$X %*% coef_mono
sm$deriv = 0
xf = seq(-1, 3, len = 1000)
Xf = Predict.matrix(sm, data.frame(x = xf))
fine_fit = Xf %*% coef_mono

# plot
par(mfrow = c(2, 1), mar = c(3,3,3,3))
plot(dat$x, dat$y, pch = 1, main= 'Data and fit')
lines(dat$x, f.ug$fitted, lwd = 2, col = 2)
lines(dat$x, fit, col = 4, lty = 1, lwd = 2)
lines(xf, fine_fit, col = 3, lwd = 2, lty = 2)
abline(h = -0.1)
abline(v = -1)

plot(dat$x, X1 %*% coef_mono, type = 'l', main = 'Derivative of the fit', lwd = 2)
abline(h = 0.0)

Run Code Online (Sandbox Code Playgroud)

归档时间：	4 年，10 月前
查看次数：	544 次
最近记录：	4 年，10 月前