Using the ozone data, fit a model with O3 as the response and temp, humidity and ibh as pre- dictors. Use the BoxCox method to determine the best transformation on the response.
rm(list=ls(all=TRUE))
library(faraway)
library(MASS)
data(ozone)
attach(ozone)
ozone[1:3,]
## O3 vh wind humidity temp ibh dpg ibt vis doy
## 1 3 5710 4 28 40 2693 -25 87 250 33
## 2 5 5700 3 37 45 590 -24 128 100 34
## 3 5 5760 3 51 54 1450 25 139 60 35
g <- lm(O3~temp+humidity+ibh)
bla <-boxcox(g,plotit=T,lambda=seq(0.1, 0.5, by=0.1))
bla$x[which(bla$y==max(bla$y))] # the lambda value that gives max Log-likelihood
## [1] 0.2777778
Using boxcox(), we have the best \(\lambda\) at 0.2777778.
y.dot <- exp(mean(log(O3))) #geometric mean
w <- y.dot * log(O3) * (1/2 * log(O3) - log(y.dot)) #p102, start from lambda0 = 0
h.lambda0.yi <- log(O3) * y.dot #p93 (41)&(42)
reg <- lm(h.lambda0.yi~ w + temp + humidity + ibh) #p98 (44)
#iterations
lambda=rep(0,50)
lambda[1]=0
lambda[2]=-summary(reg)$coefficients[2,1]
for (i in 2:49)
{
new.lambda0 = lambda[i]
h.lamb0.yi <- y.dot ^ (1-new.lambda0) * (O3^new.lambda0 - 1)/new.lambda0
wi = -(((-1 + O3^new.lambda0) * y.dot^(1 - new.lambda0))/new.lambda0^2) + (O3^new.lambda0*y.dot^(1 - new.lambda0)*log(O3))/new.lambda0 - ((-1 + O3^new.lambda0)*y.dot^(1 - new.lambda0)*log(y.dot))/new.lambda0
reg_w <- lm(h.lamb0.yi ~ wi + temp + humidity + ibh)
beta_w <- summary(reg_w)$coefficients[2,1]
lambda[i+1] <- new.lambda0 -beta_w
}
summary(reg_w)
##
## Call:
## lm(formula = h.lamb0.yi ~ wi + temp + humidity + ibh)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2417 -2.2482 0.1849 2.5952 10.3222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2789199 1.7915411 -1.272 0.204
## wi -0.0024509 0.0778527 -0.031 0.975
## temp 0.2756934 0.0171716 16.055 < 2e-16 ***
## humidity 0.0630937 0.0108665 5.806 1.52e-08 ***
## ibh -0.0010157 0.0001324 -7.672 2.00e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.644 on 325 degrees of freedom
## Multiple R-squared: 0.7161, Adjusted R-squared: 0.7126
## F-statistic: 205 on 4 and 325 DF, p-value: < 2.2e-16
plot(lambda, type='l'); abline(bla$x[which(bla$y==max(bla$y))],0, lty='dotted',col='red')
title("The Method of Constructed Variables")
So lambda oscillates approaching 0.2777778, which is exactly the result we get from boxcox() embedded code.