Chapter 9 (Faraway book) exercise 3

Using the ozone data, fit a model with O3 as the response and temp, humidity and ibh as pre- dictors. Use the BoxCox method to determine the best transformation on the response.

(1) Using boxcox() code from MASS library

rm(list=ls(all=TRUE)) 
library(faraway)
library(MASS)
data(ozone)
attach(ozone)
ozone[1:3,]
##   O3   vh wind humidity temp  ibh dpg ibt vis doy
## 1  3 5710    4       28   40 2693 -25  87 250  33
## 2  5 5700    3       37   45  590 -24 128 100  34
## 3  5 5760    3       51   54 1450  25 139  60  35
g <- lm(O3~temp+humidity+ibh)
bla <-boxcox(g,plotit=T,lambda=seq(0.1, 0.5, by=0.1))

bla$x[which(bla$y==max(bla$y))]  # the lambda value that gives max Log-likelihood
## [1] 0.2777778

Using boxcox(), we have the best \(\lambda\) at 0.2777778.

(2) Using constructed variables method (class notes p96)

y.dot <- exp(mean(log(O3)))                         #geometric mean
w <- y.dot * log(O3) * (1/2 * log(O3) - log(y.dot))  #p102, start from lambda0 = 0
h.lambda0.yi <- log(O3) * y.dot                      #p93 (41)&(42)
reg <- lm(h.lambda0.yi~ w + temp + humidity + ibh)   #p98 (44)

#iterations
lambda=rep(0,50)
lambda[1]=0
lambda[2]=-summary(reg)$coefficients[2,1]
for (i in 2:49)
{
new.lambda0 = lambda[i]
h.lamb0.yi <-  y.dot ^ (1-new.lambda0) * (O3^new.lambda0 - 1)/new.lambda0
wi = -(((-1 + O3^new.lambda0) * y.dot^(1 - new.lambda0))/new.lambda0^2) + (O3^new.lambda0*y.dot^(1 - new.lambda0)*log(O3))/new.lambda0 - ((-1 + O3^new.lambda0)*y.dot^(1 - new.lambda0)*log(y.dot))/new.lambda0
reg_w <- lm(h.lamb0.yi ~ wi + temp + humidity + ibh) 
beta_w <- summary(reg_w)$coefficients[2,1]
lambda[i+1] <- new.lambda0 -beta_w 
}
summary(reg_w)
## 
## Call:
## lm(formula = h.lamb0.yi ~ wi + temp + humidity + ibh)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2417  -2.2482   0.1849   2.5952  10.3222 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.2789199  1.7915411  -1.272    0.204    
## wi          -0.0024509  0.0778527  -0.031    0.975    
## temp         0.2756934  0.0171716  16.055  < 2e-16 ***
## humidity     0.0630937  0.0108665   5.806 1.52e-08 ***
## ibh         -0.0010157  0.0001324  -7.672 2.00e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.644 on 325 degrees of freedom
## Multiple R-squared:  0.7161, Adjusted R-squared:  0.7126 
## F-statistic:   205 on 4 and 325 DF,  p-value: < 2.2e-16
plot(lambda, type='l'); abline(bla$x[which(bla$y==max(bla$y))],0, lty='dotted',col='red')
title("The Method of Constructed Variables")

So lambda oscillates approaching 0.2777778, which is exactly the result we get from boxcox() embedded code.