1 Problem Statement

An engineer studied the effect of four variables on a dimensionless factor used to describe pressure drops in a screen-plate bubble column Table B.9 summarizes the data

2 Questions 1 through 4

#do the first order interaction model
summary(lm(y~(x1+x2+x3+x4)^2,data=dat))
## 
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + x4)^2, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x3              NA         NA      NA       NA    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x3              NA         NA      NA       NA    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13

3 Question 5 to 8

# do the reduced model with main effects only
summary(lm(y~(x1+x2+x3+x4),data=dat))
## 
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + x4), data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9958 -3.3092 -0.2419  3.3924 10.5668 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.89453    4.32508   1.363  0.17828    
## x1          -0.47790    0.34002  -1.406  0.16530    
## x2           0.18271    0.01718  10.633 3.78e-15 ***
## x3          35.40284   11.09960   3.190  0.00232 ** 
## x4           5.84391    2.90978   2.008  0.04935 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.014 on 57 degrees of freedom
## Multiple R-squared:  0.6914, Adjusted R-squared:  0.6697 
## F-statistic: 31.92 on 4 and 57 DF,  p-value: 5.818e-14

4 Question 8 to 11

#Let's do the best model
summary(lm(y~x2+x3+x4+x2:x4,data=dat))
## 
## Call:
## lm(formula = y ~ x2 + x3 + x4 + x2:x4, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.959 -3.358 -1.131  3.040 11.646 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.52261    4.03964   0.377  0.70763    
## x2           0.38056    0.06084   6.255 5.47e-08 ***
## x3          34.51062   10.29961   3.351  0.00144 ** 
## x4           9.52471    2.96093   3.217  0.00214 ** 
## x2:x4       -0.30472    0.09056  -3.365  0.00137 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.658 on 57 degrees of freedom
## Multiple R-squared:  0.7336, Adjusted R-squared:  0.7149 
## F-statistic: 39.24 on 4 and 57 DF,  p-value: 9.297e-16
#store object into model
model<-lm(y~x2+x3+x4+x2:x4,data=dat)

5 Question 12 to 19

#setup the PI & CI values 
newx2<-c(10.0,3)
newx3<-c(0.5,0.25)
newx4<-c(0.75,0.85)

data.frame(newx2,newx3,newx4)
##   newx2 newx3 newx4
## 1    10  0.50  0.75
## 2     3  0.25  0.85
#Do the Fitted values, CI & PI
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4))
##        1        2 
## 27.44168 18.61092
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4),interval="confidence")
##        fit      lwr      upr
## 1 27.44168 24.00618 30.87717
## 2 18.61092 15.76975 21.45208
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4),interval="prediction")
##        fit       lwr      upr
## 1 27.44168 17.501496 37.38186
## 2 18.61092  8.860183 28.36165

6 Complete R Code

Full source code for 3.13

## --------- Multiple Regression in R using LM
# problem 3.13

#Read in the excel version of the data 
library(readxl)
dat<- readxl::read_excel("data-table-B9.xlsx")

#do the first order interaction model
summary(lm(y~(x1+x2+x3+x4)^2,data=dat))

# do the reduced model with main effects only
summary(lm(y~(x1+x2+x3+x4),data=dat))
#summary(lm(y~(x1+x2+x3+x4)^2-x1:x3-x2:x3,data=dat))

#Let's do the best model
summary(lm(y~x2+x3+x4+x2:x4,data=dat))

#store object into model
model<-lm(y~x2+x3+x4+x2:x4,data=dat)

#setup the PI & CI values 
newx2<-c(10.0,3)
newx3<-c(0.5,0.25)
newx4<-c(0.75,0.85)

data.frame(newx2,newx3,newx4)
#Do the Fitted values, CI & PI
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4))
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4),interval="confidence")
predict(model,data.frame(x2=newx2,x3=newx3,x4=newx4),interval="prediction")