Dataset

library(readxl)
dat<-read_excel("data-table-B9.xlsx")
str(dat)
## tibble [62 × 5] (S3: tbl_df/tbl/data.frame)
##  $ x1: num [1:62] 2.14 4.14 8.15 2.14 4.14 8.15 2.14 4.14 8.15 2.14 ...
##  $ x2: num [1:62] 10 10 10 10 10 10 10 10 10 10 ...
##  $ x3: num [1:62] 0.34 0.34 0.34 0.34 0.34 0.34 0.34 0.34 0.34 0.34 ...
##  $ x4: num [1:62] 1 1 1 0.246 0.379 0.474 0.141 0.234 0.311 0.076 ...
##  $ y : num [1:62] 28.9 31 26.4 27.2 26.1 23.2 19.7 22.1 22.8 29.2 ...
summary(dat)
##        x1              x2               x3               x4        
##  Min.   :2.140   Min.   :  1.25   Min.   :0.2500   Min.   :0.0760  
##  1st Qu.:4.140   1st Qu.:  2.63   1st Qu.:0.3400   1st Qu.:0.4733  
##  Median :4.300   Median : 10.00   Median :0.3400   Median :0.6160  
##  Mean   :4.784   Mean   : 22.25   Mean   :0.3477   Mean   :0.6028  
##  3rd Qu.:5.600   3rd Qu.: 10.10   3rd Qu.:0.3400   3rd Qu.:0.7788  
##  Max.   :8.150   Max.   :112.00   Max.   :0.5500   Max.   :1.0000  
##        y        
##  Min.   : 8.40  
##  1st Qu.:16.65  
##  Median :21.75  
##  Mean   :23.51  
##  3rd Qu.:27.95  
##  Max.   :48.60

Question 1

full_mod <- lm(y ~ (x1 + x2 + x3 + x4)^2, data = dat)
summary(full_mod)
## 
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + x4)^2, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x3              NA         NA      NA       NA    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x3              NA         NA      NA       NA    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13

Question 2

summary(full_mod)$r.squared
## [1] 0.7496145

Question 3

summary(full_mod)$fstatistic
##   value   numdf   dendf 
## 19.8342  8.0000 53.0000

Question 4

Reject H0; at least one regression coefficient is significantly different from zero.

Question 5

reduced_mod <- lm(y ~ x1 + x2 + x3 + x4, data = dat)
anova(reduced_mod, full_mod)
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2 + x3 + x4
## Model 2: y ~ (x1 + x2 + x3 + x4)^2
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     57 1432.8                              
## 2     53 1162.4  4    270.37 3.0819 0.02352 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Question 6

anova(reduced_mod, full_mod)
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2 + x3 + x4
## Model 2: y ~ (x1 + x2 + x3 + x4)^2
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     57 1432.8                              
## 2     53 1162.4  4    270.37 3.0819 0.02352 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Question 7

Fail to reject H0; none of the interactions are significant

Question 8

library(MASS)
step_mod <- stepAIC(full_mod, direction = "backward")
## Start:  AIC=199.73
## y ~ (x1 + x2 + x3 + x4)^2
## 
## 
## Step:  AIC=199.73
## y ~ x1 + x2 + x3 + x4 + x1:x2 + x1:x3 + x1:x4 + x2:x4 + x3:x4
## 
## 
## Step:  AIC=199.73
## y ~ x1 + x2 + x3 + x4 + x1:x2 + x1:x4 + x2:x4 + x3:x4
## 
##         Df Sum of Sq    RSS    AIC
## - x3:x4  1    10.942 1173.4 198.31
## - x1:x4  1    20.682 1183.1 198.82
## <none>               1162.4 199.73
## - x1:x2  1    38.737 1201.2 199.76
## - x2:x4  1   227.751 1390.2 208.82
## 
## Step:  AIC=198.31
## y ~ x1 + x2 + x3 + x4 + x1:x2 + x1:x4 + x2:x4
## 
##         Df Sum of Sq    RSS    AIC
## - x1:x4  1    19.837 1193.2 197.35
## <none>               1173.4 198.31
## - x1:x2  1    38.709 1212.1 198.32
## - x2:x4  1   228.394 1401.8 207.34
## - x3     1   249.320 1422.7 208.26
## 
## Step:  AIC=197.35
## y ~ x1 + x2 + x3 + x4 + x1:x2 + x2:x4
## 
##         Df Sum of Sq    RSS    AIC
## - x1:x2  1    32.307 1225.5 197.01
## <none>               1193.2 197.35
## - x2:x4  1   220.026 1413.2 205.84
## - x3     1   252.209 1445.4 207.24
## 
## Step:  AIC=197.01
## y ~ x1 + x2 + x3 + x4 + x2:x4
## 
##         Df Sum of Sq    RSS    AIC
## - x1     1    11.262 1236.8 195.57
## <none>               1225.5 197.01
## - x2:x4  1   207.286 1432.8 204.69
## - x3     1   248.430 1473.9 206.45
## 
## Step:  AIC=195.57
## y ~ x2 + x3 + x4 + x2:x4
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1236.8 195.57
## - x3     1    243.60 1480.4 204.72
## - x2:x4  1    245.68 1482.4 204.81
summary(step_mod)
## 
## Call:
## lm(formula = y ~ x2 + x3 + x4 + x2:x4, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.959 -3.358 -1.131  3.040 11.646 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.52261    4.03964   0.377  0.70763    
## x2           0.38056    0.06084   6.255 5.47e-08 ***
## x3          34.51062   10.29961   3.351  0.00144 ** 
## x4           9.52471    2.96093   3.217  0.00214 ** 
## x2:x4       -0.30472    0.09056  -3.365  0.00137 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.658 on 57 degrees of freedom
## Multiple R-squared:  0.7336, Adjusted R-squared:  0.7149 
## F-statistic: 39.24 on 4 and 57 DF,  p-value: 9.297e-16

Question 9

summary(step_mod)$adj.r.squared
## [1] 0.714905

Question 10

coef(step_mod)["x2:x4"]
##      x2:x4 
## -0.3047183

Question 11

Omit x1 since it is not used in the prediction.

Question 12, 13,14

new1 <- data.frame(x2 = 10, x3 = 0.5, x4 = 0.75)
predict(step_mod, new1, interval = "confidence")
##        fit      lwr      upr
## 1 27.44168 24.00618 30.87717

Question 15 & 16

new2 <- data.frame(x2 = 3, x3 = 0.25, x4 = 0.85)
predict(step_mod, new2, interval = "prediction")
##        fit      lwr      upr
## 1 18.61092 8.860183 28.36165

Question 17

True because prediction intervals are always wider.

Question 18

x1 was not statistically significant and removing it improved AIC.