Data preparation
cementdata <- read.csv("C:/Users/welcome/Downloads/BEST-Hald Cement data.csv", header = T) # Load data
attach(cementdata) # Attach dataset
# Extract x and y from data
x <- model.matrix(Y ~., cementdata) # Design matrix for predictors (xi)
y <- as.matrix(cementdata[,1]) # Response variable (y)
Test of hypothesis
Null hypothesis: B1 = B2 = B3 = B4 = 0.
ALternate hypothesis: Atleast one Bi != 0.
mlreg1 <- lm(Y ~., data = cementdata) # Fitting the model
modelcheck <- anova(mlreg1) # X1 and X2 explain variablity in Y and therefore reject Ho
print(modelcheck)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 1450.08 1450.08 242.3679 2.888e-07 ***
## X2 1 1207.78 1207.78 201.8705 5.863e-07 ***
## X3 1 9.79 9.79 1.6370 0.2366
## X4 1 0.25 0.25 0.0413 0.8441
## Residuals 8 47.86 5.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mlreg1) # Adjusted R-square: 0.9736 and Residual standard error: 2.446
##
## Call:
## lm(formula = Y ~ ., data = cementdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1750 -1.6709 0.2508 1.3783 3.9254
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.4054 70.0710 0.891 0.3991
## X1 1.5511 0.7448 2.083 0.0708 .
## X2 0.5102 0.7238 0.705 0.5009
## X3 0.1019 0.7547 0.135 0.8959
## X4 -0.1441 0.7091 -0.203 0.8441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.446 on 8 degrees of freedom
## Multiple R-squared: 0.9824, Adjusted R-squared: 0.9736
## F-statistic: 111.5 on 4 and 8 DF, p-value: 4.756e-07
Remove Insignificant variables ( X3 and X4) from the model and refit the model
mlreg2 <- lm(Y ~ X1 + X2) # refit model
modelcheck2 <- anova(mlreg2) # Reaffirms that X1 and X2
print(modelcheck2)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 1450.1 1450.08 250.43 2.088e-08 ***
## X2 1 1207.8 1207.78 208.58 5.029e-08 ***
## Residuals 10 57.9 5.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mlreg2) # Adjusted R-Square: 0.9744 and Residual standard error: 2.406
##
## Call:
## lm(formula = Y ~ X1 + X2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.893 -1.574 -1.302 1.363 4.048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.57735 2.28617 23.00 5.46e-10 ***
## X1 1.46831 0.12130 12.11 2.69e-07 ***
## X2 0.66225 0.04585 14.44 5.03e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.406 on 10 degrees of freedom
## Multiple R-squared: 0.9787, Adjusted R-squared: 0.9744
## F-statistic: 229.5 on 2 and 10 DF, p-value: 4.407e-09
We select the refit model, as it explains variablity in Y with minimum number of variables with high adjusted R-square.
Confidence interval for co-efficients
CI_coeff <- confint(mlreg2)
print(CI_coeff)
## 2.5 % 97.5 %
## (Intercept) 47.4834350 57.6712627
## X1 1.1980304 1.7385810
## X2 0.5600798 0.7644212
Confidence interval for Mean response predicition
CI_meanresponse <- predict(mlreg2, data.frame(X1 = X1, X2 = X2), interval = "confidence", level = 0.95)
print(CI_meanresponse)
## fit lwr upr
## 1 80.07400 77.38679 82.76122
## 2 73.25092 70.50710 75.99473
## 3 105.81474 103.96593 107.66355
## 4 89.25848 86.61956 91.89740
## 5 97.29251 95.74212 98.84291
## 6 105.15249 103.33331 106.97167
## 7 104.00205 100.77704 107.22706
## 8 74.57542 71.94224 77.20860
## 9 91.27549 89.00610 93.54487
## 10 114.53754 110.56117 118.51391
## 11 80.53567 78.23565 82.83570
## 12 112.43724 110.05956 114.81493
## 13 112.29344 109.81199 114.77489
Confidence interval for individual response predicition
CI_indresponse <- predict(mlreg2, data.frame(X1 = X1, X2 = X2), interval = "pred", level = 0.95)
print(CI_indresponse)
## fit lwr upr
## 1 80.07400 74.07664 86.07137
## 2 73.25092 67.22798 79.27386
## 3 105.81474 100.14329 111.48619
## 4 89.25848 83.28260 95.23436
## 5 97.29251 91.71121 102.87382
## 6 105.15249 99.49063 110.81435
## 7 104.00205 97.74522 110.25888
## 8 74.57542 68.60207 80.54877
## 9 91.27549 85.45334 97.09763
## 10 114.53754 107.86230 121.21278
## 11 80.53567 74.70152 86.36983
## 12 112.43724 106.57204 118.30245
## 13 112.29344 106.38540 118.20147