# Load the assignment data set (data-table-B9).
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running on another machine.
dat <- read.csv("C:\\Users\\18067\\Documents\\Fareeha Imam\\TTU R11767331\\Spring 2023\\SDA\\Assignment 8\\data-table-B9(1).csv")

# The positional renaming below assumes exactly five columns: four predictors
# followed by the response. Fail loudly if the file has a different layout.
stopifnot(ncol(dat) == 5L)
colnames(dat) <- c("x1", "x2", "x3", "x4", "y")
variables <- c("x1", "x2", "x3", "x4")

1 Part a:

Consider a first-order multiple regression model with two-factor interactions. Check for model adequacy and take any corrective actions if deemed necessary. Test for the significance of the full regression model; what do you conclude?

# Fit the first-order model containing x1..x4 and every two-factor
# interaction among them, then print the coefficient table and overall F-test.
Fullmodel <- lm(
  formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * x4 + x3 * x4,
  data = dat
)
summary(object = Fullmodel)
## 
## Call:
## lm(formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * 
##     x4 + x3 * x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x3              NA         NA      NA       NA    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x3              NA         NA      NA       NA    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13
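--> Here we take corrective action by removing x1:x3 and x2:x3. As you can see, their coefficients are reported as NA ("not defined because of singularities"): they are aliased with other terms and cannot be estimated, so they should be eliminated.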
# Refit the full interaction model with the x1:x3 and x2:x3 terms
# explicitly subtracted from the formula.
Fullmodelcorrection <- lm(
  formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * x4 + x3 * x4 -
    x1:x3 - x2:x3,
  data = dat
)
summary(object = Fullmodelcorrection)
## 
## Call:
## lm(formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * 
##     x4 + x3 * x4 - x1:x3 - x2:x3, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13
--> Check for model Adequacy
# Diagnostic plots for the full model, followed by its residuals
# (one value per observation).
plot(Fullmodel)

resid(Fullmodel)
##            1            2            3            4            5            6 
##  2.990195122  6.715828908  5.375224650  8.578767237  6.433606173  3.734219038 
##            7            8            9           10           11           12 
##  2.093754070  3.511831138  3.817329466 12.222079252  5.770306630  4.993740658 
##           13           14           15           16           17           18 
##  2.441069895  0.136311533  1.161241943 -2.095198810 -4.907130359 -3.422131856 
##           19           20           21           22           23           24 
## -3.593945129 -1.801155768 -0.155899737  8.026174467  7.722028281  7.722715177 
##           25           26           27           28           29           30 
## -3.090940151 -3.671889645 -3.982355018 -3.491833405 -2.189477683 -4.422975706 
##           31           32           33           34           35           36 
## -2.727410367 -2.702287597 -2.354997502  1.407831279 -9.480442758 -2.949012768 
##           37           38           39           40           41           42 
## -6.001819558  0.014005657  0.060157963  0.862079843 -0.551382617 -0.688726767 
##           43           44           45           46           47           48 
## -4.093564991 -0.722193659 -1.638517538 -0.005476899 -6.079476886 -4.120378287 
##           49           50           51           52           53           54 
## -6.150758563 -0.638254127 -1.578440355 -2.045370729 -3.033696419  4.419776149 
##           55           56           57           58           59           60 
## -1.102692301 -4.321287081 -0.402372639 -1.879235054  2.879545753 -5.669884374 
##           61           62 
##  4.328351991  0.344440833
--> The question asks us to test the significance of the full regression model; for this we need to perform an F-test. Using the qf command we can get the critical F value for the full model.
# Critical F value (upper 1% point) for the overall significance test of the
# full model. The degrees of freedom are read from the fitted model instead of
# being typed in by hand: summary(Fullmodel) reports the F-statistic on
# 8 and 53 DF, not 10 and 49.
full_fstat <- summary(Fullmodel)$fstatistic
qf(0.99, df1 = full_fstat[["numdf"]], df2 = full_fstat[["dendf"]])
# Re-run to print the exact value; for 8 and 53 DF it is roughly 2.9,
# still far below the model F-statistic of 19.83.
--> Using the qf command, you can see that the critical F value is very low compared with the F-statistic of the model (19.83), so the full regression model is significant.
# Main-effects-only model: the four predictors without any interactions.
Reducedmod <- lm(formula = y ~ x1 + x2 + x3 + x4, data = dat)
summary(object = Reducedmod)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9958 -3.3092 -0.2419  3.3924 10.5668 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.89453    4.32508   1.363  0.17828    
## x1          -0.47790    0.34002  -1.406  0.16530    
## x2           0.18271    0.01718  10.633 3.78e-15 ***
## x3          35.40284   11.09960   3.190  0.00232 ** 
## x4           5.84391    2.90978   2.008  0.04935 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.014 on 57 degrees of freedom
## Multiple R-squared:  0.6914, Adjusted R-squared:  0.6697 
## F-statistic: 31.92 on 4 and 57 DF,  p-value: 5.818e-14
# Diagnostic plots for the main-effects-only model (Reducedmod is an lm fit).
plot(Reducedmod)

--> Conclusion: We can say that the model is adequate. The p-value of the overall F-test is very low and the R-squared value is high. One more reason for the model to be adequate is that the model's F-statistic is far larger than the critical F value.

2 Part b & c:

Test for the significance of all two-factor interactions using a partial F-test. What are your findings? Determine the best-fitting model using partial F and/or t-tests. What is the final model?

--> Initializing the final model. The final model consists of the terms below, all of which are significant.
# Final model: main effects of x2, x3 and x4 plus the x2:x4 interaction
# (x2 * x4 expands to x2 + x4 + x2:x4, so the main effects are not duplicated).
Finalmodel <- lm(formula = y ~ x2 + x3 + x4 + x2 * x4, data = dat)
summary(object = Finalmodel)
## 
## Call:
## lm(formula = y ~ x2 + x3 + x4 + x2 * x4, data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.959 -3.358 -1.131  3.040 11.646 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.52261    4.03964   0.377  0.70763    
## x2           0.38056    0.06084   6.255 5.47e-08 ***
## x3          34.51062   10.29961   3.351  0.00144 ** 
## x4           9.52471    2.96093   3.217  0.00214 ** 
## x2:x4       -0.30472    0.09056  -3.365  0.00137 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.658 on 57 degrees of freedom
## Multiple R-squared:  0.7336, Adjusted R-squared:  0.7149 
## F-statistic: 39.24 on 4 and 57 DF,  p-value: 9.297e-16
# Diagnostic plots for the final model (Finalmodel is an lm fit).
plot(Finalmodel)

--> Note: Terms whose p-value is not below 0.05 are not significant. The non-significant terms are: x1*x2, x1*x3, x1*x4, x2*x3, x3*x4. We also remove x1 because it has no significant effect on the response.
# Partial F-tests. anova() only yields an F-test when the smaller model is
# nested in the larger one. Reducedmod (contains x1, no interaction) and
# Finalmodel (contains x2:x4, no x1) are not nested and both have 57 residual
# df, which is why comparing them gave Df = 0 and no F or p-value.

# (b) Are the two-factor interactions jointly significant?
#     H0: all interaction coefficients are zero (main effects only).
anova(Reducedmod, Fullmodel)

# (c) Can the full model be reduced to the final model without loss of fit?
#     H0: the terms dropped from the full model have zero coefficients.
anova(Finalmodel, Fullmodel)

# Re-run to print the tables. From the summaries shown earlier (RSS 1432.8 and
# 1236.8 on 57 df; full-model residual SE 4.683 on 53 df) the F statistics
# should be roughly 3.1 and 0.85, each on 4 and 53 df: the interactions are
# jointly significant at the 5% level, while the terms dropped to reach the
# final model are not. Confirm against the regenerated output.
--> The final model is significant: the p-value of its overall F-test is 9.297e-16, which is far below 0.05.

3 Part d and e:

Using the model from part c), calculate a 95% confidence interval on the mean response at the following points of interest (note, if a variable in the points of interest are not in the model, then it is omitted).
Using the model from part c), calculate a 95% prediction interval on the mean response at the points of interest (note, if a variable in the points of interest are not in the model, then it is omitted)

# Main-effects model on x2, x3 and x4 only (no x1, no interaction).
Reducedmodel1 <- lm(formula = y ~ x2 + x3 + x4, data = dat)
summary(object = Reducedmodel1)
## 
## Call:
## lm(formula = y ~ x2 + x3 + x4, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2730  -3.4598  -0.5632   2.7904  12.3370 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.64065    4.26751   1.087  0.28134    
## x2           0.18302    0.01733  10.563 3.92e-15 ***
## x3          34.62435   11.17861   3.097  0.00301 ** 
## x4           4.56878    2.78788   1.639  0.10667    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.056 on 58 degrees of freedom
## Multiple R-squared:  0.6807, Adjusted R-squared:  0.6642 
## F-statistic: 41.21 on 3 and 58 DF,  p-value: 2.146e-14
# Diagnostic plots for the x2 + x3 + x4 model.
plot(Reducedmodel1)

# Show the first six rows of the data as a reminder of the column layout.
head(dat, n = 6L)
##     x1 x2   x3    x4    y
## 1 2.14 10 0.34 1.000 28.9
## 2 4.14 10 0.34 1.000 31.0
## 3 8.15 10 0.34 1.000 26.4
## 4 2.14 10 0.34 0.246 27.2
## 5 4.14 10 0.34 0.379 26.1
## 6 8.15 10 0.34 0.474 23.2
--> Initializing x1,x2,x3,x4
# Coordinates of the two points of interest, one vector per predictor.
# Each vector is stored under both names used later in the script.
newX1 <- x1 <- c(5.0, 10.0)
newX2 <- x2 <- c(10, 3)
newX3 <- x3 <- c(0.5, 0.25)
newX4 <- x4 <- c(0.75, 0.85)
--> Creating Data Frame
# Store the points of interest in a data frame whose column names match the
# model variables (x1..x4). Previously the frame passed to predict() had
# columns newX1..newX4, so predict() silently ignored it and picked up the
# global vectors x1..x4 instead; the result was only right by accident.
new_points <- data.frame(x1 = newX1, x2 = newX2, x3 = newX3, x4 = newX4)
new_points
##   x1 x2   x3   x4
## 1  5 10 0.50 0.75
## 2 10  3 0.25 0.85
predict(Reducedmod, newdata = new_points)
##        1        2 
## 27.41651 15.48169
--> Finding the confidence and prediction intervals. The intervals at the points of interest are given below.
# 95% confidence interval on the mean response at the points of interest.
# Parts (d) and (e) ask for the model from part (c), i.e. Finalmodel (which
# includes the x2:x4 interaction), not Reducedmodel1. x1 is omitted because it
# is not in that model. Column names are given explicitly so they are
# guaranteed to match the model variables.
predict(
  Finalmodel,
  newdata = data.frame(x2 = x2, x3 = x3, x4 = x4),
  interval = "confidence",
  level = 0.95
)
# Re-run to print the fit/lwr/upr table. From the Finalmodel coefficients the
# fitted values are about 27.44 and 18.61; the bounds printed previously
# belonged to Reducedmodel1 and no longer apply.
--> As you can see, the output gives the fitted value (fit) and the lower (lwr) and upper (upr) boundaries of the confidence interval.
# 95% prediction interval for a new observation at the points of interest.
# As for the confidence interval, the model from part (c) is Finalmodel, not
# Reducedmodel1; x1 is omitted because it is not in that model.
predict(
  Finalmodel,
  newdata = data.frame(x2 = x2, x3 = x3, x4 = x4),
  interval = "prediction",
  level = 0.95
)
# Re-run to print the fit/lwr/upr table. From the Finalmodel coefficients the
# fitted values are about 27.44 and 18.61; the bounds printed previously
# belonged to Reducedmodel1 and no longer apply.
--> As you can see, the output above gives the fitted value (fit) and the lower (lwr) and upper (upr) boundaries of the prediction interval.