# Read the assignment data from its CSV file.
dat <- read.csv(
  "C:\\Users\\18067\\Documents\\Fareeha Imam\\TTU R11767331\\Spring 2023\\SDA\\Assignment 8\\data-table-B9(1).csv"
)

# Regressor names; the five columns are relabelled as the four regressors
# followed by the response "y".
variables <- c("x1", "x2", "x3", "x4")
colnames(dat) <- c(variables, "y")

1 Part a:

Consider a first order multiple regression model with two-factor interactions. Check for model adequacy and take any corrective actions deemed necessary. Test for the significance of the full regression model. What do you conclude?

# First-order model with two-factor interactions. Each `a*b` term expands to
# a + b + a:b, so the formula holds the four main effects plus all six
# pairwise interactions.
Fullmodel<-lm(y~x1*x2+x1*x3+x1*x4+x2*x3+x2*x4+x3*x4, data = dat)
# Print the coefficient table, R-squared values and the overall F statistic.
summary(Fullmodel)

## 
## Call:
## lm(formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * 
##     x4 + x3 * x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x3              NA         NA      NA       NA    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x3              NA         NA      NA       NA    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13

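--> Here we take corrective action by removing x1:x3 and x2:x3. As you can see, their coefficients are NA ("not defined because of singularities"): these terms are linearly dependent on other terms in the model and cannot be estimated, so they should be eliminated.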

# Same formula as Fullmodel with the x1:x3 and x2:x3 interaction terms removed
# via `-`. These are the two coefficients that summary(Fullmodel) reports as NA.
Fullmodelcorrection<-lm(y~x1*x2+x1*x3+x1*x4+x2*x3+x2*x4+x3*x4-x1:x3-x2:x3, data = dat)
# Print the coefficient table, R-squared values and the overall F statistic.
summary(Fullmodelcorrection)

## 
## Call:
## lm(formula = y ~ x1 * x2 + x1 * x3 + x1 * x4 + x2 * x3 + x2 * 
##     x4 + x3 * x4 - x1:x3 - x2:x3, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4804 -3.0766 -0.6635  2.9625 12.2221 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.88376   23.17863   0.685  0.49616    
## x1            0.18696    0.78447   0.238  0.81255    
## x2            0.37921    0.06332   5.989 1.89e-07 ***
## x3          -11.99940   67.31148  -0.178  0.85919    
## x4           -8.86442   35.62553  -0.249  0.80446    
## x1:x2         0.01155    0.00869   1.329  0.18955    
## x1:x4        -1.11525    1.14847  -0.971  0.33592    
## x2:x4        -0.38547    0.11962  -3.222  0.00218 ** 
## x3:x4        72.85976  103.15353   0.706  0.48308    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.683 on 53 degrees of freedom
## Multiple R-squared:  0.7496, Adjusted R-squared:  0.7118 
## F-statistic: 19.83 on 8 and 53 DF,  p-value: 1.947e-13

--> Check for model Adequacy

# Standard diagnostic plots for the fitted linear model (by default: residuals
# vs fitted, normal Q-Q, scale-location, residuals vs leverage).
plot(Fullmodel)

# Raw residuals of the fit, one per observation.
residuals(Fullmodel)

##            1            2            3            4            5            6 
##  2.990195122  6.715828908  5.375224650  8.578767237  6.433606173  3.734219038 
##            7            8            9           10           11           12 
##  2.093754070  3.511831138  3.817329466 12.222079252  5.770306630  4.993740658 
##           13           14           15           16           17           18 
##  2.441069895  0.136311533  1.161241943 -2.095198810 -4.907130359 -3.422131856 
##           19           20           21           22           23           24 
## -3.593945129 -1.801155768 -0.155899737  8.026174467  7.722028281  7.722715177 
##           25           26           27           28           29           30 
## -3.090940151 -3.671889645 -3.982355018 -3.491833405 -2.189477683 -4.422975706 
##           31           32           33           34           35           36 
## -2.727410367 -2.702287597 -2.354997502  1.407831279 -9.480442758 -2.949012768 
##           37           38           39           40           41           42 
## -6.001819558  0.014005657  0.060157963  0.862079843 -0.551382617 -0.688726767 
##           43           44           45           46           47           48 
## -4.093564991 -0.722193659 -1.638517538 -0.005476899 -6.079476886 -4.120378287 
##           49           50           51           52           53           54 
## -6.150758563 -0.638254127 -1.578440355 -2.045370729 -3.033696419  4.419776149 
##           55           56           57           58           59           60 
## -1.102692301 -4.321287081 -0.402372639 -1.879235054  2.879545753 -5.669884374 
##           61           62 
##  4.328351991  0.344440833

--> The question asks us to test the significance of the full regression model, which requires an F-test. The qf command gives the critical F value to compare against the model's F statistic.

# Critical F value for the overall significance test at alpha = 0.01.
# The numerator and denominator degrees of freedom are read from the fitted
# model's own F statistic, so they always match the F value printed by
# summary(Fullmodel) (8 and 53 df for this fit, because two of the ten
# regressors are aliased and not estimated).
f_stat <- summary(Fullmodel)$fstatistic
qf(0.99, df1 = f_stat[["numdf"]], df2 = f_stat[["dendf"]])

## (re-run to refresh this output: the value recorded previously, 2.706371,
##  came from qf(0.99, 10, 49), whose degrees of freedom do not match the model)

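--> The critical F value returned by qf is much smaller than the model's F statistic (19.83), so we reject the null hypothesis that all regression coefficients are zero: the full regression model is significant.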

# Main-effects-only model: the four regressors with no interaction terms.
Reducedmod<- lm(y~x1+x2+x3+x4,data=dat)
# Print the coefficient table, R-squared values and the overall F statistic.
summary(Reducedmod)

## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9958 -3.3092 -0.2419  3.3924 10.5668 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.89453    4.32508   1.363  0.17828    
## x1          -0.47790    0.34002  -1.406  0.16530    
## x2           0.18271    0.01718  10.633 3.78e-15 ***
## x3          35.40284   11.09960   3.190  0.00232 ** 
## x4           5.84391    2.90978   2.008  0.04935 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.014 on 57 degrees of freedom
## Multiple R-squared:  0.6914, Adjusted R-squared:  0.6697 
## F-statistic: 31.92 on 4 and 57 DF,  p-value: 5.818e-14

# Standard diagnostic plots for the main-effects-only model.
plot(Reducedmod)

--> Conclusion: We can say that the model is adequate and significant. The p-value is very low and the R-squared value is high. In addition, the critical F value is far smaller than the model's F statistic. Adequacy should also be confirmed from the residual plots above.

2 Part b & c:

Test for the significance of all 2 factor interactions using a partial F-test. What are your findings? Determine the best fitting model using partial F and/or t-tests. What is the final model?
--> Initializing the final model. The final model consists of the terms below, which are significant.
# Final model: main effects x2, x3 and x4 plus the x2:x4 interaction.
# `x2*x4` expands to x2 + x4 + x2:x4, so listing x2 and x4 separately as well
# does not add any extra terms.
Finalmodel<-lm(y~x2+x3+x4+x2*x4,data=dat)

# Print the coefficient table, R-squared values and the overall F statistic.
summary(Finalmodel)

## ## Call: ## lm(formula = y ~ x2 + x3 + x4 + x2 * x4, data = dat) ## ## Residuals: ## Min 1Q Median 3Q Max ## -9.959 -3.358 -1.131 3.040 11.646 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 1.52261 4.03964 0.377 0.70763 ## x2 0.38056 0.06084 6.255 5.47e-08 *** ## x3 34.51062 10.29961 3.351 0.00144 ** ## x4 9.52471 2.96093 3.217 0.00214 ** ## x2:x4 -0.30472 0.09056 -3.365 0.00137 ** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 4.658 on 57 degrees of freedom ## Multiple R-squared: 0.7336, Adjusted R-squared: 0.7149 ## F-statistic: 39.24 on 4 and 57 DF, p-value: 9.297e-16

# Standard diagnostic plots for the final model.
plot(Finalmodel)

--> Note: Terms whose p-values are not less than 0.05 are not significant. The non-significant interaction terms are x1:x2, x1:x4 and x3:x4; x1:x3 and x2:x3 could not be estimated at all (NA). We also remove x1 because it does not have a significant effect.
# Partial F-test for all two-factor interactions (part b): compare the
# main-effects-only model with the model that adds the interactions.
# Comparing Reducedmod with Finalmodel directly is not a valid partial F-test:
# neither model is nested in the other and both have 57 residual df, so
# anova() reports Df = 0 and no F statistic or p-value.
anova(Reducedmod, Fullmodel)

# Partial F-test supporting the final model (part c): checks whether the terms
# dropped from the full model (x1, x1:x2, x1:x4, x3:x4) are jointly needed.
anova(Finalmodel, Fullmodel)

## (re-run to obtain the two ANOVA tables; worked by hand from the recorded
##  residual standard errors, the first test is roughly F = 3.1 on 4 and 53 df)
--> The final model is significant: the p-value of its overall F-test is 9.297e-16, which is far below 0.05.

3 Part d and e:

Using the model from part c), calculate a 95% confidence interval on the mean response at the following points of interest (note, if a variable in the points of interest are not in the model, then it is omitted).
Using the model from part c), calculate a 95% prediction interval on the mean response at the points of interest (note, if a variable in the points of interest are not in the model, then it is omitted)

# Main-effects model on x2, x3 and x4 only (no x1, no interactions).
# The fit and the summary are two separate statements; R cannot parse them on
# one line without a separator, so each gets its own line.
Reducedmodel1 <- lm(y ~ x2 + x3 + x4, data = dat)
summary(Reducedmodel1)

## ## Call: ## lm(formula = y ~ x2 + x3 + x4, data = dat) ## ## Residuals: ## Min 1Q Median 3Q Max ## -11.2730 -3.4598 -0.5632 2.7904 12.3370 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 4.64065 4.26751 1.087 0.28134 ## x2 0.18302 0.01733 10.563 3.92e-15 *** ## x3 34.62435 11.17861 3.097 0.00301 ** ## x4 4.56878 2.78788 1.639 0.10667 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 5.056 on 58 degrees of freedom ## Multiple R-squared: 0.6807, Adjusted R-squared: 0.6642 ## F-statistic: 41.21 on 3 and 58 DF, p-value: 2.146e-14

# Standard diagnostic plots for the three-regressor main-effects model.
plot(Reducedmodel1)

# Show the first rows of the data set.
head(dat)

## x1 x2 x3 x4 y ## 1 2.14 10 0.34 1.000 28.9 ## 2 4.14 10 0.34 1.000 31.0 ## 3 8.15 10 0.34 1.000 26.4 ## 4 2.14 10 0.34 0.246 27.2 ## 5 4.14 10 0.34 0.379 26.1 ## 6 8.15 10 0.34 0.474 23.2
--> Initializing x1,x2,x3,x4
# Points of interest for parts d and e: two points, with one value per point
# in each regressor vector. Each assignment is its own statement; R cannot
# parse several assignments on one line without a `;` separator.
x1 <- c(5.0, 10.0)
x2 <- c(10, 3)
x3 <- c(0.5, 0.25)
x4 <- c(0.75, 0.85)

# Copies of the same values under the newX* names.
newX1 <- x1
newX2 <- x2
newX3 <- x3
newX4 <- x4
--> Creating Data Frame
# Assemble the points of interest into a data frame whose columns are named
# after the regressors. The result is printed, not stored.
data.frame(x1=newX1,x2=newX2,x3=newX3,x4=newX4)

## x1 x2 x3 x4 ## 1 5 10 0.50 0.75 ## 2 10 3 0.25 0.85

# Point predictions from the four-regressor main-effects model. The columns of
# `newdata` must carry the regressor names used in the formula (x1..x4).
# Columns named newX1..newX4 are not matched by predict(), which then silently
# falls back on whatever x1..x4 vectors exist in the workspace.
predict(Reducedmod, newdata = data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4))

## 1 2 ## 27.41651 15.48169
--> Finding the confidence and prediction intervals. The intervals below are computed from the fitted model at the points of interest.
# 95% confidence interval on the mean response at the points of interest,
# using the final model from part c (Finalmodel, which includes x2:x4).
# x1 is not in that model, so it is left out of the new data.
predict(
  Finalmodel,
  newdata = data.frame(x2 = x2, x3 = x3, x4 = x4),
  interval = "confidence",
  level = 0.95
)

## (re-run to refresh this output: the intervals recorded previously were
##  computed from Reducedmodel1, which is not the part c model; by hand from
##  the recorded Finalmodel coefficients the fits are about 27.44 and 18.61)
--> As you can see, the output gives the fitted value (fit) and the lower (lwr) and upper (upr) boundaries of the confidence interval.
# 95% prediction interval at the points of interest, using the final model
# from part c (Finalmodel, which includes x2:x4). x1 is not in that model, so
# it is left out of the new data.
predict(
  Finalmodel,
  newdata = data.frame(x2 = x2, x3 = x3, x4 = x4),
  interval = "prediction",
  level = 0.95
)

## (re-run to refresh this output: the intervals recorded previously were
##  computed from Reducedmodel1, which is not the part c model; by hand from
##  the recorded Finalmodel coefficients the fits are about 27.44 and 18.61)
--> As you can see, the output above gives the fitted value (fit) and the lower (lwr) and upper (upr) boundaries of the prediction interval.

Assignment # 08

Fareeha Imam

Last compiled on February 25, 2023 at 4:10 PM - CST

1 Part a:

2 Part b & c:

3 Part d and e: