# Load the soft-drink delivery-time data set and echo it in full.
# NOTE(review): the path is absolute and specific to one machine -- adjust it
# before running elsewhere.
dat <- read.csv(
  file = 'C:\\Users\\18067\\Documents\\Fareeha Imam\\TTU R11767331\\Spring 2023\\SDA\\Assignment 10\\data-SoftDrinkDeliveryTime.csv'
)
print(dat)
## Observation DeliveryTime.min. NumCases Distance.ft.
## 1 1 16.68 7 560
## 2 2 11.50 3 220
## 3 3 12.03 3 340
## 4 4 14.88 4 80
## 5 5 13.75 6 150
## 6 6 18.11 7 330
## 7 7 8.00 2 110
## 8 8 17.83 7 210
## 9 9 79.24 30 1460
## 10 10 21.50 5 605
## 11 11 40.33 16 688
## 12 12 21.00 10 215
## 13 13 13.50 4 255
## 14 14 19.75 6 462
## 15 15 24.00 9 448
## 16 16 29.00 10 776
## 17 17 15.35 6 200
## 18 18 19.00 7 132
## 19 19 9.50 3 36
## 20 20 35.10 17 770
## 21 21 17.90 10 140
## 22 22 52.32 26 810
## 23 23 18.75 9 450
## 24 24 19.83 8 635
## 25 25 10.75 4 150
What are your estimates of the regression parameters and what is the associated value of R^2?
# Preview the first rows to confirm the file parsed into the expected columns.
head(dat)
## Observation DeliveryTime.min. NumCases Distance.ft.
## 1 1 16.68 7 560
## 2 2 11.50 3 220
## 3 3 12.03 3 340
## 4 4 14.88 4 80
## 5 5 13.75 6 150
## 6 6 18.11 7 330
# Drop the `Observation` column: it only repeats the row number and must not
# enter the regression. Removing it by name (instead of `dat[, -1]`) keeps this
# step idempotent -- re-running it cannot strip DeliveryTime.min. by accident.
dat$Observation <- NULL
head(dat)
## DeliveryTime.min. NumCases Distance.ft.
## 1 16.68 7 560
## 2 11.50 3 220
## 3 12.03 3 340
## 4 14.88 4 80
## 5 13.75 6 150
## 6 18.11 7 330
--> Representing DeliveryTime.min. as y, NumCases as x1, and Distance.ft. as x2
# Add short aliases used in the model formulas: y copies DeliveryTime.min.,
# x1 copies NumCases and x2 copies Distance.ft. (originals are kept).
dat[c("y", "x1", "x2")] <-
  dat[c("DeliveryTime.min.", "NumCases", "Distance.ft.")]
head(dat)
## DeliveryTime.min. NumCases Distance.ft. y x1 x2
## 1 16.68 7 560 16.68 7 560
## 2 11.50 3 220 11.50 3 220
## 3 12.03 3 340 12.03 3 340
## 4 14.88 4 80 14.88 4 80
## 5 13.75 6 150 13.75 6 150
## 6 18.11 7 330 18.11 7 330
--> Initializing model1 to determine the value of R^2
# Fit delivery time on number of cases, distance, and their interaction
# using every observation currently in `dat`, then print the fit summary.
model1 <- lm(formula = y ~ x1 + x2 + x1:x2, data = dat)
print(summary(model1))
##
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7316 -1.5387 0.0606 1.4375 4.7841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.1390846 1.3997413 5.100 4.73e-05 ***
## x1 1.0144063 0.1912517 5.304 2.93e-05 ***
## x2 0.0058273 0.0033825 1.723 0.099622 .
## x1:x2 0.0007419 0.0001750 4.240 0.000366 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.449 on 21 degrees of freedom
## Multiple R-squared: 0.9782, Adjusted R-squared: 0.9751
## F-statistic: 314.6 on 3 and 21 DF, p-value: < 2.2e-16
# Estimated regression parameters (intercept, x1, x2, x1:x2) of model1.
coef(model1)
## (Intercept) x1 x2 x1:x2
## 7.1390845734 1.0144062540 0.0058273479 0.0007419211
# Multiple R-squared of model1.
summary(model1)[["r.squared"]]
## [1] 0.9782308
--> As we know, the regression parameters are b0, b1, b2 and b3. The value of each parameter is given below:
b0 = 7.1390846, b1 = 1.0144063, b2 = 0.0058273, b3 = 0.0007419, and the associated R^2 = 0.9782308.
What do you notice in the diagnostic plots (all of them)?
# Draw the standard lm diagnostic plots for model1; `which` spells out the
# default selection (residuals vs fitted, normal Q-Q, scale-location,
# residuals vs leverage).
plot(model1, which = c(1L, 2L, 3L, 5L))
Remove the observation(s) that appear to be the most influential.
# Drop observation 9 by its row label rather than by position. `dat[-9, ]`
# removes whatever row currently sits ninth, so re-running the line would
# silently delete a different delivery; matching on the label is idempotent.
dat <- dat[rownames(dat) != "9", ]
--> We observe that point 9 is the most influential observation, so we eliminate it and re-fit the model.
What are your estimates of the regression parameters and what is the associated value of R^2?
# Re-fit the same interaction model on `dat` as it stands at this point
# (after the row removal above), then print the fit summary.
model2 <- lm(formula = y ~ x1 + x2 + x1:x2, data = dat)
print(summary(model2))
##
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8495 -1.3509 -0.0835 1.6174 4.9098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7984402 1.9709874 2.942 0.008062 **
## x1 1.2660217 0.3229617 3.920 0.000848 ***
## x2 0.0080441 0.0040895 1.967 0.063212 .
## x1:x2 0.0003480 0.0004432 0.785 0.441497
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.452 on 20 degrees of freedom
## Multiple R-squared: 0.9502, Adjusted R-squared: 0.9428
## F-statistic: 127.3 on 3 and 20 DF, p-value: 3.368e-13
# Multiple R-squared of the re-fitted model.
summary(model2)[["r.squared"]]
## [1] 0.9502353
--> As you can see the value of R squared is 0.9502353
# Show the parameter estimates of both fits so they can be compared.
coef(model1)
## (Intercept) x1 x2 x1:x2
## 7.1390845734 1.0144062540 0.0058273479 0.0007419211
coef(model2)
## (Intercept) x1 x2 x1:x2
## 5.7984402036 1.2660216932 0.0080440801 0.0003480246
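--> The values of the parameters for model1 (all 25 observations) are:
b0 = 7.1390845734, b1 = 1.0144062540, b2 = 0.0058273479, b3 = 0.0007419211
--> The values of the parameters for model2 (observation 9 removed) are:
b0 = 5.7984402036, b1 = 1.2660216932, b2 = 0.0080440801, b3 = 0.0003480246
What do you notice in the diagnostic plots (all of them) after removing this point?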
# Draw the standard lm diagnostic plots for the re-fitted model; `which`
# spells out the default selection (residuals vs fitted, normal Q-Q,
# scale-location, residuals vs leverage).
plot(model2, which = c(1L, 2L, 3L, 5L))
Is there now another point that might be influential or that has leverage?
--> Yes, there is a point that has leverage, but it is not influential.
Do you believe that this point should be removed? (explain)
--> I believe there is no need to eliminate any other point, since we did not find any other influential point.