# Load the delivery-time data set from its machine-specific absolute path.
mydata <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\2 SLR\\Assignment\\delivery_time.csv"
)
# Put the columns on the search path: later statements in this script refer to
# Delivery.Time and Sorting.Time by bare name.
attach(what = mydata)
# Show the column names of the loaded data frame.
names(mydata)
## [1] "Delivery.Time" "Sorting.Time"
# 1st, 2nd, 3rd, 4th Moment Business Decisions
# Min, quartiles, median, mean and max for every column.
summary(object = mydata)
## Delivery.Time Sorting.Time
## Min. : 8.00 Min. : 2.00
## 1st Qu.:13.50 1st Qu.: 4.00
## Median :17.83 Median : 6.00
## Mean :16.79 Mean : 6.19
## 3rd Qu.:19.75 3rd Qu.: 8.00
## Max. :29.00 Max. :10.00
# Spread of each column: sample variance, then standard deviation.
var(mydata$Delivery.Time)
## [1] 25.75462
var(mydata$Sorting.Time)
## [1] 6.461905
sd(mydata$Delivery.Time)
## [1] 5.074901
sd(mydata$Sorting.Time)
## [1] 2.542028
# e1071 provides skewness() and kurtosis().
library(e1071)
# Skewness is reported for delivery time only, kurtosis for sorting time only.
skewness(mydata$Delivery.Time)
## [1] 0.3036468
kurtosis(mydata$Sorting.Time)
## [1] -1.335955
# Plotting
# Box plot of each variable; the sorting-time one is drawn horizontally.
with(mydata, boxplot(Delivery.Time))
with(mydata, boxplot(Sorting.Time, horizontal = TRUE))
# Histogram and normal Q-Q plot of sorting time.
with(mydata, hist(Sorting.Time))
with(mydata, qqnorm(Sorting.Time))
# Plot the whole data frame (both columns against each other).
plot(x = mydata)
# Correlation
cor(x = mydata)
## Delivery.Time Sorting.Time
## Delivery.Time 1.0000000 0.8259973
## Sorting.Time 0.8259973 1.0000000
# Model
# Baseline fit: straight-line regression of delivery time on sorting time,
# using all rows. The variables are resolved by bare name.
model <- lm(formula = Delivery.Time ~ Sorting.Time)
# Coefficient table and fit statistics; Multiple R-squared is 0.6823.
summary(object = model)
##
## Call:
## lm(formula = Delivery.Time ~ Sorting.Time)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1729 -2.0298 -0.0298 0.8741 6.6722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5827 1.7217 3.823 0.00115 **
## Sorting.Time 1.6490 0.2582 6.387 3.98e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.935 on 19 degrees of freedom
## Multiple R-squared: 0.6823, Adjusted R-squared: 0.6655
## F-statistic: 40.8 on 1 and 19 DF, p-value: 3.983e-06
# Quantile-quantile plot of delivery time against sorting time.
with(mydata, qqplot(Delivery.Time, Sorting.Time))
# car supplies the influence plots used further down.
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
# Per-observation influence diagnostics for the baseline model (dfbetas, dffit,
# cov.r, Cook's distance, hat values); flagged rows carry a "*" in the inf column.
influence.measures(model = model)
## Influence measures of
## lm(formula = Delivery.Time ~ Sorting.Time) :
##
## dfb.1_ dfb.Sr.T dffit cov.r cook.d hat inf
## 1 1.91e-01 -0.278610 -0.33248 1.245 5.65e-02 0.1599
## 2 3.03e-02 -0.022432 0.03389 1.216 6.06e-04 0.0847
## 3 1.14e-01 -0.019795 0.25857 1.015 3.29e-02 0.0479
## 4 -1.45e-01 0.242465 0.32346 1.139 5.27e-02 0.1087
## 5 -6.23e-01 0.908714 1.08441 0.735 4.62e-01 0.1599 *
## 6 -3.81e-02 0.006603 -0.08626 1.151 3.89e-03 0.0479
## 7 4.62e-03 0.021845 0.07042 1.165 2.60e-03 0.0527
## 8 -2.67e-01 0.219435 -0.27796 1.203 3.96e-02 0.1264
## 9 5.23e-01 -0.762526 -0.90995 0.860 3.52e-01 0.1599
## 10 1.51e-01 -0.252137 -0.33636 1.131 5.68e-02 0.1087
## 11 -1.31e-03 0.003138 0.00532 1.202 1.50e-05 0.0730
## 12 -2.33e-01 0.173016 -0.26140 1.123 3.46e-02 0.0847
## 13 -7.68e-03 -0.036290 -0.11698 1.145 7.12e-03 0.0527
## 14 -3.86e-03 0.003174 -0.00402 1.275 8.53e-06 0.1264
## 15 6.49e-02 -0.053338 0.06756 1.271 2.41e-03 0.1264
## 16 1.62e-01 -0.119938 0.18121 1.171 1.70e-02 0.0847
## 17 -9.41e-02 0.016308 -0.21302 1.061 2.28e-02 0.0479
## 18 -8.38e-05 -0.000396 -0.00128 1.176 8.59e-07 0.0527
## 19 -3.28e-01 0.285388 -0.33165 1.293 5.65e-02 0.1835
## 20 -1.56e-03 -0.007378 -0.02378 1.175 2.98e-04 0.0527
## 21 4.97e-01 -0.291894 0.67467 0.598 1.71e-01 0.0586 *
# Index plots of the influence diagnostics for the baseline model.
car::influenceIndexPlot(model = model)
# Influence plot; it also prints the StudRes / Hat / CookD values of the
# observations it singles out.
car::influencePlot(model = model)
## StudRes Hat CookD
## 1 -0.7620498 0.15991157 0.05651746
## 5 2.4855038 0.15991157 0.46205304
## 9 -2.0856523 0.15991157 0.35195395
## 19 -0.6995967 0.18349300 0.05651387
## 21 2.7045160 0.05858511 0.17082097
# Eliminating the Record 5 and 21
# Same straight-line model as before, refitted without rows 5 and 21.
model2 <- lm(
  formula = Delivery.Time ~ Sorting.Time,
  data = mydata[-c(5, 21), ]
)
# Multiple R-squared for this fit: 0.7771
summary(object = model2)
##
## Call:
## lm(formula = Delivery.Time ~ Sorting.Time, data = mydata[-c(5,
## 21), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9376 -1.6943 0.1908 0.8925 3.9286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.7970 1.2748 5.332 5.51e-05 ***
## Sorting.Time 1.5041 0.1954 7.699 6.13e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.076 on 17 degrees of freedom
## Multiple R-squared: 0.7771, Adjusted R-squared: 0.764
## F-statistic: 59.27 on 1 and 17 DF, p-value: 6.13e-07
# Eliminating records 5 and 21 and applying a logarithmic transformation to the predictor
# Rows 5 and 21 removed; delivery time regressed on the log of sorting time.
model3 <- lm(
  formula = Delivery.Time ~ log(Sorting.Time),
  data = mydata[-c(5, 21), ]
)
# Multiple R-squared for this fit: 0.8107
summary(object = model3)
##
## Call:
## lm(formula = Delivery.Time ~ log(Sorting.Time), data = mydata[-c(5,
## 21), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8673 -1.2227 0.2327 0.9693 4.0901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0300 1.6837 1.206 0.244
## log(Sorting.Time) 8.1375 0.9536 8.533 1.5e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.913 on 17 degrees of freedom
## Multiple R-squared: 0.8107, Adjusted R-squared: 0.7996
## F-statistic: 72.82 on 1 and 17 DF, p-value: 1.5e-07
# Eliminating records 5 and 21 and applying a logarithmic transformation to both sides
# Rows 5 and 21 removed; log of delivery time regressed on log of sorting time,
# so this model's response (and everything predicted from it) is on the log scale.
model4 <- lm(
  formula = log(Delivery.Time) ~ log(Sorting.Time),
  data = mydata[-c(5, 21), ]
)
# Multiple R-squared for this fit: 0.8427
summary(object = model4)
##
## Call:
## lm(formula = log(Delivery.Time) ~ log(Sorting.Time), data = mydata[-c(5,
## 21), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18371 -0.07527 0.01535 0.06894 0.20534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.75818 0.10528 16.700 5.57e-12 ***
## log(Sorting.Time) 0.56907 0.05963 9.543 3.06e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1196 on 17 degrees of freedom
## Multiple R-squared: 0.8427, Adjusted R-squared: 0.8334
## F-statistic: 91.07 on 1 and 17 DF, p-value: 3.059e-08
# 95% confidence intervals for the intercept and slope of model4.
confint(object = model4, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 1.5360581 1.9803092
## log(Sorting.Time) 0.4432565 0.6948752
# Fitted values with lower/upper interval bounds (fit, lwr, upr) for the rows
# model4 was fitted on. No newdata is supplied, which is what triggers the
# "predictions on current data" warning.
predict(model4, interval = "predict")
## Warning in predict.lm(model4, interval = "predict"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 3.068506 2.798823 3.338189
## 2 2.547076 2.285026 2.809127
## 3 2.777813 2.518606 3.037019
## 4 3.008549 2.742260 3.274838
## 6 2.777813 2.518606 3.037019
## 7 2.865535 2.604786 3.126283
## 8 2.383366 2.113407 2.653326
## 9 3.068506 2.798823 3.338189
## 10 3.008549 2.742260 3.274838
## 11 2.941523 2.678288 3.204757
## 12 2.547076 2.285026 2.809127
## 13 2.865535 2.604786 3.126283
## 14 2.383366 2.113407 2.653326
## 15 2.383366 2.113407 2.653326
## 16 2.547076 2.285026 2.809127
## 17 2.777813 2.518606 3.037019
## 18 2.865535 2.604786 3.126283
## 19 2.152630 1.864086 2.441174
## 20 2.865535 2.604786 3.126283
# MODEL4 gives the best R-squared value (0.8427); its response is log(Delivery.Time), so compare it with models 1-3 with care
# Fitted values of model4 for the rows it was trained on. model4's response is
# log(Delivery.Time), so these values are on the log scale.
model_final <- predict(model4)
model_final
## 1 2 3 4 6 7 8 9
## 3.068506 2.547076 2.777813 3.008549 2.777813 2.865535 2.383366 3.068506
## 10 11 12 13 14 15 16 17
## 3.008549 2.941523 2.547076 2.865535 2.383366 2.383366 2.547076 2.777813
## 18 19 20
## 2.865535 2.152630 2.865535
# RMSE on the original Delivery.Time scale. Two things must line up before
# subtracting:
#   * scale - the log-scale fits are back-transformed with exp();
#   * rows  - the observed values are taken from model4's own model frame, so
#             they are exactly the rows used in the fit (rows 5 and 21 were
#             dropped) and match the predictions in length and order.
observed_log <- model.response(model.frame(model4))
stopifnot(length(observed_log) == length(model_final))
rmse <- sqrt(mean((exp(model_final) - exp(observed_log))^2))
rmse
# NOTE: the value recorded earlier (14.9031) came from subtracting all 21 raw
# delivery times from the 19 log-scale fits, which raised a recycling warning
# and is not a valid error measure. Re-run to obtain the corrected RMSE.
# Diagnostic plots for the log-log model.
plot(x = model4)
# Distribution of that model's residuals.
hist(x = residuals(model4))
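### MODEL4 explains 84.27% of the variance in log(Delivery.Time) (R-squared = 0.8427); R-squared is a goodness-of-fit measure, not a prediction accuracy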