# Load the delivery-time data: choose the Delivery_Time.csv data set
# interactively (file.choose() opens a file picker).
dl.tm <- read.csv(file.choose())
# windows()
dt.st <- dl.tm

# Fail early with a clear message if the wrong file was picked: every later
# step reads these two columns by name, and a missing column would otherwise
# only surface as a cryptic plotting or modelling error.
if (!all(c("Delivery.Time", "Sorting.Time") %in% names(dt.st))) {
  stop(
    "Selected file must contain the columns Delivery.Time and Sorting.Time",
    call. = FALSE
  )
}

View(dt.st)
# 21 Observations of 2 variables
# Scatter Diagram (Plot x,y)
# NOTE: the plotting calls are kept exactly as written because base graphics
# derive axis labels and titles from the argument expressions.
plot(dt.st$Sorting.Time,dt.st$Delivery.Time)

# Other Exploratory data analysis and Plots
boxplot(dt.st)

hist(dt.st$Sorting.Time)

hist(dt.st$Delivery.Time)

# Five-number summary plus mean for each column.
summary(dt.st)
## Delivery.Time Sorting.Time
## Min. : 8.00 Min. : 2.00
## 1st Qu.:13.50 1st Qu.: 4.00
## Median :17.83 Median : 6.00
## Mean :16.79 Mean : 6.19
## 3rd Qu.:19.75 3rd Qu.: 8.00
## Max. :29.00 Max. :10.00
# Correlation coefficient value for Delivery Time and Sorting Time
# Pull the response (delivery time) and the predictor (sorting time) out of
# the data frame as plain vectors; the model formulas below refer to them
# by these short names.
dt <- dt.st[["Delivery.Time"]]
st <- dt.st[["Sorting.Time"]]
# Correlation between sorting time and delivery time.
cor(x = st, y = dt)
## [1] 0.8259973
# Rule of thumb used here: |r| greater than 0.85 indicates a strong correlation.
# The correlation coefficient is 0.8259973, just below that threshold, so this is treated as a moderate correlation.
# Baseline model: least-squares fit of delivery time on sorting time, with
# both variables left on their original scale (no transformation).
reg <- lm(formula = dt ~ st)
summary(object = reg)
##
## Call:
## lm(formula = dt ~ st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1729 -2.0298 -0.0298 0.8741 6.6722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5827 1.7217 3.823 0.00115 **
## st 1.6490 0.2582 6.387 3.98e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.935 on 19 degrees of freedom
## Multiple R-squared: 0.6823, Adjusted R-squared: 0.6655
## F-statistic: 40.8 on 1 and 19 DF, p-value: 3.983e-06
# Coefficient p-values should be below 0.05: here the intercept's is 0.00115 and the slope's (st) is 3.98e-06.
# The Multiple R-squared value is 0.6823, which is lower than the 0.8 generally wanted for a good fit.
# The Adjusted R-squared value is 0.6655.
# The p-value for the F-statistic is 3.983e-06, so the overall model is also significant at the 0.05 level.
# 95% confidence intervals for the intercept and slope of the linear model.
confint(reg, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 2.979134 10.186334
## st 1.108673 2.189367
# The output above gives you 2 equations:
# one to calculate the lower range and the other for the upper range
# Fitted values with prediction intervals for the rows the model was fit on.
# The interval type is spelled out in full instead of relying on "predict"
# being partially matched to "prediction". R warns that intervals computed
# on the training data describe future responses at those predictor values.
predict(reg, interval = "prediction")
## Warning in predict.lm(reg, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 23.072933 16.457161 29.68870
## 2 13.178814 6.780993 19.57663
## 3 16.476853 10.188630 22.76508
## 4 21.423913 14.955850 27.89198
## 5 23.072933 16.457161 29.68870
## 6 16.476853 10.188630 22.76508
## 7 18.125873 11.823294 24.42845
## 8 11.529794 5.010345 18.04924
## 9 23.072933 16.457161 29.68870
## 10 21.423913 14.955850 27.89198
## 11 19.774893 13.411938 26.13785
## 12 13.178814 6.780993 19.57663
## 13 18.125873 11.823294 24.42845
## 14 11.529794 5.010345 18.04924
## 15 11.529794 5.010345 18.04924
## 16 13.178814 6.780993 19.57663
## 17 16.476853 10.188630 22.76508
## 18 18.125873 11.823294 24.42845
## 19 9.880774 3.198090 16.56346
## 20 18.125873 11.823294 24.42845
## 21 14.827833 8.507631 21.14804
# predict(reg,type="prediction")
# Adjusted R-squared value for the above model is 0.6655
# we may have to do transformation of variables for better R-squared value
# Applying transformations
# Logarithmic transformation: regress delivery time on log(sorting time).
reg_log <- lm(formula = dt ~ log(st))
summary(object = reg_log)
##
## Call:
## lm(formula = dt ~ log(st))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0829 -2.0133 -0.1965 0.9351 7.0171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.160 2.455 0.472 0.642
## log(st) 9.043 1.373 6.587 2.64e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.873 on 19 degrees of freedom
## Multiple R-squared: 0.6954, Adjusted R-squared: 0.6794
## F-statistic: 43.39 on 1 and 19 DF, p-value: 2.642e-06
# 95% confidence intervals for the coefficients of the logarithmic model.
confint(reg_log, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -3.97778 6.297147
## log(st) 6.16977 11.917057
# Fitted values with prediction intervals for the training rows. The interval
# type is spelled out in full instead of relying on "predict" being
# partially matched to "prediction".
predict(reg_log, interval = "prediction")
## Warning in predict.lm(reg_log, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 21.98291 15.6099875 28.35584
## 2 13.69652 7.4628028 19.93023
## 3 17.36331 11.2049447 23.52167
## 4 21.03009 14.7287585 27.33143
## 5 21.98291 15.6099875 28.35584
## 6 17.36331 11.2049447 23.52167
## 7 18.75735 12.5700473 24.94466
## 8 11.09489 4.6786298 17.51115
## 9 21.98291 15.6099875 28.35584
## 10 21.03009 14.7287585 27.33143
## 11 19.96493 13.7271824 26.20268
## 12 13.69652 7.4628028 19.93023
## 13 18.75735 12.5700473 24.94466
## 14 11.09489 4.6786298 17.51115
## 15 11.09489 4.6786298 17.51115
## 16 13.69652 7.4628028 19.93023
## 17 17.36331 11.2049447 23.52167
## 18 18.75735 12.5700473 24.94466
## 19 7.42810 0.5911537 14.26505
## 20 18.75735 12.5700473 24.94466
## 21 15.71450 9.5493253 21.87967
# Multiple R-squared value for the above model is 0.6954
# Adjusted R-squared: 0.6794
# we may have to do different transformation for a better R-squared value
# Applying different transformations
# Exponential model: regress log(delivery time) on sorting time, which
# corresponds to dt = exp(a + b * st) on the original scale.
reg_exp <- lm(formula = log(dt) ~ st)
summary(object = reg_exp)
##
## Call:
## lm(formula = log(dt) ~ st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29209 -0.13364 0.02065 0.08421 0.41892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.12137 0.10297 20.601 1.86e-14 ***
## st 0.10555 0.01544 6.836 1.59e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1755 on 19 degrees of freedom
## Multiple R-squared: 0.7109, Adjusted R-squared: 0.6957
## F-statistic: 46.73 on 1 and 19 DF, p-value: 1.593e-06
# 95% confidence intervals for the coefficients; these are on the log scale
# because the model's response is log(dt).
confint(reg_exp, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 1.90584807 2.3368956
## st 0.07323457 0.1378686
# The model predicts log(dt), so exp() maps the fit and both interval bounds
# back to the original delivery-time scale. The interval type is spelled out
# in full instead of relying on "predict" being partially matched to
# "prediction".
exp(predict(reg_exp, interval = "prediction"))
## Warning in predict.lm(reg_exp, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 23.97203 16.138575 35.60775
## 2 12.72512 8.679275 18.65695
## 3 15.71603 10.789743 22.89153
## 4 21.57071 14.650800 31.75904
## 5 23.97203 16.138575 35.60775
## 6 15.71603 10.789743 22.89153
## 7 17.46560 11.980604 25.46174
## 8 11.45042 7.753250 16.91061
## 9 23.97203 16.138575 35.60775
## 10 21.57071 14.650800 31.75904
## 11 19.40993 13.266335 28.39859
## 12 12.72512 8.679275 18.65695
## 13 17.46560 11.980604 25.46174
## 14 11.45042 7.753250 16.91061
## 15 11.45042 7.753250 16.91061
## 16 12.72512 8.679275 18.65695
## 17 15.71603 10.789743 22.89153
## 18 17.46560 11.980604 25.46174
## 19 10.30341 6.908812 15.36592
## 20 17.46560 11.980604 25.46174
## 21 14.14173 9.690362 20.63787
# R-squared value - 0.7109
# Adjusted R-squared value - 0.6957
# The higher the R-squared value, the better the chance of getting a good model
# for Delivery Time and Sorting Time
# Quadratic model
# Store the squared sorting time as its own column of dt.st; the st_sq
# variant of the quadratic fit further down reads it from there.
dt.st[["st_sq"]] <- st^2
# Fit delivery time on sorting time and its square. I() makes ^ mean
# arithmetic squaring inside the formula.
quad_mod <- lm(formula = dt ~ st + I(st^2), data = dt.st)
summary(object = quad_mod)
##
## Call:
## lm(formula = dt ~ st + I(st^2), data = dt.st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4324 -1.6951 -0.5365 0.9075 6.6676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5222 4.1597 0.847 0.4082
## st 2.8130 1.4608 1.926 0.0701 .
## I(st^2) -0.0932 0.1151 -0.810 0.4286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.962 on 18 degrees of freedom
## Multiple R-squared: 0.6934, Adjusted R-squared: 0.6594
## F-statistic: 20.36 on 2 and 18 DF, p-value: 2.391e-05
# 95% confidence intervals for the coefficients of the quadratic model.
confint(quad_mod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -5.2169258 12.2613936
## st -0.2560669 5.8820703
## I(st^2) -0.3349939 0.1485975
# Fitted values with prediction intervals for the training rows. The interval
# type is spelled out in full instead of relying on "predict" being
# partially matched to "prediction".
predict(quad_mod, interval = "prediction")
## Warning in predict.lm(quad_mod, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 22.332430 15.360768 29.30409
## 2 13.283069 6.796484 19.76965
## 3 17.045108 10.506802 23.58341
## 4 21.290194 14.728917 27.85147
## 5 22.332430 15.360768 29.30409
## 6 17.045108 10.506802 23.58341
## 7 18.646533 12.120732 25.17233
## 8 11.122455 4.434281 17.81063
## 9 22.332430 15.360768 29.30409
## 10 21.290194 14.728917 27.85147
## 11 20.061562 13.573170 26.54995
## 12 13.283069 6.796484 19.76965
## 13 18.646533 12.120732 25.17233
## 14 11.122455 4.434281 17.81063
## 15 11.122455 4.434281 17.81063
## 16 13.283069 6.796484 19.76965
## 17 17.045108 10.506802 23.58341
## 18 18.646533 12.120732 25.17233
## 19 8.775444 1.423580 16.12731
## 20 18.646533 12.120732 25.17233
## 21 15.257287 8.758744 21.75583
# Adjusted R-Squared = 0.6594
#Multiple R -Squared Value = 0.6934
# Quadratic model again, this time using the precomputed st_sq column of
# dt.st in place of the inline I(st^2) term.
qd_model <- lm(formula = dt ~ st + st_sq, data = dt.st)
summary(object = qd_model)
##
## Call:
## lm(formula = dt ~ st + st_sq, data = dt.st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4324 -1.6951 -0.5365 0.9075 6.6676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5222 4.1597 0.847 0.4082
## st 2.8130 1.4608 1.926 0.0701 .
## st_sq -0.0932 0.1151 -0.810 0.4286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.962 on 18 degrees of freedom
## Multiple R-squared: 0.6934, Adjusted R-squared: 0.6594
## F-statistic: 20.36 on 2 and 18 DF, p-value: 2.391e-05
# Confidence intervals and predictions for qd_model, the fit summarised just
# above. These two calls previously passed quad_mod (a copy-paste slip), so
# qd_model itself was never inspected; the squared term is therefore
# labelled st_sq here rather than I(st^2).
confint(qd_model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -5.2169258 12.2613936
## st -0.2560669 5.8820703
## st_sq -0.3349939 0.1485975
# Interval type spelled out in full instead of relying on "predict" being
# partially matched to "prediction".
predict(qd_model, interval = "prediction")
## Warning in predict.lm(qd_model, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 22.332430 15.360768 29.30409
## 2 13.283069 6.796484 19.76965
## 3 17.045108 10.506802 23.58341
## 4 21.290194 14.728917 27.85147
## 5 22.332430 15.360768 29.30409
## 6 17.045108 10.506802 23.58341
## 7 18.646533 12.120732 25.17233
## 8 11.122455 4.434281 17.81063
## 9 22.332430 15.360768 29.30409
## 10 21.290194 14.728917 27.85147
## 11 20.061562 13.573170 26.54995
## 12 13.283069 6.796484 19.76965
## 13 18.646533 12.120732 25.17233
## 14 11.122455 4.434281 17.81063
## 15 11.122455 4.434281 17.81063
## 16 13.283069 6.796484 19.76965
## 17 17.045108 10.506802 23.58341
## 18 18.646533 12.120732 25.17233
## 19 8.775444 1.423580 16.12731
## 20 18.646533 12.120732 25.17233
## 21 15.257287 8.758744 21.75583
# Adjusted R-Squared = 0.6594
#Multiple R -Squared Value = 0.6934
# Cubic model: delivery time on sorting time, its square and its cube.
poly_mod <- lm(formula = dt ~ st + I(st^2) + I(st^3), data = dt.st)
summary(object = poly_mod) # Multiple R-squared: 0.7034
##
## Call:
## lm(formula = dt ~ st + I(st^2) + I(st^3), data = dt.st)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8972 -1.7972 -0.1601 0.8077 6.2028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.15818 10.98653 -0.378 0.710
## st 7.50248 6.37003 1.178 0.255
## I(st^2) -0.92525 1.10553 -0.837 0.414
## I(st^3) 0.04446 0.05874 0.757 0.460
##
## Residual standard error: 2.998 on 17 degrees of freedom
## Multiple R-squared: 0.7034, Adjusted R-squared: 0.6511
## F-statistic: 13.44 on 3 and 17 DF, p-value: 9.586e-05
# 95% confidence intervals for the coefficients of the cubic model.
confint(poly_mod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -27.33772564 19.0213717
## st -5.93710786 20.9420660
## I(st^2) -3.25771246 1.4072034
## I(st^3) -0.07947156 0.1683837
# Fitted values with prediction intervals for the training rows. The interval
# type is spelled out in full instead of relying on "predict" being
# partially matched to "prediction".
predict(poly_mod, interval = "prediction")
## Warning in predict.lm(poly_mod, interval = "prediction"): predictions on current data refer to _future_ responses
## fit lwr upr
## 1 22.797240 15.5939839 30.00050
## 2 13.892856 7.0844910 20.70122
## 3 17.150048 10.4982995 23.80180
## 4 20.827000 14.0344853 27.61951
## 5 22.797240 15.5939839 30.00050
## 6 17.150048 10.4982995 23.80180
## 7 18.270140 11.5550572 24.98522
## 8 11.222284 4.4189573 18.02561
## 9 22.797240 15.5939839 30.00050
## 10 20.827000 14.0344853 27.61951
## 11 19.406879 12.5644227 26.24933
## 12 13.892856 7.0844910 20.70122
## 13 18.270140 11.5550572 24.98522
## 14 11.222284 4.4189573 18.02561
## 15 11.222284 4.4189573 18.02561
## 16 13.892856 7.0844910 20.70122
## 17 17.150048 10.4982995 23.80180
## 18 18.270140 11.5550572 24.98522
## 19 7.501412 -0.7718603 15.77468
## 20 18.270140 11.5550572 24.98522
## 21 15.779865 9.0162244 22.54351
# Adjusted R-Squared = 0.6511
#Multiple R -Squared Value = 0.7034
# Comparison table of the fitted models by adjusted R-squared.
# The values are read from the model objects rather than typed in by hand,
# so the table cannot drift from the fits; they are rounded to 4 decimals to
# match the figures printed by summary().
# The element is still called R_squared for compatibility, but it holds the
# ADJUSTED R-squared of each model.
# NOTE(review): reg_exp is fitted to log(dt), so its value is on the log
# scale and is not strictly comparable with the models fitted to dt.
model_R_Squared_values <- list(
  model = c("reg", "reg_log", "reg_exp", "quad_mod", "poly_mod"),
  R_squared = round(
    vapply(
      list(reg, reg_log, reg_exp, quad_mod, poly_mod),
      function(fit) summary(fit)$adj.r.squared,
      numeric(1)
    ),
    4
  )
)
# A data frame keeps R_squared numeric; cbind() of a character and a numeric
# vector would coerce every value to a string.
Final <- data.frame(
  model = model_R_Squared_values[["model"]],
  R_squared = model_R_Squared_values[["R_squared"]],
  stringsAsFactors = FALSE
)
View(model_R_Squared_values)
View(Final)
# The exponential model has the highest adjusted R-squared value (0.6957); note that its response is log(dt), so this value is measured on the log scale.
# Back-transform the exponential model's fitted log(dt) values to the
# original delivery-time scale.
predicted_Value <- exp(predict(object = reg_exp))
predicted_Value
## 1 2 3 4 5 6 7 8
## 23.97203 12.72512 15.71603 21.57071 23.97203 15.71603 17.46560 11.45042
## 9 10 11 12 13 14 15 16
## 23.97203 21.57071 19.40993 12.72512 17.46560 11.45042 11.45042 12.72512
## 17 18 19 20 21
## 15.71603 17.46560 10.30341 17.46560 14.14173
# Side-by-side table of sorting time, observed delivery time and the
# model's predicted delivery time.
Final <- cbind(
  Sorting_Time = dt.st[["Sorting.Time"]],
  Delivery_Time = dt.st[["Delivery.Time"]],
  Predicted_Delivery_time = predicted_Value
)
View(Final)
# Root-mean-square error of the back-transformed predictions against the
# observed delivery times (original scale).
rmse <- sqrt(mean((dt - predicted_Value)^2))
rmse
## [1] 2.94025
# Diagnostic plots for the exponential model (plot() on an lm fit).
plot(reg_exp)




# Histogram of the model's residuals; these are on the log scale because the
# response of reg_exp is log(dt).
hist(residuals(reg_exp)) # close to normal distribution
