# Reading data
dataset <- read.csv("Store24.csv")
# Summarizging data
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
describe(dataset)
## vars n mean sd median trimmed mad
## store 1 75 38.00 21.79 38.00 38.00 28.17
## Sales 2 75 1205413.12 304531.31 1127332.00 1182031.25 288422.04
## Profit 3 75 276313.61 89404.08 265014.00 270260.34 90532.00
## MTenure 4 75 45.30 57.67 24.12 33.58 29.67
## CTenure 5 75 13.93 17.70 7.21 10.60 6.14
## Pop 6 75 9825.59 5911.67 8896.00 9366.07 7266.22
## Comp 7 75 3.79 1.31 3.63 3.66 0.82
## Visibility 8 75 3.08 0.75 3.00 3.07 0.00
## PedCount 9 75 2.96 0.99 3.00 2.97 1.48
## Res 10 75 0.96 0.20 1.00 1.00 0.00
## Hours24 11 75 0.84 0.37 1.00 0.92 0.00
## CrewSkill 12 75 3.46 0.41 3.50 3.47 0.34
## MgrSkill 13 75 3.64 0.41 3.59 3.62 0.45
## ServQual 14 75 87.15 12.61 89.47 88.62 15.61
## min max range skew kurtosis se
## store 1.00 75.00 74.00 0.00 -1.25 2.52
## Sales 699306.00 2113089.00 1413783.00 0.71 -0.09 35164.25
## Profit 122180.00 518998.00 396818.00 0.62 -0.21 10323.49
## MTenure 0.00 277.99 277.99 2.01 3.90 6.66
## CTenure 0.89 114.15 113.26 3.52 15.00 2.04
## Pop 1046.00 26519.00 25473.00 0.62 -0.23 682.62
## Comp 1.65 11.13 9.48 2.48 11.31 0.15
## Visibility 2.00 5.00 3.00 0.25 -0.38 0.09
## PedCount 1.00 5.00 4.00 0.00 -0.52 0.11
## Res 0.00 1.00 1.00 -4.60 19.43 0.02
## Hours24 0.00 1.00 1.00 -1.82 1.32 0.04
## CrewSkill 2.06 4.64 2.58 -0.43 1.64 0.05
## MgrSkill 2.96 4.62 1.67 0.27 -0.53 0.05
## ServQual 57.90 100.00 42.10 -0.66 -0.72 1.46
#mean sd
mean(dataset$Profit)
## [1] 276313.6
sd(dataset$Profit)
## [1] 89404.08
mean(dataset$MTenure)
## [1] 45.29644
sd(dataset$MTenure)
## [1] 57.67155
mean(dataset$CTenure)
## [1] 13.9315
sd(dataset$CTenure)
## [1] 17.69752
#histograms
hist(dataset$MTenure, col="gray", xlab="MTenure", main="Manager Tenure")

hist(dataset$CTenure, col="gray", xlab="CTenure", main="Crew Tenure")

#scatterplots
#scatter plot of Profit vs. MTenure
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
scatterplot(Profit ~ MTenure, data=dataset,main="Scatterplot of Profit vs. MTenure",xlab="MTenure",ylab="Profit")

scatterplot(Profit ~ CTenure, data=dataset,main="Scatterplot of Profit vs. CTenure",xlab="CTenure",ylab="Profit")

#correlation of Profit with MTenure
#the correlation of Profit with CTenure
cor(dataset$Profit, dataset$MTenure)
## [1] 0.4388692
cor(dataset$Profit, dataset$CTenure)
## [1] 0.2576789
cor.test(dataset[,"Profit"], dataset[,"MTenure"])
##
## Pearson's product-moment correlation
##
## data: dataset[, "Profit"] and dataset[, "MTenure"]
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
cor.test(dataset[,"Profit"], dataset[,"CTenure"])
##
## Pearson's product-moment correlation
##
## data: dataset[, "Profit"] and dataset[, "CTenure"]
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03262507 0.45786339
## sample estimates:
## cor
## 0.2576789
#Constructing a Correlation Matrix for all variables
cor(dataset)
## store Sales Profit MTenure CTenure
## store 1.00000000 -0.22693400 -0.19993481 -0.05655216 0.019930097
## Sales -0.22693400 1.00000000 0.92387059 0.45488023 0.254315184
## Profit -0.19993481 0.92387059 1.00000000 0.43886921 0.257678895
## MTenure -0.05655216 0.45488023 0.43886921 1.00000000 0.243383135
## CTenure 0.01993010 0.25431518 0.25767890 0.24338314 1.000000000
## Pop -0.28936691 0.40348147 0.43063326 -0.06089646 -0.001532449
## Comp 0.03194023 -0.23501372 -0.33454148 0.18087179 -0.070281327
## Visibility -0.02648858 0.13065638 0.13569207 0.15651731 0.066506016
## PedCount -0.22117519 0.42391087 0.45023346 0.06198608 -0.084112627
## Res -0.03142976 -0.16672402 -0.15947734 -0.06234721 -0.340340876
## Hours24 0.02687986 0.06324716 -0.02568703 -0.16513872 0.072865022
## CrewSkill 0.04866273 0.16402179 0.16008443 0.10162169 0.257154817
## MgrSkill -0.07218804 0.31163056 0.32284842 0.22962743 0.124045346
## ServQual -0.32246921 0.38638112 0.36245032 0.18168875 0.081156172
## Pop Comp Visibility PedCount Res
## store -0.289366908 0.03194023 -0.02648858 -0.221175193 -0.03142976
## Sales 0.403481471 -0.23501372 0.13065638 0.423910867 -0.16672402
## Profit 0.430633264 -0.33454148 0.13569207 0.450233461 -0.15947734
## MTenure -0.060896460 0.18087179 0.15651731 0.061986084 -0.06234721
## CTenure -0.001532449 -0.07028133 0.06650602 -0.084112627 -0.34034088
## Pop 1.000000000 -0.26828355 -0.04998269 0.607638861 -0.23693726
## Comp -0.268283553 1.00000000 0.02844548 -0.146325204 0.21923878
## Visibility -0.049982694 0.02844548 1.00000000 -0.141068116 0.02194756
## PedCount 0.607638861 -0.14632520 -0.14106812 1.000000000 -0.28437852
## Res -0.236937265 0.21923878 0.02194756 -0.284378520 1.00000000
## Hours24 -0.221767927 0.12957478 0.04692587 -0.275973353 -0.08908708
## CrewSkill 0.282845090 -0.04229731 -0.19745297 0.213672596 -0.15331247
## MgrSkill 0.083554590 0.22407913 0.07348301 0.087475440 -0.03213640
## ServQual 0.123946521 0.01814508 0.20992919 -0.005445552 0.09081624
## Hours24 CrewSkill MgrSkill ServQual
## store 0.02687986 0.04866273 -0.07218804 -0.322469213
## Sales 0.06324716 0.16402179 0.31163056 0.386381121
## Profit -0.02568703 0.16008443 0.32284842 0.362450323
## MTenure -0.16513872 0.10162169 0.22962743 0.181688755
## CTenure 0.07286502 0.25715482 0.12404535 0.081156172
## Pop -0.22176793 0.28284509 0.08355459 0.123946521
## Comp 0.12957478 -0.04229731 0.22407913 0.018145080
## Visibility 0.04692587 -0.19745297 0.07348301 0.209929194
## PedCount -0.27597335 0.21367260 0.08747544 -0.005445552
## Res -0.08908708 -0.15331247 -0.03213640 0.090816237
## Hours24 1.00000000 0.10536295 -0.03883007 0.058325655
## CrewSkill 0.10536295 1.00000000 -0.02100949 -0.033516504
## MgrSkill -0.03883007 -0.02100949 1.00000000 0.356702708
## ServQual 0.05832565 -0.03351650 0.35670271 1.000000000
library(car)
scatterplotMatrix(dataset[,c("Profit","MTenure","CTenure")],main="Scatter Plot Matrix")

library(car)
scatterplotMatrix(dataset[,c("Profit","MTenure","CTenure","Comp","Pop","PedCount")],main="Scatter Plot Matrix")

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
corrgram(dataset,main="Corrgram of store.df intercorrelations")

#Regression Model
m1 <- lm(Profit ~ MTenure + CTenure + Pop + PedCount + Res + Visibility + Hours24 + Comp,data=dataset)
summary(m1)
##
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Pop + PedCount + Res +
## Visibility + Hours24 + Comp, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105789 -35946 -7069 33780 112390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7610.041 66821.994 0.114 0.909674
## MTenure 760.993 127.086 5.988 9.72e-08 ***
## CTenure 944.978 421.687 2.241 0.028400 *
## Pop 3.667 1.466 2.501 0.014890 *
## PedCount 34087.359 9073.196 3.757 0.000366 ***
## Res 91584.675 39231.283 2.334 0.022623 *
## Visibility 12625.447 9087.620 1.389 0.169411
## Hours24 63233.307 19641.114 3.219 0.001994 **
## Comp -25286.887 5491.937 -4.604 1.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared: 0.6379, Adjusted R-squared: 0.594
## F-statistic: 14.53 on 8 and 66 DF, p-value: 5.382e-12
# confidence intervals
confint(m1)
## 2.5 % 97.5 %
## (Intercept) -1.258044e+05 141024.457560
## MTenure 5.072581e+02 1014.727399
## CTenure 1.030519e+02 1786.904132
## Pop 7.390282e-01 6.594184
## PedCount 1.597214e+04 52202.579289
## Res 1.325689e+04 169912.458917
## Visibility -5.518571e+03 30769.464999
## Hours24 2.401856e+04 102448.057104
## Comp -3.625189e+04 -14321.880698
#actual profit
dataset$Profit
## [1] 265014 424007 222735 210122 300480 469050 476355 361115 474725 278625
## [11] 389886 329020 152513 261571 203951 196277 265584 394039 261495 269235
## [21] 282584 367036 277414 267354 282124 211912 230194 273036 263956 333607
## [31] 211885 149033 292745 382199 322624 219292 187765 203184 221130 222913
## [41] 147327 264072 337233 439781 410149 315780 387853 284169 195276 251013
## [51] 237344 169201 365018 159792 147672 189235 122180 227601 303069 356071
## [61] 177046 202641 239036 221157 301641 146058 362067 236339 375393 254203
## [71] 198529 196772 279193 518998 296826
#predicted profit by the OLS model
fitted(m1)
## 1 2 3 4 5 6 7 8
## 282884.6 311616.6 247387.2 188867.1 308773.0 379779.2 392304.9 371985.2
## 9 10 11 12 13 14 15 16
## 443237.0 300474.6 390414.7 420779.0 210319.6 268639.8 279296.3 202381.0
## 17 18 19 20 21 22 23 24
## 352534.2 455293.3 256081.6 275088.3 277490.0 271166.4 309003.2 214340.6
## 25 26 27 28 29 30 31 32
## 246051.2 219299.0 258929.7 280699.0 210844.3 260034.8 197082.6 191247.4
## 33 34 35 36 37 38 39 40
## 207234.6 370486.2 318628.6 232328.1 240430.8 199026.7 260630.9 173787.2
## 41 42 43 44 45 46 47 48
## 237766.0 277755.6 375932.0 475485.8 350220.8 279391.3 399517.8 208750.4
## 49 50 51 52 53 54 55 56
## 215972.9 307812.7 282907.8 212113.7 252711.1 195979.6 214674.3 167063.9
## 57 58 59 60 61 62 63 64
## 227968.7 218550.3 265067.8 331875.7 192084.1 218925.7 238526.9 318618.1
## 65 66 67 68 69 70 71 72
## 293397.2 218979.5 261546.3 240964.4 280082.4 282110.4 205893.0 262434.7
## 73 74 75
## 269862.0 412871.4 252828.2
#Compare profit predicted by the model with the actual profit given in the data
predictedProfit = data.frame(fitted(m1))
actualProfit = data.frame(dataset$Profit)
profitComparison = cbind(actualProfit, predictedProfit)
View(profitComparison)
#convert PedCount into factor variable
dataset$PedCount <- factor(dataset$PedCount)
#convert Visibility into factor variable
dataset$Visibility <- factor(dataset$Visibility)
#convert Hours24 into factor variable
dataset$Hours24 <- factor(dataset$Hours24)
#convert Res into factor variable
dataset$Res[dataset$Res == 0] <- 'Industrial'
dataset$Res[dataset$Res == 1] <- 'Residential'
#converting them into a Factor
dataset$Res <- factor(dataset$Res)
table(dataset$Res)
##
## Industrial Residential
## 3 72
#Hours24
table(dataset$Hours24)
##
## 0 1
## 12 63
#PedCount
table(dataset$PedCount)
##
## 1 2 3 4 5
## 5 19 29 18 4
# Visibility
table(dataset$Visibility)
##
## 2 3 4 5
## 16 39 18 2
m2 <- lm(Profit ~ MTenure + CTenure + Pop + PedCount + Res + Visibility + Hours24 + Comp,data=dataset)
summary(m2)
##
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Pop + PedCount + Res +
## Visibility + Hours24 + Comp, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -117216 -33066 -7849 34399 110419
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48476.214 61471.750 0.789 0.433404
## MTenure 810.971 145.017 5.592 5.59e-07 ***
## CTenure 1016.017 443.756 2.290 0.025519 *
## Pop 3.218 1.555 2.069 0.042760 *
## PedCount2 67304.861 31677.186 2.125 0.037671 *
## PedCount3 94474.471 32160.240 2.938 0.004662 **
## PedCount4 115585.561 34704.229 3.331 0.001475 **
## PedCount5 186176.929 52125.818 3.572 0.000699 ***
## ResResidential 92247.443 40918.294 2.254 0.027771 *
## Visibility3 17696.740 19903.857 0.889 0.377436
## Visibility4 27823.713 22395.954 1.242 0.218861
## Visibility5 34400.482 49081.134 0.701 0.486037
## Hours241 57108.155 21767.662 2.624 0.010979 *
## Comp -26201.681 5877.719 -4.458 3.61e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 58200 on 61 degrees of freedom
## Multiple R-squared: 0.6506, Adjusted R-squared: 0.5762
## F-statistic: 8.738 on 13 and 61 DF, p-value: 1.07e-09
m1$coefficients
## (Intercept) MTenure CTenure Pop PedCount
## 7610.041452 760.992734 944.978026 3.666606 34087.358789
## Res Visibility Hours24 Comp
## 91584.675234 12625.447050 63233.307162 -25286.886662
m2$coefficients
## (Intercept) MTenure CTenure Pop PedCount2
## 48476.214150 810.971201 1016.017324 3.217894 67304.861162
## PedCount3 PedCount4 PedCount5 ResResidential Visibility3
## 94474.470859 115585.561387 186176.929186 92247.442870 17696.740097
## Visibility4 Visibility5 Hours241 Comp
## 27823.712701 34400.481708 57108.154711 -26201.681328