# Load the Store24 dataset from a hard-coded local directory and print a
# per-column summary (min, quartiles, median, mean, max).
# NOTE(review): the absolute path only exists on the original author's
# machine; adjust it before running elsewhere.
setwd("C:/Users/CJ With HP/Desktop/IIM Lucknow/Datasets")
# The file name is a single literal, so no paste() wrapper is needed.
store24.df <- read.csv("Store24.csv")
# attach() exposes the columns (Profit, MTenure, CTenure, ...) as bare names;
# the statements further down in this script rely on that.
attach(store24.df)
summary(store24.df)
## store Sales Profit MTenure
## Min. : 1.0 Min. : 699306 Min. :122180 Min. : 0.00
## 1st Qu.:19.5 1st Qu.: 984579 1st Qu.:211004 1st Qu.: 6.67
## Median :38.0 Median :1127332 Median :265014 Median : 24.12
## Mean :38.0 Mean :1205413 Mean :276314 Mean : 45.30
## 3rd Qu.:56.5 3rd Qu.:1362388 3rd Qu.:331314 3rd Qu.: 50.92
## Max. :75.0 Max. :2113089 Max. :518998 Max. :277.99
## CTenure Pop Comp Visibility
## Min. : 0.8871 Min. : 1046 Min. : 1.651 Min. :2.00
## 1st Qu.: 4.3943 1st Qu.: 5616 1st Qu.: 3.151 1st Qu.:3.00
## Median : 7.2115 Median : 8896 Median : 3.629 Median :3.00
## Mean : 13.9315 Mean : 9826 Mean : 3.788 Mean :3.08
## 3rd Qu.: 17.2156 3rd Qu.:14104 3rd Qu.: 4.230 3rd Qu.:4.00
## Max. :114.1519 Max. :26519 Max. :11.128 Max. :5.00
## PedCount Res Hours24 CrewSkill
## Min. :1.00 Min. :0.00 Min. :0.00 Min. :2.060
## 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:3.225
## Median :3.00 Median :1.00 Median :1.00 Median :3.500
## Mean :2.96 Mean :0.96 Mean :0.84 Mean :3.457
## 3rd Qu.:4.00 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:3.655
## Max. :5.00 Max. :1.00 Max. :1.00 Max. :4.640
## MgrSkill ServQual
## Min. :2.957 Min. : 57.90
## 1st Qu.:3.344 1st Qu.: 78.95
## Median :3.589 Median : 89.47
## Mean :3.638 Mean : 87.15
## 3rd Qu.:3.925 3rd Qu.: 99.90
## Max. :4.622 Max. :100.00
# Mean and standard deviation of store profit.
mean(store24.df$Profit)
## [1] 276313.6
sd(store24.df$Profit)
## [1] 89404.08
# Mean and standard deviation of manager tenure (MTenure).
mean(store24.df$MTenure)
## [1] 45.29644
sd(store24.df$MTenure)
## [1] 57.67155
# Mean and standard deviation of crew tenure (CTenure).
mean(store24.df$CTenure)
## [1] 13.9315
sd(store24.df$CTenure)
## [1] 17.69752
# Five most profitable stores: sort by descending Profit, then keep the first
# five rows and the first five columns (store, Sales, Profit, MTenure, CTenure).
datatop <- store24.df[order(-Profit), ][1:5, 1:5]

# Five least profitable stores: same slice of the ascending sort.
# `newdata` is left holding the ascending-sorted data frame.
newdata <- store24.df[order(Profit), ]
databottom <- newdata[1:5, 1:5]
# Scatterplot of Profit against manager tenure, with the least-squares
# regression line overlaid in red.
plot(
  x = MTenure,
  y = Profit,
  xlim = c(0, 300),
  cex = 0.8,
  main = "Profit V/s MTenure",
  xlab = "MTenure",
  ylab = "Profit"
)
abline(lm(Profit ~ MTenure), col = "red")
# Scatterplot of Profit against crew tenure, with the least-squares
# regression line overlaid in red.
# The x-axis label now names the variable actually plotted (CTenure); it was
# previously a copy-paste of the MTenure label.
# NOTE(review): xlim is kept at c(0, 300) to match the MTenure plot, although
# the summary above shows CTenure peaks at about 114 -- confirm the shared
# scale is intended.
plot(
  CTenure, Profit,
  main = "Profit V/s CTenure",
  xlab = "CTenure",
  ylab = "Profit",
  xlim = c(0, 300),
  cex = 0.8
)
abline(lm(Profit ~ CTenure), col = "red")
#10. Use R to construct a Correlation Matrix for all the variables in the dataset. (Display the numbers up to 2 Decimal places)
# Pairwise Pearson correlations for columns 2 through 13 (Sales ... MgrSkill),
# rounded to two decimal places. Column 1 (store) and column 14 (ServQual)
# are not part of this selection.
round(cor(store24.df[, 2:13]), digits = 2)
## Sales Profit MTenure CTenure Pop Comp Visibility PedCount
## Sales 1.00 0.92 0.45 0.25 0.40 -0.24 0.13 0.42
## Profit 0.92 1.00 0.44 0.26 0.43 -0.33 0.14 0.45
## MTenure 0.45 0.44 1.00 0.24 -0.06 0.18 0.16 0.06
## CTenure 0.25 0.26 0.24 1.00 0.00 -0.07 0.07 -0.08
## Pop 0.40 0.43 -0.06 0.00 1.00 -0.27 -0.05 0.61
## Comp -0.24 -0.33 0.18 -0.07 -0.27 1.00 0.03 -0.15
## Visibility 0.13 0.14 0.16 0.07 -0.05 0.03 1.00 -0.14
## PedCount 0.42 0.45 0.06 -0.08 0.61 -0.15 -0.14 1.00
## Res -0.17 -0.16 -0.06 -0.34 -0.24 0.22 0.02 -0.28
## Hours24 0.06 -0.03 -0.17 0.07 -0.22 0.13 0.05 -0.28
## CrewSkill 0.16 0.16 0.10 0.26 0.28 -0.04 -0.20 0.21
## MgrSkill 0.31 0.32 0.23 0.12 0.08 0.22 0.07 0.09
## Res Hours24 CrewSkill MgrSkill
## Sales -0.17 0.06 0.16 0.31
## Profit -0.16 -0.03 0.16 0.32
## MTenure -0.06 -0.17 0.10 0.23
## CTenure -0.34 0.07 0.26 0.12
## Pop -0.24 -0.22 0.28 0.08
## Comp 0.22 0.13 -0.04 0.22
## Visibility 0.02 0.05 -0.20 0.07
## PedCount -0.28 -0.28 0.21 0.09
## Res 1.00 -0.09 -0.15 -0.03
## Hours24 -0.09 1.00 0.11 -0.04
## CrewSkill -0.15 0.11 1.00 -0.02
## MgrSkill -0.03 -0.04 -0.02 1.00
# Correlation of Profit with manager tenure, rounded to two decimals.
round(cor(Profit, MTenure), 2)
## [1] 0.44
# Correlation of Profit with crew tenure, rounded to two decimals.
# (This call used to appear twice in a row; the identical repeat was removed.)
round(cor(Profit, CTenure), 2)
## [1] 0.26
# Draw a corrgram of every column in store24.df: shaded cells below the
# diagonal, pie glyphs above it, variable names on the diagonal, with the
# variables reordered by corrgram (order = TRUE).
library(corrgram)
corrgram(
  store24.df,
  order = TRUE,
  main = "Corrgram of store variables",
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)
#14. Run a Pearson’s Correlation test on the correlation between Profit and MTenure. What is the p-value?
# Pearson correlation test between Profit and manager tenure
# (method spelled out; "pearson" is also cor.test's default).
cor.test(Profit, MTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: Profit and MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
# Pearson correlation test between Profit and crew tenure
# (method spelled out; "pearson" is also cor.test's default).
cor.test(Profit, CTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: Profit and CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03262507 0.45786339
## sample estimates:
## cor
## 0.2576789
# Multiple linear regression of Profit on manager tenure, crew tenure and the
# site-location variables; the columns are resolved through the attached
# data frame. The model object is kept in `fit` and its summary is printed.
fit <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility
)
summary(fit)
##
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount +
## Res + Hours24 + Visibility)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105789 -35946 -7069 33780 112390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7610.041 66821.994 0.114 0.909674
## MTenure 760.993 127.086 5.988 9.72e-08 ***
## CTenure 944.978 421.687 2.241 0.028400 *
## Comp -25286.887 5491.937 -4.604 1.94e-05 ***
## Pop 3.667 1.466 2.501 0.014890 *
## PedCount 34087.359 9073.196 3.757 0.000366 ***
## Res 91584.675 39231.283 2.334 0.022623 *
## Hours24 63233.307 19641.114 3.219 0.001994 **
## Visibility 12625.447 9087.620 1.389 0.169411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared: 0.6379, Adjusted R-squared: 0.594
## F-statistic: 14.53 on 8 and 66 DF, p-value: 5.382e-12
# Explanatory variables that are statistically significant (p < 0.05): MTenure, Comp, PedCount, Res, Hours24, CTenure, Pop
# Explanatory variable that is not statistically significant (p = 0.169): Visibility
# Expected change in Profit at a store if the manager's tenure (number of months of experience with Store24) increases by one month, holding the other predictors constant = 760.993
# Expected change in Profit at a store if the crew's tenure (number of months of experience with Store24) increases by one month, holding the other predictors constant = 944.978
# The scatterplot of Profit against MTenure shows a cluster in the 0-50 month
# range. Within this range, profit increases as the manager's experience
# increases. Even among the outliers, the longer the manager's tenure, the
# higher the profit. The relation between crew tenure and Profit is moderately
# significant, as indicated by a moderate correlation value. Three of the site
# location parameters, viz. number of competitors per 10,000 people within a
# ½ mile radius (Comp), pedestrian foot traffic volume (PedCount), and the
# indicator for being open 24 hours (Hours24), are highly significant in
# deciding the profits. A lower number of competitors, higher foot traffic, and
# 24-hour availability are statistically significant in improving the profits.
# However, the other two site location parameters, viz. population within a
# ½ mile radius (Pop) and the indicator for being located in a residential vs.
# industrial area (Res), have only moderate statistical significance in
# deciding the overall profit. Visibility of the shop is not statistically
# significant at all.
# The manager must consider spending on wages, bonuses, and better training to
# increase the manager's and crew's tenure, which would result in better
# profits. In addition, keeping the store open 24 hours would be expected to
# improve profits, as shown by the regression analysis.