Retention Data Analysis

Including Plots

You can also embed plots, for example:

# Mean and standard deviation of store profit.
mean(store[["Profit"]])

## [1] 276313.6

sd(store[["Profit"]])

## [1] 89404.08

# Mean and standard deviation of the MTenure column.
mean(store[["MTenure"]])

## [1] 45.29644

sd(store[["MTenure"]])

## [1] 57.67155

# Mean and standard deviation of the CTenure column.
mean(store[["CTenure"]])

## [1] 13.9315

sd(store[["CTenure"]])

## [1] 17.69752

# Demonstrates sorting a data frame with order(), using the built-in mtcars
# data set. Every column reference below is written explicitly as mtcars$mpg,
# so no attach()/detach() pair is needed: attach() would only add the data
# frame to the search path, where it can mask or be masked by other objects.
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
newdata[1:5, ] # see the first 5 rows

##                      mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8  440 230 3.23 5.345 17.42  0  0    3    4

# Negating the key makes order() rank from largest to smallest.
newdata <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)

# View() opens a data viewer window, so it is only called when a user is
# present; scripted / batch runs skip it instead of risking a failure.
if (interactive()) {
  View(newdata)
}

# Rank the stores from highest to lowest profit (negating Profit makes order()
# sort in descending order), then show the first five columns for the top ten.
profitable_most <- store[order(-store[["Profit"]]), ]
profitable_most[seq_len(10), seq_len(5)]

##    store   Sales Profit   MTenure    CTenure
## 74    74 1782957 518998 171.09720  29.519510
## 7      7 1809256 476355  62.53080   7.326488
## 9      9 2113089 474725 108.99350   6.061602
## 6      6 1703140 469050 149.93590  11.351130
## 44    44 1807740 439781 182.23640 114.151900
## 2      2 1619874 424007  86.22219   6.636550
## 45    45 1602362 410149  47.64565   9.166325
## 18    18 1704826 394039 239.96980  33.774130
## 11    11 1583446 389886  44.81977   2.036961
## 47    47 1665657 387853  12.84790   6.636550

# Rank the stores from lowest to highest profit, then show the first five
# columns for the ten least profitable stores.
profitable_least <- store[order(store[["Profit"]]), ]
profitable_least[seq_len(10), seq_len(5)]

##    store   Sales Profit     MTenure   CTenure
## 57    57  699306 122180  24.3485700  2.956879
## 66    66  879581 146058 115.2039000  3.876797
## 41    41  744211 147327  14.9180200 11.926080
## 55    55  925744 147672   6.6703910 18.365500
## 32    32  828918 149033  36.0792600  6.636550
## 13    13  857843 152513   0.6571813  1.577002
## 54    54  811190 159792   6.6703910  3.876797
## 52    52 1073008 169201  24.1185600  3.416838
## 61    61  716589 177046  21.8184200 13.305950
## 37    37 1202917 187765  23.1985000  1.347023

# Scatter plot of Profit against MTenure, overlaid with the least-squares
# regression line of Profit on MTenure in green.
with(store, plot(MTenure, Profit,
  xlab = "MTenure", ylab = "Profit",
  main = "ScatterPlot of Profit vs MTenure"
))
abline(lm(Profit ~ MTenure, data = store), col = "green")

# Same plot for CTenure: Profit against CTenure with its fitted line.
with(store, plot(CTenure, Profit,
  xlab = "CTenure", ylab = "Profit",
  main = "ScatterPlot of Profit vs CTenure"
))
abline(lm(Profit ~ CTenure, data = store), col = "green")

# Pairwise Pearson correlations between every pair of columns in store,
# displayed rounded to two decimal places.
store3 <- cor(store, method = "pearson")
round(store3, digits = 2)

##            store Sales Profit MTenure CTenure   Pop  Comp Visibility
## store       1.00 -0.23  -0.20   -0.06    0.02 -0.29  0.03      -0.03
## Sales      -0.23  1.00   0.92    0.45    0.25  0.40 -0.24       0.13
## Profit     -0.20  0.92   1.00    0.44    0.26  0.43 -0.33       0.14
## MTenure    -0.06  0.45   0.44    1.00    0.24 -0.06  0.18       0.16
## CTenure     0.02  0.25   0.26    0.24    1.00  0.00 -0.07       0.07
## Pop        -0.29  0.40   0.43   -0.06    0.00  1.00 -0.27      -0.05
## Comp        0.03 -0.24  -0.33    0.18   -0.07 -0.27  1.00       0.03
## Visibility -0.03  0.13   0.14    0.16    0.07 -0.05  0.03       1.00
## PedCount   -0.22  0.42   0.45    0.06   -0.08  0.61 -0.15      -0.14
## Res        -0.03 -0.17  -0.16   -0.06   -0.34 -0.24  0.22       0.02
## Hours24     0.03  0.06  -0.03   -0.17    0.07 -0.22  0.13       0.05
## CrewSkill   0.05  0.16   0.16    0.10    0.26  0.28 -0.04      -0.20
## MgrSkill   -0.07  0.31   0.32    0.23    0.12  0.08  0.22       0.07
## ServQual   -0.32  0.39   0.36    0.18    0.08  0.12  0.02       0.21
##            PedCount   Res Hours24 CrewSkill MgrSkill ServQual
## store         -0.22 -0.03    0.03      0.05    -0.07    -0.32
## Sales          0.42 -0.17    0.06      0.16     0.31     0.39
## Profit         0.45 -0.16   -0.03      0.16     0.32     0.36
## MTenure        0.06 -0.06   -0.17      0.10     0.23     0.18
## CTenure       -0.08 -0.34    0.07      0.26     0.12     0.08
## Pop            0.61 -0.24   -0.22      0.28     0.08     0.12
## Comp          -0.15  0.22    0.13     -0.04     0.22     0.02
## Visibility    -0.14  0.02    0.05     -0.20     0.07     0.21
## PedCount       1.00 -0.28   -0.28      0.21     0.09    -0.01
## Res           -0.28  1.00   -0.09     -0.15    -0.03     0.09
## Hours24       -0.28 -0.09    1.00      0.11    -0.04     0.06
## CrewSkill      0.21 -0.15    0.11      1.00    -0.02    -0.03
## MgrSkill       0.09 -0.03   -0.04     -0.02     1.00     0.36
## ServQual      -0.01  0.09    0.06     -0.03     0.36     1.00

# Correlation between Profit and MTenure, rounded to two decimal places.
round(cor(store[["Profit"]], store[["MTenure"]]), digits = 2)

## [1] 0.44

# Correlation between Profit and CTenure, rounded to two decimal places.
round(cor(store[["Profit"]], store[["CTenure"]]), digits = 2)

## [1] 0.26

library(corrgram)

## Warning: package 'corrgram' was built under R version 3.4.3

# Corrgram of the store data with variables reordered (order = TRUE):
# shaded cells below the diagonal, pie glyphs above it, and the variable
# names on the diagonal.
corrgram(
  store,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of store dataset"
)

 # Two-sided Pearson correlation test between Profit and MTenure with a
 # 95% confidence interval (the defaults, spelled out explicitly).
 cor.test(
   x = store$Profit,
   y = store$MTenure,
   alternative = "two.sided",
   method = "pearson",
   conf.level = 0.95
 )

## 
##  Pearson's product-moment correlation
## 
## data:  store$Profit and store$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2353497 0.6055175
## sample estimates:
##       cor 
## 0.4388692

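# The p-value is 8.193e-05, which is less than 0.05, so we reject the null hypothesis that the true correlation is zero. The correlation between Profit and MTenure (r = 0.44) is statistically significant; note that a small p-value indicates significance, not strength, and r = 0.44 is a moderate positive correlation.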

 # Two-sided Pearson correlation test between Profit and CTenure with a
 # 95% confidence interval (the defaults, spelled out explicitly).
 cor.test(
   x = store$Profit,
   y = store$CTenure,
   alternative = "two.sided",
   method = "pearson",
   conf.level = 0.95
 )

## 
##  Pearson's product-moment correlation
## 
## data:  store$Profit and store$CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03262507 0.45786339
## sample estimates:
##       cor 
## 0.2576789

# The p-value is 0.02562, which is less than 0.05; hence we reject the null hypothesis that the true correlation between Profit and CTenure is zero.

# Multiple linear regression of Profit on MTenure, CTenure, Comp, Pop,
# PedCount, Res, Hours24 and Visibility, followed by the coefficient table
# and overall fit statistics.
fit <- lm(
  formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount +
    Res + Hours24 + Visibility,
  data = store
)
summary(fit)

## 
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount + 
##     Res + Hours24 + Visibility, data = store)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -105789  -35946   -7069   33780  112390 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7610.041  66821.994   0.114 0.909674    
## MTenure        760.993    127.086   5.988 9.72e-08 ***
## CTenure        944.978    421.687   2.241 0.028400 *  
## Comp        -25286.887   5491.937  -4.604 1.94e-05 ***
## Pop              3.667      1.466   2.501 0.014890 *  
## PedCount     34087.359   9073.196   3.757 0.000366 ***
## Res          91584.675  39231.283   2.334 0.022623 *  
## Hours24      63233.307  19641.114   3.219 0.001994 ** 
## Visibility   12625.447   9087.620   1.389 0.169411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared:  0.6379, Adjusted R-squared:  0.594 
## F-statistic: 14.53 on 8 and 66 DF,  p-value: 5.382e-12

Task 2n: Based on the above regression analysis, the variables whose beta coefficients are statistically significant (p < 0.05) are MTenure, CTenure, Comp, Pop, PedCount, Res and Hours24. The only variable whose beta coefficient is statistically insignificant is Visibility. Task 2o: Holding the other variables constant, the expected change in profit if MTenure increases by one month is 760.993, and the expected change in profit if CTenure increases by one month is 944.978.

## Executive Summary

The above analysis was done to find the effect on the profit of different stores of certain factors: manager tenure in months, crew tenure in months, visibility of the store on a scale of 1-5, competition to the store per 100000 people, population around the store, whether the store was open 24 hours or not, whether the store was in a residential or industrial area, and a 5-point rating of pedestrian foot traffic volume, with 5 being the highest. It was found that the most dominant factor was the manager's tenure: the longer the manager's tenure, the larger the profit the store was expected to have. Competition, that is, the number of different stores within a 1/2-mile radius, was also an important factor, but it had a negative relationship with profit, i.e. with an increase in competition the store was expected to earn less profit. Pedestrian count, i.e. the 5-point rating of pedestrian foot traffic volume with 5 being the highest, is also a major factor, as it contributes largely to the growth and profit of the store: the more pedestrians, the better the profit for the store. So these three were the main factors. Other significant factors include Hours24, that is, whether the store was open the whole day or not; the store benefits if it is open for the whole day. Res and CTenure also affected the profit, though not as much. So the store's location, that is, whether it is in a residential or industrial area, also has a say in its profit, and likewise the tenure of its crew members. Lastly, one important finding is that Visibility, i.e. the 5-point rating of the visibility of the store front with 5 being the highest, does not affect the store's profit much: its coefficient was not statistically significant, so whether or not the store is clearly visible does not have a large say in its profit.

Retention Data Analysis

Namit Garg

18 December 2017

R Markdown

Including Plots