This document presents an analysis of the case Store24 (A): Managing Employee Retention.
# Load the Store24 data set and print descriptive statistics for every column.
# The CSV is read through an explicit path built with file.path() instead of
# changing the session's working directory, so no global state is modified.
data_dir <- "~/Desktop/Data Analytics Internship/Employee Retention"
store <- read.csv(file.path(data_dir, "Store24.csv"))
View(store) # open the data frame in the interactive viewer
library(psych) # provides describe()
describe(store)
## vars n mean sd median trimmed mad
## store 1 75 38.00 21.79 38.00 38.00 28.17
## Sales 2 75 1205413.12 304531.31 1127332.00 1182031.25 288422.04
## Profit 3 75 276313.61 89404.08 265014.00 270260.34 90532.00
## MTenure 4 75 45.30 57.67 24.12 33.58 29.67
## CTenure 5 75 13.93 17.70 7.21 10.60 6.14
## Pop 6 75 9825.59 5911.67 8896.00 9366.07 7266.22
## Comp 7 75 3.79 1.31 3.63 3.66 0.82
## Visibility 8 75 3.08 0.75 3.00 3.07 0.00
## PedCount 9 75 2.96 0.99 3.00 2.97 1.48
## Res 10 75 0.96 0.20 1.00 1.00 0.00
## Hours24 11 75 0.84 0.37 1.00 0.92 0.00
## CrewSkill 12 75 3.46 0.41 3.50 3.47 0.34
## MgrSkill 13 75 3.64 0.41 3.59 3.62 0.45
## ServQual 14 75 87.15 12.61 89.47 88.62 15.61
## min max range skew kurtosis se
## store 1.00 75.00 74.00 0.00 -1.25 2.52
## Sales 699306.00 2113089.00 1413783.00 0.71 -0.09 35164.25
## Profit 122180.00 518998.00 396818.00 0.62 -0.21 10323.49
## MTenure 0.00 277.99 277.99 2.01 3.90 6.66
## CTenure 0.89 114.15 113.26 3.52 15.00 2.04
## Pop 1046.00 26519.00 25473.00 0.62 -0.23 682.62
## Comp 1.65 11.13 9.48 2.48 11.31 0.15
## Visibility 2.00 5.00 3.00 0.25 -0.38 0.09
## PedCount 1.00 5.00 4.00 0.00 -0.52 0.11
## Res 0.00 1.00 1.00 -4.60 19.43 0.02
## Hours24 0.00 1.00 1.00 -1.82 1.32 0.04
## CrewSkill 2.06 4.64 2.58 -0.43 1.64 0.05
## MgrSkill 2.96 4.62 1.67 0.27 -0.53 0.05
## ServQual 57.90 100.00 42.10 -0.66 -0.72 1.46
# Print the descriptive statistics of the store data once more.
library("psych")
describe(x = store)
## vars n mean sd median trimmed mad
## store 1 75 38.00 21.79 38.00 38.00 28.17
## Sales 2 75 1205413.12 304531.31 1127332.00 1182031.25 288422.04
## Profit 3 75 276313.61 89404.08 265014.00 270260.34 90532.00
## MTenure 4 75 45.30 57.67 24.12 33.58 29.67
## CTenure 5 75 13.93 17.70 7.21 10.60 6.14
## Pop 6 75 9825.59 5911.67 8896.00 9366.07 7266.22
## Comp 7 75 3.79 1.31 3.63 3.66 0.82
## Visibility 8 75 3.08 0.75 3.00 3.07 0.00
## PedCount 9 75 2.96 0.99 3.00 2.97 1.48
## Res 10 75 0.96 0.20 1.00 1.00 0.00
## Hours24 11 75 0.84 0.37 1.00 0.92 0.00
## CrewSkill 12 75 3.46 0.41 3.50 3.47 0.34
## MgrSkill 13 75 3.64 0.41 3.59 3.62 0.45
## ServQual 14 75 87.15 12.61 89.47 88.62 15.61
## min max range skew kurtosis se
## store 1.00 75.00 74.00 0.00 -1.25 2.52
## Sales 699306.00 2113089.00 1413783.00 0.71 -0.09 35164.25
## Profit 122180.00 518998.00 396818.00 0.62 -0.21 10323.49
## MTenure 0.00 277.99 277.99 2.01 3.90 6.66
## CTenure 0.89 114.15 113.26 3.52 15.00 2.04
## Pop 1046.00 26519.00 25473.00 0.62 -0.23 682.62
## Comp 1.65 11.13 9.48 2.48 11.31 0.15
## Visibility 2.00 5.00 3.00 0.25 -0.38 0.09
## PedCount 1.00 5.00 4.00 0.00 -0.52 0.11
## Res 0.00 1.00 1.00 -4.60 19.43 0.02
## Hours24 0.00 1.00 1.00 -1.82 1.32 0.04
## CrewSkill 2.06 4.64 2.58 -0.43 1.64 0.05
## MgrSkill 2.96 4.62 1.67 0.27 -0.53 0.05
## ServQual 57.90 100.00 42.10 -0.66 -0.72 1.46
# Mean and standard deviation of Profit, MTenure and CTenure.
# Every column is referenced explicitly through `store$`, so the data frame is
# not attached to the search path.
mean(store$Profit)
## [1] 276313.6
sd(store$Profit)
## [1] 89404.08
mean(store$MTenure)
## [1] 45.29644
sd(store$MTenure)
## [1] 57.67155
mean(store$CTenure)
## [1] 13.9315
sd(store$CTenure)
## [1] 17.69752
# Sorting example on the built-in mtcars data set.
# The mpg column is referenced as `mtcars$mpg` rather than through
# attach()/detach(), so the search path is left untouched.
View(mtcars)
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
View(newdata)
newdata[1:5, ] # see the first 5 rows
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
newdata <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
View(newdata)
# Order the stores by Profit, lowest first, then show the first ten rows of
# the first five columns.
mp <- store[order(store$Profit, decreasing = FALSE), ]
mp[seq_len(10), seq_len(5)]
## store Sales Profit MTenure CTenure
## 57 57 699306 122180 24.3485700 2.956879
## 66 66 879581 146058 115.2039000 3.876797
## 41 41 744211 147327 14.9180200 11.926080
## 55 55 925744 147672 6.6703910 18.365500
## 32 32 828918 149033 36.0792600 6.636550
## 13 13 857843 152513 0.6571813 1.577002
## 54 54 811190 159792 6.6703910 3.876797
## 52 52 1073008 169201 24.1185600 3.416838
## 61 61 716589 177046 21.8184200 13.305950
## 37 37 1202917 187765 23.1985000 1.347023
# Order the stores by Profit, highest first (sorting on the negated column),
# then show the first ten rows of the first five columns.
lp <- store[order(-store$Profit), ]
lp[seq_len(10), seq_len(5)]
## store Sales Profit MTenure CTenure
## 74 74 1782957 518998 171.09720 29.519510
## 7 7 1809256 476355 62.53080 7.326488
## 9 9 2113089 474725 108.99350 6.061602
## 6 6 1703140 469050 149.93590 11.351130
## 44 44 1807740 439781 182.23640 114.151900
## 2 2 1619874 424007 86.22219 6.636550
## 45 45 1602362 410149 47.64565 9.166325
## 18 18 1704826 394039 239.96980 33.774130
## 11 11 1583446 389886 44.81977 2.036961
## 47 47 1665657 387853 12.84790 6.636550
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplot of Profit against MTenure.
scatterplot(
  Profit ~ MTenure,
  data = store,
  xlab = "MTenure",
  ylab = "Profit",
  main = "Scatterplot of MTenure v/s Profit"
)

# Scatterplot of Profit against CTenure.
scatterplot(
  Profit ~ CTenure,
  data = store,
  xlab = "CTenure",
  ylab = "Profit",
  main = "Scatterplot of CTenure v/s Profit"
)
# Print numbers with 2 significant digits from here on; this option is not
# reset, so it also applies to every result printed later in the session.
options(digits = 2)
# Correlation matrix of all columns in the store data.
cor(x = store)
## store Sales Profit MTenure CTenure Pop Comp Visibility
## store 1.000 -0.227 -0.200 -0.057 0.0199 -0.2894 0.032 -0.026
## Sales -0.227 1.000 0.924 0.455 0.2543 0.4035 -0.235 0.131
## Profit -0.200 0.924 1.000 0.439 0.2577 0.4306 -0.335 0.136
## MTenure -0.057 0.455 0.439 1.000 0.2434 -0.0609 0.181 0.157
## CTenure 0.020 0.254 0.258 0.243 1.0000 -0.0015 -0.070 0.067
## Pop -0.289 0.403 0.431 -0.061 -0.0015 1.0000 -0.268 -0.050
## Comp 0.032 -0.235 -0.335 0.181 -0.0703 -0.2683 1.000 0.028
## Visibility -0.026 0.131 0.136 0.157 0.0665 -0.0500 0.028 1.000
## PedCount -0.221 0.424 0.450 0.062 -0.0841 0.6076 -0.146 -0.141
## Res -0.031 -0.167 -0.159 -0.062 -0.3403 -0.2369 0.219 0.022
## Hours24 0.027 0.063 -0.026 -0.165 0.0729 -0.2218 0.130 0.047
## CrewSkill 0.049 0.164 0.160 0.102 0.2572 0.2828 -0.042 -0.197
## MgrSkill -0.072 0.312 0.323 0.230 0.1240 0.0836 0.224 0.073
## ServQual -0.322 0.386 0.362 0.182 0.0812 0.1239 0.018 0.210
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## store -0.2212 -0.031 0.027 0.049 -0.072 -0.3225
## Sales 0.4239 -0.167 0.063 0.164 0.312 0.3864
## Profit 0.4502 -0.159 -0.026 0.160 0.323 0.3625
## MTenure 0.0620 -0.062 -0.165 0.102 0.230 0.1817
## CTenure -0.0841 -0.340 0.073 0.257 0.124 0.0812
## Pop 0.6076 -0.237 -0.222 0.283 0.084 0.1239
## Comp -0.1463 0.219 0.130 -0.042 0.224 0.0181
## Visibility -0.1411 0.022 0.047 -0.197 0.073 0.2099
## PedCount 1.0000 -0.284 -0.276 0.214 0.087 -0.0054
## Res -0.2844 1.000 -0.089 -0.153 -0.032 0.0908
## Hours24 -0.2760 -0.089 1.000 0.105 -0.039 0.0583
## CrewSkill 0.2137 -0.153 0.105 1.000 -0.021 -0.0335
## MgrSkill 0.0875 -0.032 -0.039 -0.021 1.000 0.3567
## ServQual -0.0054 0.091 0.058 -0.034 0.357 1.0000
# Correlation of Profit with MTenure, then with CTenure.
with(store, cor(Profit, MTenure))
## [1] 0.44
with(store, cor(Profit, CTenure))
## [1] 0.26
library(corrgram)
## Warning: replacing previous import by 'magrittr::%>%' when loading
## 'dendextend'
# Corrgram of the full data set: shaded cells in the lower triangle, pie
# glyphs in the upper triangle, variable names in the text panels.
corrgram(
  store,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of store24 dataset"
)
# Pearson correlation test between Profit and MTenure.
cor.test(x = store$Profit, y = store$MTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: store$Profit and store$MTenure
## t = 4, df = 70, p-value = 8e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.24 0.61
## sample estimates:
## cor
## 0.44
P-value: 8e-05 (the correlation between Profit and MTenure is statistically significant at the 5% level)
# Pearson correlation test between Profit and CTenure.
cor.test(x = store$Profit, y = store$CTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: store$Profit and store$CTenure
## t = 2, df = 70, p-value = 0.03
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.033 0.458
## sample estimates:
## cor
## 0.26
P-value: 0.03 (the correlation between Profit and CTenure is statistically significant at the 5% level)
# Linear regression of Profit on the tenure, competition, population,
# pedestrian-count, residential, 24-hour and visibility variables.
rp <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility,
  data = store
)
# Coefficient table, residual summary and goodness-of-fit statistics.
summary(rp)
##
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount +
## Res + Hours24 + Visibility, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105789 -35946 -7069 33780 112390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7610.04 66821.99 0.11 0.90967
## MTenure 760.99 127.09 5.99 9.7e-08 ***
## CTenure 944.98 421.69 2.24 0.02840 *
## Comp -25286.89 5491.94 -4.60 1.9e-05 ***
## Pop 3.67 1.47 2.50 0.01489 *
## PedCount 34087.36 9073.20 3.76 0.00037 ***
## Res 91584.68 39231.28 2.33 0.02262 *
## Hours24 63233.31 19641.11 3.22 0.00199 **
## Visibility 12625.45 9087.62 1.39 0.16941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57000 on 66 degrees of freedom
## Multiple R-squared: 0.638, Adjusted R-squared: 0.594
## F-statistic: 14.5 on 8 and 66 DF, p-value: 5.38e-12
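Explanatory variables that are statistically significant (p < 0.05): MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24
Explanatory variable that is not statistically significant (p >= 0.05): Visibility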
From the above analysis, we can conclude that, holding the other variables constant, increasing the manager’s tenure by one month is associated with an increase in profit of $760.99.
From the above analysis, we can conclude that, holding the other variables constant, increasing the crew’s tenure by one month is associated with an increase in profit of $944.98.
a) The most profitable store is the one with ID 74, and the least profitable store is the one with ID 57.
b) The correlation between Profit and MTenure is 0.44, while the correlation between Profit and CTenure is 0.26.
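c) Both Pearson correlation tests give p < 0.05 (8e-05 for MTenure and 0.03 for CTenure), so we reject the null hypothesis of zero correlation: Profit is significantly correlated with both MTenure and CTenure.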
d) The overall F-test of the regression is significant (p-value: 5.38e-12), which indicates that the model as a whole fits the data well.
e) The R-squared value is 0.6379. It means that 63.79% of the variation in the dependent variable can be explained by the independent variables.
f) The adjusted R-squared value is 0.594. It means that 59.4% of the variation in the dependent variable can be explained by the independent variables after adjusting for the number of predictors; because of this penalty, it is lower than the R-squared value.
g) The explanatory variables whose beta-coefficients are statistically significant are MTenure, CTenure, Comp, Pop, PedCount, Res and Hours24, while the only one whose beta-coefficient is not statistically significant is Visibility.
So, as an overall assessment, we can say that the model is a good fit for the data.