TASK 4: Analysis of the Case Store24 (A): Managing Employee Retention
Task 4a: Important questions that matter:
Task 4b. Done
Task 4c. Loading Store24.csv in R
# Load the Store24 data set from the course folder and print a per-column
# summary of every variable.
setwd("c:/office/Week 3 Day 1")
# The file name is passed directly; wrapping a single string in paste()
# returns the same string.
store.df <- read.csv(file = "Store24.csv", sep = ",")
summary(object = store.df)
## store Sales Profit MTenure
## Min. : 1.0 Min. : 699306 Min. :122180 Min. : 0.00
## 1st Qu.:19.5 1st Qu.: 984579 1st Qu.:211004 1st Qu.: 6.67
## Median :38.0 Median :1127332 Median :265014 Median : 24.12
## Mean :38.0 Mean :1205413 Mean :276314 Mean : 45.30
## 3rd Qu.:56.5 3rd Qu.:1362388 3rd Qu.:331314 3rd Qu.: 50.92
## Max. :75.0 Max. :2113089 Max. :518998 Max. :277.99
## CTenure Pop Comp Visibility
## Min. : 0.8871 Min. : 1046 Min. : 1.651 Min. :2.00
## 1st Qu.: 4.3943 1st Qu.: 5616 1st Qu.: 3.151 1st Qu.:3.00
## Median : 7.2115 Median : 8896 Median : 3.629 Median :3.00
## Mean : 13.9315 Mean : 9826 Mean : 3.788 Mean :3.08
## 3rd Qu.: 17.2156 3rd Qu.:14104 3rd Qu.: 4.230 3rd Qu.:4.00
## Max. :114.1519 Max. :26519 Max. :11.128 Max. :5.00
## PedCount Res Hours24 CrewSkill
## Min. :1.00 Min. :0.00 Min. :0.00 Min. :2.060
## 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:3.225
## Median :3.00 Median :1.00 Median :1.00 Median :3.500
## Mean :2.96 Mean :0.96 Mean :0.84 Mean :3.457
## 3rd Qu.:4.00 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:3.655
## Max. :5.00 Max. :1.00 Max. :1.00 Max. :4.640
## MgrSkill ServQual
## Min. :2.957 Min. : 57.90
## 1st Qu.:3.344 1st Qu.: 78.95
## Median :3.589 Median : 89.47
## Mean :3.638 Mean : 87.15
## 3rd Qu.:3.925 3rd Qu.: 99.90
## Max. :4.622 Max. :100.00
Task 4d. Use R to measure the mean and standard deviation of Profit.
# Descriptive statistics for Profit; the `mean` and `sd` columns of the
# describe() output answer this task.
library(psych)
describe(store.df[["Profit"]])
## vars n mean sd median trimmed mad min max range
## X1 1 75 276313.6 89404.08 265014 270260.3 90532 122180 518998 396818
## skew kurtosis se
## X1 0.62 -0.21 10323.49
Use R to measure the mean and standard deviation of MTenure
# Descriptive statistics for MTenure (read the `mean` and `sd` columns).
describe(store.df[["MTenure"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 75 45.3 57.67 24.12 33.58 29.67 0 277.99 277.99 2.01 3.9
## se
## X1 6.66
Use R to measure the mean and standard deviation of CTenure
# Descriptive statistics for CTenure (read the `mean` and `sd` columns).
describe(store.df[["CTenure"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 75 13.93 17.7 7.21 10.6 6.14 0.89 114.15 113.26 3.52 15
## se
## X1 2.04
Task 4e. Sorting and Subsetting in R
# Sorting demo on the built-in mtcars data set.
# The mpg column is referenced explicitly as mtcars$mpg instead of going
# through attach()/detach(): attach() places the columns on the search path,
# where they can silently mask (or be masked by) other objects.
View(mtcars)
# Rows ordered by increasing mpg.
newdata <- mtcars[order(mtcars$mpg), ]
View(newdata)
# Rows ordered by decreasing mpg (negating the key reverses the order).
newdata <- mtcars[order(-mtcars$mpg), ]
View(newdata)
Task 4f: 4.Top 10 most profitable stores
# Order the stores from highest to lowest Profit (negated sort key), then
# print the first ten rows, i.e. the ten most profitable stores.
Profitable_stores <- store.df[order(-store.df[["Profit"]]), ]
Profitable_stores[seq_len(10), ]
## store Sales Profit MTenure CTenure Pop Comp Visibility
## 74 74 1782957 518998 171.09720 29.519510 10913 2.319850 3
## 7 7 1809256 476355 62.53080 7.326488 17754 3.377900 2
## 9 9 2113089 474725 108.99350 6.061602 26519 2.637630 2
## 6 6 1703140 469050 149.93590 11.351130 16926 3.184613 3
## 44 44 1807740 439781 182.23640 114.151900 20624 3.628561 3
## 2 2 1619874 424007 86.22219 6.636550 8630 4.235555 4
## 45 45 1602362 410149 47.64565 9.166325 17808 3.472609 5
## 18 18 1704826 394039 239.96980 33.774130 3807 3.994713 5
## 11 11 1583446 389886 44.81977 2.036961 21550 3.272398 2
## 47 47 1665657 387853 12.84790 6.636550 23623 2.422707 2
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## 74 4 1 0 3.50 4.405556 94.73878
## 7 5 1 1 3.94 4.100000 81.57837
## 9 4 1 1 3.22 3.583333 100.00000
## 6 4 1 0 3.58 4.605556 94.73510
## 44 4 0 1 4.06 4.172222 86.84327
## 2 3 1 1 3.20 3.556667 94.73510
## 45 3 1 1 3.58 4.622222 100.00000
## 18 3 1 1 3.18 3.866667 97.36939
## 11 5 1 1 3.43 3.200000 100.00000
## 47 5 1 1 4.23 3.950000 99.80105
# Inspect the descending-by-Profit table in the data viewer.
View(Profitable_stores)
# Re-sort in ascending order of Profit so the least profitable stores come
# first, then print the first ten rows (the ten least profitable stores).
Profitable_stores <- store.df[order(store.df[["Profit"]]), ]
Profitable_stores[seq_len(10), ]
## store Sales Profit MTenure CTenure Pop Comp Visibility
## 57 57 699306 122180 24.3485700 2.956879 3642 2.973376 3
## 66 66 879581 146058 115.2039000 3.876797 1046 6.569790 2
## 41 41 744211 147327 14.9180200 11.926080 9701 4.364600 2
## 55 55 925744 147672 6.6703910 18.365500 10532 6.389294 4
## 32 32 828918 149033 36.0792600 6.636550 9697 4.641468 3
## 13 13 857843 152513 0.6571813 1.577002 14186 4.435671 3
## 54 54 811190 159792 6.6703910 3.876797 3747 3.756011 3
## 52 52 1073008 169201 24.1185600 3.416838 14859 6.585143 3
## 61 61 716589 177046 21.8184200 13.305950 3014 3.263994 3
## 37 37 1202917 187765 23.1985000 1.347023 8870 4.491863 3
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## 57 2 1 1 3.35 2.956667 84.21266
## 66 3 1 1 4.03 3.673333 80.26675
## 41 3 1 1 3.03 3.672222 81.13993
## 55 3 1 1 3.49 3.477778 76.31346
## 32 3 1 0 3.28 3.550000 73.68654
## 13 2 1 1 4.10 3.000000 76.30609
## 54 2 1 1 3.08 3.933333 65.78734
## 52 3 1 1 3.83 3.833333 94.73510
## 61 1 1 1 3.07 3.126667 73.68654
## 37 3 1 1 3.38 4.016667 73.68654
# Open the ascending-by-Profit table in the interactive data viewer.
View(Profitable_stores)
Task 4g. scatter plots of Profit vs MTenure
# Scatter plot of Profit (y axis) against MTenure (x axis).
# The stray trailing comma after `ylab`, which handed plot() an extra empty
# argument, has been removed.
plot(
  Profitable_stores$MTenure, Profitable_stores$Profit,
  main = "ScatterPlot of Profit Vs MTenure",
  xlab = "MTenure", ylab = "Profit"
)
Task 4h:scatter plot of Profit vs. CTenure.
# Scatter plot of Profit (y axis) against CTenure (x axis).
# The stray trailing comma after `ylab`, which handed plot() an extra empty
# argument, has been removed.
plot(
  Profitable_stores$CTenure, Profitable_stores$Profit,
  main = "ScatterPlot of Profit Vs CTenure",
  xlab = "CTenure", ylab = "Profit"
)
Task 4i:
Use R to construct a Correlation Matrix for all the variables in the dataset.
# Pairwise correlations between all columns of store.df, rounded to two
# decimal places for display.
round(cor(x = store.df), digits = 2)
## store Sales Profit MTenure CTenure Pop Comp Visibility
## store 1.00 -0.23 -0.20 -0.06 0.02 -0.29 0.03 -0.03
## Sales -0.23 1.00 0.92 0.45 0.25 0.40 -0.24 0.13
## Profit -0.20 0.92 1.00 0.44 0.26 0.43 -0.33 0.14
## MTenure -0.06 0.45 0.44 1.00 0.24 -0.06 0.18 0.16
## CTenure 0.02 0.25 0.26 0.24 1.00 0.00 -0.07 0.07
## Pop -0.29 0.40 0.43 -0.06 0.00 1.00 -0.27 -0.05
## Comp 0.03 -0.24 -0.33 0.18 -0.07 -0.27 1.00 0.03
## Visibility -0.03 0.13 0.14 0.16 0.07 -0.05 0.03 1.00
## PedCount -0.22 0.42 0.45 0.06 -0.08 0.61 -0.15 -0.14
## Res -0.03 -0.17 -0.16 -0.06 -0.34 -0.24 0.22 0.02
## Hours24 0.03 0.06 -0.03 -0.17 0.07 -0.22 0.13 0.05
## CrewSkill 0.05 0.16 0.16 0.10 0.26 0.28 -0.04 -0.20
## MgrSkill -0.07 0.31 0.32 0.23 0.12 0.08 0.22 0.07
## ServQual -0.32 0.39 0.36 0.18 0.08 0.12 0.02 0.21
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## store -0.22 -0.03 0.03 0.05 -0.07 -0.32
## Sales 0.42 -0.17 0.06 0.16 0.31 0.39
## Profit 0.45 -0.16 -0.03 0.16 0.32 0.36
## MTenure 0.06 -0.06 -0.17 0.10 0.23 0.18
## CTenure -0.08 -0.34 0.07 0.26 0.12 0.08
## Pop 0.61 -0.24 -0.22 0.28 0.08 0.12
## Comp -0.15 0.22 0.13 -0.04 0.22 0.02
## Visibility -0.14 0.02 0.05 -0.20 0.07 0.21
## PedCount 1.00 -0.28 -0.28 0.21 0.09 -0.01
## Res -0.28 1.00 -0.09 -0.15 -0.03 0.09
## Hours24 -0.28 -0.09 1.00 0.11 -0.04 0.06
## CrewSkill 0.21 -0.15 0.11 1.00 -0.02 -0.03
## MgrSkill 0.09 -0.03 -0.04 -0.02 1.00 0.36
## ServQual -0.01 0.09 0.06 -0.03 0.36 1.00
Task 4j: Correlation 9. Use R to measure the correlation between Profit and MTenure. (Display the numbers up to 2 decimal places)
# Correlation between Profit and MTenure, shown to two decimal places.
round(cor(store.df[["Profit"]], store.df[["MTenure"]]), digits = 2)
## [1] 0.44
# Correlation between Profit and CTenure, shown to two decimal places.
round(cor(store.df[["Profit"]], store.df[["CTenure"]]), digits = 2)
## [1] 0.26
Task 4k.
11.Use R to construct the following Corrgram based on all variables in the dataset.
# Draw a corrgram (graphical correlation matrix) over every column of
# store.df.
library("corrgram")
corrgram(
  x = store.df,
  main = "Corrgram of Store Variables"
)
TASK 4l - Pearson’s Correlation Tests
Run a Pearson’s Correlation test on the correlation between Profit and MTenure. What is the p-value?
# Pearson product-moment correlation test between Profit and MTenure; the
# p-value is reported in the printed result.
cor.test(
  x = store.df$Profit,
  y = store.df$MTenure,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: store.df$Profit and store.df$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
p value = 8.193e-05
# Pearson correlation test between Profit and CTenure. The Profit/MTenure
# pair is already tested earlier in this task, so repeating MTenure here was
# a copy-paste slip.
# NOTE(review): the printed result and p-value that follow in this document
# were produced by the duplicated MTenure call and must be regenerated.
cor.test(store.df$Profit, store.df$CTenure)
##
## Pearson's product-moment correlation
##
## data: store.df$Profit and store.df$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
p- value= 8.193e-05
TASK 4m - Regression Analysis
Run a regression of Profit on {MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24, Visibility}.
# Linear regression of Profit on the tenure and site-location variables.
# Columns are named in the formula and looked up through `data = store.df`
# rather than written as store.df$col terms: this lets predict() accept a
# `newdata` frame and labels the coefficients with the bare column names
# (e.g. MTenure instead of store.df$MTenure). The estimates are unchanged.
store <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility,
  data = store.df
)
summary(store)
##
## Call:
## lm(formula = store.df$Profit ~ store.df$MTenure + store.df$CTenure +
## store.df$Comp + store.df$Pop + store.df$PedCount + store.df$Res +
## store.df$Hours24 + store.df$Visibility)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105789 -35946 -7069 33780 112390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7610.041 66821.994 0.114 0.909674
## store.df$MTenure 760.993 127.086 5.988 9.72e-08 ***
## store.df$CTenure 944.978 421.687 2.241 0.028400 *
## store.df$Comp -25286.887 5491.937 -4.604 1.94e-05 ***
## store.df$Pop 3.667 1.466 2.501 0.014890 *
## store.df$PedCount 34087.359 9073.196 3.757 0.000366 ***
## store.df$Res 91584.675 39231.283 2.334 0.022623 *
## store.df$Hours24 63233.307 19641.114 3.219 0.001994 **
## store.df$Visibility 12625.447 9087.620 1.389 0.169411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared: 0.6379, Adjusted R-squared: 0.594
## F-statistic: 14.53 on 8 and 66 DF, p-value: 5.382e-12
Task 4o. 17. What is the expected change in the Profit at a store if the Manager’s tenure, i.e. the number of months of experience with Store24, increases by one month?
# The expected change in Profit for one additional month of manager tenure is
# the MTenure slope of the fitted model, not a fitted value. The coefficient
# is selected by a name ending in "MTenure" so the lookup works whether the
# term is labelled "MTenure" or "store.df$MTenure".
coef(store)[grepl("MTenure$", names(coef(store)))]
# Fitted Profit for every store used in the regression.
predict(store)
## 1 2 3 4 5 6 7 8
## 282884.6 311616.6 247387.2 188867.1 308773.0 379779.2 392304.9 371985.2
## 9 10 11 12 13 14 15 16
## 443237.0 300474.6 390414.7 420779.0 210319.6 268639.8 279296.3 202381.0
## 17 18 19 20 21 22 23 24
## 352534.2 455293.3 256081.6 275088.3 277490.0 271166.4 309003.2 214340.6
## 25 26 27 28 29 30 31 32
## 246051.2 219299.0 258929.7 280699.0 210844.3 260034.8 197082.6 191247.4
## 33 34 35 36 37 38 39 40
## 207234.6 370486.2 318628.6 232328.1 240430.8 199026.7 260630.9 173787.2
## 41 42 43 44 45 46 47 48
## 237766.0 277755.6 375932.0 475485.8 350220.8 279391.3 399517.8 208750.4
## 49 50 51 52 53 54 55 56
## 215972.9 307812.7 282907.8 212113.7 252711.1 195979.6 214674.3 167063.9
## 57 58 59 60 61 62 63 64
## 227968.7 218550.3 265067.8 331875.7 192084.1 218925.7 238526.9 318618.1
## 65 66 67 68 69 70 71 72
## 293397.2 218979.5 261546.3 240964.4 280082.4 282110.4 205893.0 262434.7
## 73 74 75
## 269862.0 412871.4 252828.2