# Load psych first: it provides describe(), used at the end of this section.
library(psych)
## Warning: package 'psych' was built under R version 3.4.3

# Read the store-level data set into a data frame.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# will only run where this file exists at exactly this location.
Store.df <- read.csv("E:/Documents/internship-R/Store.df.csv")

# View() opens a GUI data viewer, so only call it in an interactive session;
# non-interactive runs (Rscript, knitting) may have no viewer available.
if (interactive()) {
  View(Store.df)
}

# Per-column descriptive statistics (n, mean, sd, median, skew, ...).
describe(Store.df)
## vars n mean sd median trimmed mad
## store 1 75 38.00 21.79 38.00 38.00 28.17
## Sales 2 75 1205413.12 304531.31 1127332.00 1182031.25 288422.04
## Profit 3 75 276313.61 89404.08 265014.00 270260.34 90532.00
## MTenure 4 75 45.30 57.67 24.12 33.58 29.67
## CTenure 5 75 13.93 17.70 7.21 10.60 6.14
## Pop 6 75 9825.59 5911.67 8896.00 9366.07 7266.22
## Comp 7 75 3.79 1.31 3.63 3.66 0.82
## Visibility 8 75 3.08 0.75 3.00 3.07 0.00
## PedCount 9 75 2.96 0.99 3.00 2.97 1.48
## Res 10 75 0.96 0.20 1.00 1.00 0.00
## Hours24 11 75 0.84 0.37 1.00 0.92 0.00
## CrewSkill 12 75 3.46 0.41 3.50 3.47 0.34
## MgrSkill 13 75 3.64 0.41 3.59 3.62 0.45
## ServQual 14 75 87.15 12.61 89.47 88.62 15.61
## min max range skew kurtosis se
## store 1.00 75.00 74.00 0.00 -1.25 2.52
## Sales 699306.00 2113089.00 1413783.00 0.71 -0.09 35164.25
## Profit 122180.00 518998.00 396818.00 0.62 -0.21 10323.49
## MTenure 0.00 277.99 277.99 2.01 3.90 6.66
## CTenure 0.89 114.15 113.26 3.52 15.00 2.04
## Pop 1046.00 26519.00 25473.00 0.62 -0.23 682.62
## Comp 1.65 11.13 9.48 2.48 11.31 0.15
## Visibility 2.00 5.00 3.00 0.25 -0.38 0.09
## PedCount 1.00 5.00 4.00 0.00 -0.52 0.11
## Res 0.00 1.00 1.00 -4.60 19.43 0.02
## Hours24 0.00 1.00 1.00 -1.82 1.32 0.04
## CrewSkill 2.06 4.64 2.58 -0.43 1.64 0.05
## MgrSkill 2.96 4.62 1.67 0.27 -0.53 0.05
## ServQual 57.90 100.00 42.10 -0.66 -0.72 1.46
mean and standard deviation of Profit
# Average of the Profit column over all stores.
mean(Store.df[["Profit"]])
## [1] 276313.6
# Standard deviation of the Profit column.
sd(Store.df[["Profit"]])
## [1] 89404.08
mean and standard deviation of MTenure
# Average of the MTenure column over all stores.
mean(Store.df[["MTenure"]])
## [1] 45.29644
# Standard deviation of the MTenure column.
sd(Store.df[["MTenure"]])
## [1] 57.67155
mean and standard deviation of CTenure
# Average of the CTenure column over all stores.
mean(Store.df[["CTenure"]])
## [1] 13.9315
# Standard deviation of the CTenure column.
sd(Store.df[["CTenure"]])
## [1] 17.69752
Print the {StoreID, Sales, Profit, MTenure, CTenure} of the top 10 most profitable stores
# Rows of Store.df sorted from the most to the least profitable store.
# The Profit column is referenced through Store.df directly instead of via
# attach(), which would put every column on the search path for the rest of
# the session. (The name `storeasc` is kept as-is even though the order is
# descending, so any later reference to it keeps working.)
storeasc <- Store.df[order(-Store.df$Profit), ]

# Show the 10 most profitable stores. Columns are selected by name rather than
# by position, and head() returns fewer rows instead of NA-filled rows if the
# data set ever has fewer than 10 stores.
head(storeasc[, c("store", "Sales", "Profit", "MTenure", "CTenure")], 10)
## store Sales Profit MTenure CTenure
## 74 74 1782957 518998 171.09720 29.519510
## 7 7 1809256 476355 62.53080 7.326488
## 9 9 2113089 474725 108.99350 6.061602
## 6 6 1703140 469050 149.93590 11.351130
## 44 44 1807740 439781 182.23640 114.151900
## 2 2 1619874 424007 86.22219 6.636550
## 45 45 1602362 410149 47.64565 9.166325
## 18 18 1704826 394039 239.96980 33.774130
## 11 11 1583446 389886 44.81977 2.036961
## 47 47 1665657 387853 12.84790 6.636550
Print the {StoreID, Sales, Profit, MTenure, CTenure} of the 10 least profitable stores
# Rows of Store.df sorted from the least to the most profitable store.
# The Profit column is referenced through Store.df directly; attaching the
# same data frame a second time only stacks another copy on the search path
# and triggers an "objects are masked" message. (The name `storedec` is kept
# as-is even though the order is ascending, so any later reference to it keeps
# working.)
storedec <- Store.df[order(Store.df$Profit), ]

# Show the 10 least profitable stores. Columns are selected by name rather
# than by position, and head() returns fewer rows instead of NA-filled rows if
# the data set ever has fewer than 10 stores.
head(storedec[, c("store", "Sales", "Profit", "MTenure", "CTenure")], 10)
## store Sales Profit MTenure CTenure
## 57 57 699306 122180 24.3485700 2.956879
## 66 66 879581 146058 115.2039000 3.876797
## 41 41 744211 147327 14.9180200 11.926080
## 55 55 925744 147672 6.6703910 18.365500
## 32 32 828918 149033 36.0792600 6.636550
## 13 13 857843 152513 0.6571813 1.577002
## 54 54 811190 159792 6.6703910 3.876797
## 52 52 1073008 169201 24.1185600 3.416838
## 61 61 716589 177046 21.8184200 13.305950
## 37 37 1202917 187765 23.1985000 1.347023
Draw a scatter plot of Profit vs. MTenure
# car provides scatterplot(); the `##` lines record the messages it printed
# when it was loaded.
library("car")
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Plot Profit (vertical axis) against MTenure (horizontal axis).
scatterplot(
  Profit ~ MTenure,
  data = Store.df
)
Draw a scatter plot of Profit vs. CTenure
# car provides scatterplot().
library("car")
# Plot Profit (vertical axis) against CTenure (horizontal axis).
scatterplot(
  Profit ~ CTenure,
  data = Store.df
)
measure the correlation between Profit and MTenure
# Correlation between Profit and MTenure (cor()'s default method), rounded to
# two decimals.
round(with(Store.df, cor(Profit, MTenure)), digits = 2)
## [1] 0.44
measure the correlation between Profit and CTenure
# Correlation between Profit and CTenure (cor()'s default method), rounded to
# two decimals.
round(with(Store.df, cor(Profit, CTenure)), digits = 2)
## [1] 0.26
Run a Pearson’s Correlation test on the correlation between Profit and MTenure
# Test whether the Pearson correlation between Profit and MTenure differs
# from zero. The column expressions are written out in full because the
# printed "data:" line echoes them.
cor.test(
  x = Store.df$Profit,
  y = Store.df$MTenure,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Store.df$Profit and Store.df$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
The p-value is 8.193e-05, which is far below 0.05, so we reject the null hypothesis that the true correlation is zero. This implies that Profit and MTenure are significantly correlated.
Run a Pearson’s Correlation test on the correlation between Profit and CTenure
# Test whether the Pearson correlation between Profit and CTenure differs
# from zero. The column expressions are written out in full because the
# printed "data:" line echoes them.
cor.test(
  x = Store.df$Profit,
  y = Store.df$CTenure,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: Store.df$Profit and Store.df$CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03262507 0.45786339
## sample estimates:
## cor
## 0.2576789
The p-value is 0.02562, which is below 0.05, so we reject the null hypothesis that the true correlation is zero. This implies that Profit and CTenure are significantly correlated.
Run a regression of Profit on {MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24, Visibility}
# Fit a linear model of Profit on the eight store-level predictors.
# The formula is written inline because summary() echoes the call as written.
regprofit <- lm(
  formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res +
    Hours24 + Visibility,
  data = Store.df
)
# Print coefficients, standard errors, p-values, and R-squared statistics.
summary(regprofit)
##
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount +
## Res + Hours24 + Visibility, data = Store.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105789 -35946 -7069 33780 112390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7610.041 66821.994 0.114 0.909674
## MTenure 760.993 127.086 5.988 9.72e-08 ***
## CTenure 944.978 421.687 2.241 0.028400 *
## Comp -25286.887 5491.937 -4.604 1.94e-05 ***
## Pop 3.667 1.466 2.501 0.014890 *
## PedCount 34087.359 9073.196 3.757 0.000366 ***
## Res 91584.675 39231.283 2.334 0.022623 *
## Hours24 63233.307 19641.114 3.219 0.001994 **
## Visibility 12625.447 9087.620 1.389 0.169411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared: 0.6379, Adjusted R-squared: 0.594
## F-statistic: 14.53 on 8 and 66 DF, p-value: 5.382e-12
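For every one-unit increase in MTenure, Profit increases by about 760.99 on average, holding the other explanatory variables constant.
For every one-unit increase in CTenure, Profit increases by about 944.98 on average, holding the other explanatory variables constant.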
EXECUTIVE SUMMARY
1. The regression results show that managers' tenure is positively and significantly associated with a store's annual profit (p < 0.001). This means that store profits tend to be higher when managers have longer tenure.
2. Crew tenure is also positively and significantly associated with a store's annual profit (p = 0.028). Therefore, the experience of crew members adds to the profits of the stores.
3. The R-squared value is 0.6379, which means that the explanatory variables together explain 63.79% of the variation in the dependent variable, Profit.
4. The adjusted R-squared value is 0.594. It is lower than the R-squared because it penalizes each additional explanatory variable, so adding variables that contribute little explanatory power can be problematic.