# Load the Store24 data set into `store` and print a per-column summary.
# The full path is built with file.path() instead of calling setwd(), so the
# script no longer changes the session's working directory as a side effect.
data_dir <- "C:/Users/alouk/Downloads"
store <- read.csv(file.path(data_dir, "Store24.csv"))
summary(store)
## store Sales Profit MTenure
## Min. : 1.0 Min. : 699306 Min. :122180 Min. : 0.00
## 1st Qu.:19.5 1st Qu.: 984579 1st Qu.:211004 1st Qu.: 6.67
## Median :38.0 Median :1127332 Median :265014 Median : 24.12
## Mean :38.0 Mean :1205413 Mean :276314 Mean : 45.30
## 3rd Qu.:56.5 3rd Qu.:1362388 3rd Qu.:331314 3rd Qu.: 50.92
## Max. :75.0 Max. :2113089 Max. :518998 Max. :277.99
## CTenure Pop Comp Visibility
## Min. : 0.8871 Min. : 1046 Min. : 1.651 Min. :2.00
## 1st Qu.: 4.3943 1st Qu.: 5616 1st Qu.: 3.151 1st Qu.:3.00
## Median : 7.2115 Median : 8896 Median : 3.629 Median :3.00
## Mean : 13.9315 Mean : 9826 Mean : 3.788 Mean :3.08
## 3rd Qu.: 17.2156 3rd Qu.:14104 3rd Qu.: 4.230 3rd Qu.:4.00
## Max. :114.1519 Max. :26519 Max. :11.128 Max. :5.00
## PedCount Res Hours24 CrewSkill
## Min. :1.00 Min. :0.00 Min. :0.00 Min. :2.060
## 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:3.225
## Median :3.00 Median :1.00 Median :1.00 Median :3.500
## Mean :2.96 Mean :0.96 Mean :0.84 Mean :3.457
## 3rd Qu.:4.00 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:3.655
## Max. :5.00 Max. :1.00 Max. :1.00 Max. :4.640
## MgrSkill ServQual
## Min. :2.957 Min. : 57.90
## 1st Qu.:3.344 1st Qu.: 78.95
## Median :3.589 Median : 89.47
## Mean :3.638 Mean : 87.15
## 3rd Qu.:3.925 3rd Qu.: 99.90
## Max. :4.622 Max. :100.00
# Mean and standard deviation of Profit
mean(store[["Profit"]])
## [1] 276313.6
sd(store[["Profit"]])
## [1] 89404.08
# Mean and standard deviation of MTenure
mean(store[["MTenure"]])
## [1] 45.29644
sd(store[["MTenure"]])
## [1] 57.67155
# Mean and standard deviation of CTenure
mean(store[["CTenure"]])
## [1] 13.9315
sd(store[["CTenure"]])
## [1] 17.69752
# Sorting demo on the built-in mtcars data set.
# Columns are referenced explicitly as mtcars$mpg instead of through
# attach()/detach(), so nothing is added to the search path.
# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(mtcars)
}
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
if (interactive()) {
  View(newdata)
}
head(newdata, 5) # see the first 5 rows
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
newdata <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
if (interactive()) {
  View(newdata)
}
# Top 10 most profitable stores
# NOTE(review): attach() is kept only because later statements in this script
# refer to columns such as Profit without the `store$` prefix.
attach(store)
## The following object is masked _by_ .GlobalEnv:
##
## store
# Order on the explicit store$Profit column (descending) so the result does
# not depend on the search path; head() returns at most 10 rows and never
# pads with NA rows when the data has fewer than 10.
newdata1 <- store[
  order(-store$Profit),
  c("store", "Profit", "MTenure", "CTenure")
]
head(newdata1, 10)
## store Profit MTenure CTenure
## 74 74 518998 171.09720 29.519510
## 7 7 476355 62.53080 7.326488
## 9 9 474725 108.99350 6.061602
## 6 6 469050 149.93590 11.351130
## 44 44 439781 182.23640 114.151900
## 2 2 424007 86.22219 6.636550
## 45 45 410149 47.64565 9.166325
## 18 18 394039 239.96980 33.774130
## 11 11 389886 44.81977 2.036961
## 47 47 387853 12.84790 6.636550
# Bottom 10 least profitable stores
# Order on the explicit store$Profit column (ascending) so the result does
# not depend on attach(); head() returns at most 10 rows and never pads with
# NA rows when the data has fewer than 10.
newdata2 <- store[
  order(store$Profit),
  c("store", "Profit", "MTenure", "CTenure")
]
head(newdata2, 10)
## store Profit MTenure CTenure
## 57 57 122180 24.3485700 2.956879
## 66 66 146058 115.2039000 3.876797
## 41 41 147327 14.9180200 11.926080
## 55 55 147672 6.6703910 18.365500
## 32 32 149033 36.0792600 6.636550
## 13 13 152513 0.6571813 1.577002
## 54 54 159792 6.6703910 3.876797
## 52 52 169201 24.1185600 3.416838
## 61 61 177046 21.8184200 13.305950
## 37 37 187765 23.1985000 1.347023
library(car)
# Scatterplots of Profit against MTenure and against CTenure, followed by the
# pairwise correlation matrix of all columns rounded to 2 decimals.
# data = store makes each formula take its variables from the data frame
# instead of from whatever happens to be attach()ed on the search path.
scatterplot(
  Profit ~ MTenure,
  data = store,
  main = "Scatterplot of Profit vs Mtenure",
  pch = 19
)
scatterplot(
  Profit ~ CTenure,
  data = store,
  main = "Scatterplot of Profit vs Ctenure",
  pch = 19
)
round(cor(store), 2)
## store Sales Profit MTenure CTenure Pop Comp Visibility
## store 1.00 -0.23 -0.20 -0.06 0.02 -0.29 0.03 -0.03
## Sales -0.23 1.00 0.92 0.45 0.25 0.40 -0.24 0.13
## Profit -0.20 0.92 1.00 0.44 0.26 0.43 -0.33 0.14
## MTenure -0.06 0.45 0.44 1.00 0.24 -0.06 0.18 0.16
## CTenure 0.02 0.25 0.26 0.24 1.00 0.00 -0.07 0.07
## Pop -0.29 0.40 0.43 -0.06 0.00 1.00 -0.27 -0.05
## Comp 0.03 -0.24 -0.33 0.18 -0.07 -0.27 1.00 0.03
## Visibility -0.03 0.13 0.14 0.16 0.07 -0.05 0.03 1.00
## PedCount -0.22 0.42 0.45 0.06 -0.08 0.61 -0.15 -0.14
## Res -0.03 -0.17 -0.16 -0.06 -0.34 -0.24 0.22 0.02
## Hours24 0.03 0.06 -0.03 -0.17 0.07 -0.22 0.13 0.05
## CrewSkill 0.05 0.16 0.16 0.10 0.26 0.28 -0.04 -0.20
## MgrSkill -0.07 0.31 0.32 0.23 0.12 0.08 0.22 0.07
## ServQual -0.32 0.39 0.36 0.18 0.08 0.12 0.02 0.21
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## store -0.22 -0.03 0.03 0.05 -0.07 -0.32
## Sales 0.42 -0.17 0.06 0.16 0.31 0.39
## Profit 0.45 -0.16 -0.03 0.16 0.32 0.36
## MTenure 0.06 -0.06 -0.17 0.10 0.23 0.18
## CTenure -0.08 -0.34 0.07 0.26 0.12 0.08
## Pop 0.61 -0.24 -0.22 0.28 0.08 0.12
## Comp -0.15 0.22 0.13 -0.04 0.22 0.02
## Visibility -0.14 0.02 0.05 -0.20 0.07 0.21
## PedCount 1.00 -0.28 -0.28 0.21 0.09 -0.01
## Res -0.28 1.00 -0.09 -0.15 -0.03 0.09
## Hours24 -0.28 -0.09 1.00 0.11 -0.04 0.06
## CrewSkill 0.21 -0.15 0.11 1.00 -0.02 -0.03
## MgrSkill 0.09 -0.03 -0.04 -0.02 1.00 0.36
## ServQual -0.01 0.09 0.06 -0.03 0.36 1.00
# Correlation between Profit and MTenure, rounded to 2 decimals.
# Columns are read explicitly from `store` rather than from the attach()ed
# search path.
round(cor(store$Profit, store$MTenure), 2)
## [1] 0.44
# Correlation between Profit and CTenure, rounded to 2 decimals.
round(cor(store$Profit, store$CTenure), 2)
## [1] 0.26
library(corrplot)
## corrplot 0.84 loaded
# Mixed corrgram of the correlation matrix of `store`: "shade" method for the
# lower part, "pie" method for the upper part.
corrplot.mixed(
  cor(store),
  lower = "shade",
  upper = "pie",
  main = "Corrgram of store variables"
)
# Run a Pearson's correlation test on the correlation between Profit and MTenure. What is the p-value?
# The formula interface with data = store reads both columns from the data
# frame, so the test does not depend on attach().
cor.test(~ Profit + MTenure, data = store)
##
## Pearson's product-moment correlation
##
## data: Profit and MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
#The p-value is 8.193e-05
# Run a Pearson's correlation test on the correlation between Profit and CTenure. What is the p-value?
# The formula interface with data = store reads both columns from the data
# frame, so the test does not depend on attach().
cor.test(~ Profit + CTenure, data = store)
##
## Pearson's product-moment correlation
##
## data: Profit and CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03262507 0.45786339
## sample estimates:
## cor
## 0.2576789
#The p-value is 0.02562
# Simple linear regression of Profit on MTenure. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
mten <- lm(Profit ~ MTenure, data = store)
summary(mten)
##
## Call:
## lm(formula = Profit ~ MTenure, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -177817 -52029 -8635 50871 188316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 245496.3 11906.4 20.619 < 2e-16 ***
## MTenure 680.3 163.0 4.173 8.19e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 80880 on 73 degrees of freedom
## Multiple R-squared: 0.1926, Adjusted R-squared: 0.1815
## F-statistic: 17.41 on 1 and 73 DF, p-value: 8.193e-05
# Simple linear regression of Profit on CTenure. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
cten <- lm(Profit ~ CTenure, data = store)
summary(cten)
##
## Call:
## lm(formula = Profit ~ CTenure, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -139848 -64869 -9022 45057 222393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 258178.4 12814.4 20.148 <2e-16 ***
## CTenure 1301.7 571.3 2.279 0.0256 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 86970 on 73 degrees of freedom
## Multiple R-squared: 0.0664, Adjusted R-squared: 0.05361
## F-statistic: 5.192 on 1 and 73 DF, p-value: 0.02562
# Simple linear regression of Profit on Comp. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
comp <- lm(Profit ~ Comp, data = store)
summary(comp)
##
## Call:
## lm(formula = Profit ~ Comp, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -172707 -65521 -24559 56628 209205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 362702 30119 12.042 < 2e-16 ***
## Comp -22807 7520 -3.033 0.00335 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84830 on 73 degrees of freedom
## Multiple R-squared: 0.1119, Adjusted R-squared: 0.09975
## F-statistic: 9.2 on 1 and 73 DF, p-value: 0.003351
# Simple linear regression of Profit on Pop. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
pop <- lm(Profit ~ Pop, data = store)
summary(pop)
##
## Call:
## lm(formula = Profit ~ Pop, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -152198 -52285 -17228 43501 235602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.123e+05 1.829e+04 11.611 < 2e-16 ***
## Pop 6.513e+00 1.598e+00 4.077 0.000115 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81240 on 73 degrees of freedom
## Multiple R-squared: 0.1854, Adjusted R-squared: 0.1743
## F-statistic: 16.62 on 1 and 73 DF, p-value: 0.000115
# Simple linear regression of Profit on PedCount. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
pc <- lm(Profit ~ PedCount, data = store)
summary(pc)
##
## Call:
## lm(formula = Profit ~ PedCount, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -131878 -57678 -1538 45741 200501
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 156254 29373 5.320 1.09e-06 ***
## PedCount 40561 9415 4.308 5.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 80370 on 73 degrees of freedom
## Multiple R-squared: 0.2027, Adjusted R-squared: 0.1918
## F-statistic: 18.56 on 1 and 73 DF, p-value: 5.057e-05
# Simple linear regression of Profit on Res. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
res <- lm(Profit ~ Res, data = store)
summary(res)
##
## Call:
## lm(formula = Profit ~ Res, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -151243 -62419 -9467 57891 245575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 345696 51305 6.738 3.18e-09 ***
## Res -72273 52363 -1.380 0.172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 88860 on 73 degrees of freedom
## Multiple R-squared: 0.02543, Adjusted R-squared: 0.01208
## F-statistic: 1.905 on 1 and 73 DF, p-value: 0.1717
# Simple linear regression of Profit on Hours24. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
h24 <- lm(Profit ~ Hours24, data = store)
summary(h24)
##
## Call:
## lm(formula = Profit ~ Hours24, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -153138 -64315 -11246 52884 237458
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 281540 25976 10.84 <2e-16 ***
## Hours24 -6222 28343 -0.22 0.827
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 89980 on 73 degrees of freedom
## Multiple R-squared: 0.0006598, Adjusted R-squared: -0.01303
## F-statistic: 0.0482 on 1 and 73 DF, p-value: 0.8268
# Simple linear regression of Profit on Visibility. Variables are taken from
# `store` through the data argument rather than from the attach()ed search
# path; the echoed Call below reflects that.
vis <- lm(Profit ~ Visibility, data = store)
summary(vis)
##
## Call:
## lm(formula = Profit ~ Visibility, data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -152838 -63359 -10946 43839 243980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 226431 43855 5.163 2.02e-06 ***
## Visibility 16196 13840 1.170 0.246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 89180 on 73 degrees of freedom
## Multiple R-squared: 0.01841, Adjusted R-squared: 0.004966
## F-statistic: 1.369 on 1 and 73 DF, p-value: 0.2457
# The explanatory variables whose beta-coefficients are statistically significant (p < 0.05) are: MTenure, CTenure, Comp, Pop, PedCount.
# The explanatory variables whose beta-coefficients are not statistically significant (p > 0.05) are: Res, Hours24, Visibility.
# The expected change in Profit for a 1-month increase in MTenure is 680.3.
# The expected change in Profit for a 1-month increase in CTenure is 1301.7.
# Based on the regression analysis, Profit is significantly dependent upon MTenure, CTenure, Comp, Pop and PedCount (p-value < 0.05).
# Although Pop is statistically significant, its per-unit effect on Profit is small: the expected change in Profit for a 1-unit increase in Pop is only about 6.5.