Read and view the data
# Load the Store24 case data from the current working directory. The file
# name is a plain literal, so it is passed directly instead of being wrapped
# in a paste() call that joined nothing.
store <- read.csv("Store24.csv")
# View() opens a GUI data viewer, so it is only attempted when a user is
# actually at the console (it has nothing to show in a batch/knit run).
if (interactive()) {
  View(store)
}
2.c Get the summary of the dataset and compare it with Exhibit 3
library(psych)
# Print min / quartiles / median / mean / max for every column of store.
summary(object = store)
## store Sales Profit MTenure
## Min. : 1.0 Min. : 699306 Min. :122180 Min. : 0.00
## 1st Qu.:19.5 1st Qu.: 984579 1st Qu.:211004 1st Qu.: 6.67
## Median :38.0 Median :1127332 Median :265014 Median : 24.12
## Mean :38.0 Mean :1205413 Mean :276314 Mean : 45.30
## 3rd Qu.:56.5 3rd Qu.:1362388 3rd Qu.:331314 3rd Qu.: 50.92
## Max. :75.0 Max. :2113089 Max. :518998 Max. :277.99
## CTenure Pop Comp Visibility
## Min. : 0.8871 Min. : 1046 Min. : 1.651 Min. :2.00
## 1st Qu.: 4.3943 1st Qu.: 5616 1st Qu.: 3.151 1st Qu.:3.00
## Median : 7.2115 Median : 8896 Median : 3.629 Median :3.00
## Mean : 13.9315 Mean : 9826 Mean : 3.788 Mean :3.08
## 3rd Qu.: 17.2156 3rd Qu.:14104 3rd Qu.: 4.230 3rd Qu.:4.00
## Max. :114.1519 Max. :26519 Max. :11.128 Max. :5.00
## PedCount Res Hours24 CrewSkill
## Min. :1.00 Min. :0.00 Min. :0.00 Min. :2.060
## 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:3.225
## Median :3.00 Median :1.00 Median :1.00 Median :3.500
## Mean :2.96 Mean :0.96 Mean :0.84 Mean :3.457
## 3rd Qu.:4.00 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:3.655
## Max. :5.00 Max. :1.00 Max. :1.00 Max. :4.640
## MgrSkill ServQual
## Min. :2.957 Min. : 57.90
## 1st Qu.:3.344 1st Qu.: 78.95
## Median :3.589 Median : 89.47
## Mean :3.638 Mean : 87.15
## 3rd Qu.:3.925 3rd Qu.: 99.90
## Max. :4.622 Max. :100.00
2.d.1 To measure the mean and standard deviation of Profit.
# Centre and spread of the Profit column.
mean(store[["Profit"]])
## [1] 276313.6
sd(store[["Profit"]])
## [1] 89404.08
2.d.2 To measure the mean and standard deviation of MTenure.
# Centre and spread of the MTenure column.
mean(store[["MTenure"]])
## [1] 45.29644
sd(store[["MTenure"]])
## [1] 57.67155
2.d.3 To measure the mean and standard deviation of CTenure.
# Centre and spread of the CTenure column.
mean(store[["CTenure"]])
## [1] 13.9315
sd(store[["CTenure"]])
## [1] 17.69752
2.e Sorting and Subsetting data in R
# Sorting demo on the built-in mtcars data frame.
# Columns are referenced as mtcars$mpg rather than through attach()/detach(),
# so the search path is left untouched and a stray global named `mpg` cannot
# shadow the column. View() calls are limited to interactive sessions because
# they open a GUI data viewer.
if (interactive()) {
  View(mtcars)
}
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
if (interactive()) {
  View(newdata)
}
newdata[1:5, ] # see the first 5 rows
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
newdata1 <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
# Inspect the descending result (previously the ascending `newdata` was
# re-opened here, so `newdata1` was never actually looked at).
if (interactive()) {
  View(newdata1)
}
2.f Replication of Exhibit 1, shown in the case
# Replicate Exhibit 1: the ten most and ten least profitable stores, showing
# the first five columns (store, Sales, Profit, MTenure, CTenure).
#
# `most` must be ordered by DESCENDING Profit and `least` by ASCENDING Profit;
# the earlier version had the two sort directions swapped, so the table
# labelled "most profitable" actually listed the lowest-profit stores.
# Profit is referenced as store$Profit instead of via attach(store), which
# also avoids the "object is masked by .GlobalEnv" warning caused by the data
# frame and its first column sharing the name `store`.
most <- store[order(-store$Profit), 1:5]
least <- store[order(store$Profit), 1:5]
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(most)
  View(least)
}
head(most, 10) # 10 most profitable stores
## store Sales Profit MTenure CTenure
## 74 74 1782957 518998 171.09720 29.519510
## 7 7 1809256 476355 62.53080 7.326488
## 9 9 2113089 474725 108.99350 6.061602
## 6 6 1703140 469050 149.93590 11.351130
## 44 44 1807740 439781 182.23640 114.151900
## 2 2 1619874 424007 86.22219 6.636550
## 45 45 1602362 410149 47.64565 9.166325
## 18 18 1704826 394039 239.96980 33.774130
## 11 11 1583446 389886 44.81977 2.036961
## 47 47 1665657 387853 12.84790 6.636550
head(least, 10) # 10 least profitable stores
## store Sales Profit MTenure CTenure
## 57 57 699306 122180 24.3485700 2.956879
## 66 66 879581 146058 115.2039000 3.876797
## 41 41 744211 147327 14.9180200 11.926080
## 55 55 925744 147672 6.6703910 18.365500
## 32 32 828918 149033 36.0792600 6.636550
## 13 13 857843 152513 0.6571813 1.577002
## 54 54 811190 159792 6.6703910 3.876797
## 52 52 1073008 169201 24.1185600 3.416838
## 61 61 716589 177046 21.8184200 13.305950
## 37 37 1202917 187765 23.1985000 1.347023
2.g To create a scatter plot of Profit vs. MTenure.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Profit (vertical axis) against manager tenure (horizontal axis).
scatterplot(x = store$MTenure, y = store$Profit)

2.h To draw a scatter plot of Profit vs. CTenure.
library(car)
# Profit (vertical axis) against crew tenure (horizontal axis).
scatterplot(x = store$CTenure, y = store$Profit)

2.i To construct a correlation matrix for all the variables in the dataset.
# Pairwise correlations between every pair of columns in store, computed on
# rows with no missing values, then shown rounded to two decimals.
cor1 <- cor(x = store, use = "complete.obs")
round(cor1, digits = 2)
## store Sales Profit MTenure CTenure Pop Comp Visibility
## store 1.00 -0.23 -0.20 -0.06 0.02 -0.29 0.03 -0.03
## Sales -0.23 1.00 0.92 0.45 0.25 0.40 -0.24 0.13
## Profit -0.20 0.92 1.00 0.44 0.26 0.43 -0.33 0.14
## MTenure -0.06 0.45 0.44 1.00 0.24 -0.06 0.18 0.16
## CTenure 0.02 0.25 0.26 0.24 1.00 0.00 -0.07 0.07
## Pop -0.29 0.40 0.43 -0.06 0.00 1.00 -0.27 -0.05
## Comp 0.03 -0.24 -0.33 0.18 -0.07 -0.27 1.00 0.03
## Visibility -0.03 0.13 0.14 0.16 0.07 -0.05 0.03 1.00
## PedCount -0.22 0.42 0.45 0.06 -0.08 0.61 -0.15 -0.14
## Res -0.03 -0.17 -0.16 -0.06 -0.34 -0.24 0.22 0.02
## Hours24 0.03 0.06 -0.03 -0.17 0.07 -0.22 0.13 0.05
## CrewSkill 0.05 0.16 0.16 0.10 0.26 0.28 -0.04 -0.20
## MgrSkill -0.07 0.31 0.32 0.23 0.12 0.08 0.22 0.07
## ServQual -0.32 0.39 0.36 0.18 0.08 0.12 0.02 0.21
## PedCount Res Hours24 CrewSkill MgrSkill ServQual
## store -0.22 -0.03 0.03 0.05 -0.07 -0.32
## Sales 0.42 -0.17 0.06 0.16 0.31 0.39
## Profit 0.45 -0.16 -0.03 0.16 0.32 0.36
## MTenure 0.06 -0.06 -0.17 0.10 0.23 0.18
## CTenure -0.08 -0.34 0.07 0.26 0.12 0.08
## Pop 0.61 -0.24 -0.22 0.28 0.08 0.12
## Comp -0.15 0.22 0.13 -0.04 0.22 0.02
## Visibility -0.14 0.02 0.05 -0.20 0.07 0.21
## PedCount 1.00 -0.28 -0.28 0.21 0.09 -0.01
## Res -0.28 1.00 -0.09 -0.15 -0.03 0.09
## Hours24 -0.28 -0.09 1.00 0.11 -0.04 0.06
## CrewSkill 0.21 -0.15 0.11 1.00 -0.02 -0.03
## MgrSkill 0.09 -0.03 -0.04 -0.02 1.00 0.36
## ServQual -0.01 0.09 0.06 -0.03 0.36 1.00
2.j To measure the correlation between Profit and MTenure, and between Profit and CTenure.
# Correlation of Profit with manager tenure, then with crew tenure.
with(store, cor(Profit, MTenure))
## [1] 0.4388692
with(store, cor(Profit, CTenure))
## [1] 0.2576789
2.k To construct the following corrgram based on all variables in the dataset.
library(corrgram)
# Palette helper: returns `ncol` colours interpolated along
# darkgoldenrod4 -> red -> blue -> darkgreen.
# NOTE(review): nothing in the corrgram() call below hands this helper to the
# plot, so it may have no effect. Recent corrgram releases appear to take a
# custom palette through the `col.regions` argument -- confirm against the
# installed version before wiring it in.
col.corrgram <- function(ncol) {
  colorRampPalette(c("darkgoldenrod4", "red", "blue", "darkgreen"))(ncol)
}

# Correlogram of every column in store: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal.
# The title previously ended in a stray ")" that was printed on the plot.
corrgram(store,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Correlogram of Store variables"
)

2.i.12 Pearson’s correlation test on the correlation between Profit and MTenure.
# Two-sided Pearson test of zero correlation between Profit and MTenure.
cor.test(x = store$Profit, y = store$MTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: store$Profit and store$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2353497 0.6055175
## sample estimates:
## cor
## 0.4388692
2.i.13 Pearson’s correlation test on the correlation between Profit and CTenure.
# Two-sided Pearson test of zero correlation between Profit and CTenure.
cor.test(x = store$Profit, y = store$CTenure, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: store$Profit and store$CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03262507 0.45786339
## sample estimates:
## cor
## 0.2576789
2.m Run a regression of Profit on {MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24, Visibility}.
# The regression that task 2.m actually asks for: Profit as the response and
# the eight listed site/people variables as predictors. None of the original
# models used Profit as the response, so this fit was missing.
profit_fit <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility,
  data = store
)
summary(profit_fit)
# NOTE(review): the printed output for profit_fit is not part of this
# transcript yet -- re-run/re-knit to capture it.

# Auxiliary model kept from the original analysis: MTenure regressed on every
# other column of store (`.` expands to all remaining columns, including the
# store id, Sales and Profit). The echoed output that follows belongs to this
# model, not to profit_fit.
MTenure <- lm(MTenure ~ ., data = store)
summary(MTenure)
##
## Call:
## lm(formula = MTenure ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69.548 -23.631 -7.398 15.860 140.351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.496e+01 8.763e+01 -0.399 0.691343
## store -1.764e-01 2.651e-01 -0.665 0.508317
## Sales 6.713e-05 4.687e-05 1.432 0.157168
## Profit 2.949e-04 1.691e-04 1.744 0.086193 .
## CTenure -6.240e-02 3.542e-01 -0.176 0.860753
## Pop -3.239e-03 1.212e-03 -2.672 0.009651 **
## Comp 1.809e+01 4.853e+00 3.727 0.000426 ***
## Visibility 4.393e+00 7.352e+00 0.598 0.552341
## PedCount -1.341e+01 7.919e+00 -1.693 0.095597 .
## Res -5.379e+01 3.165e+01 -1.699 0.094380 .
## Hours24 -6.246e+01 1.598e+01 -3.908 0.000236 ***
## CrewSkill 2.283e+01 1.457e+01 1.567 0.122218
## MgrSkill -1.142e+01 1.503e+01 -0.760 0.450133
## ServQual -2.112e-01 4.992e-01 -0.423 0.673747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.14 on 61 degrees of freedom
## Multiple R-squared: 0.5171, Adjusted R-squared: 0.4142
## F-statistic: 5.025 on 13 and 61 DF, p-value: 6.308e-06
# Fit CTenure on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses CTenure instead -- confirm it is intended.
CTenure <- lm(formula = CTenure ~ ., data = store)
summary(object = CTenure)
##
## Call:
## lm(formula = CTenure ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.165 -6.648 -1.298 5.372 66.397
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.125e+01 3.168e+01 0.355 0.7238
## store -4.502e-02 9.597e-02 -0.469 0.6407
## Sales 5.481e-06 1.721e-05 0.319 0.7512
## Profit 6.566e-05 6.205e-05 1.058 0.2941
## MTenure -8.149e-03 4.626e-02 -0.176 0.8608
## Pop -4.035e-04 4.601e-04 -0.877 0.3839
## Comp 1.242e+00 1.937e+00 0.641 0.5239
## Visibility 5.136e-01 2.664e+00 0.193 0.8478
## PedCount -7.181e+00 2.780e+00 -2.583 0.0122 *
## Res -3.673e+01 1.072e+01 -3.426 0.0011 **
## Hours24 -6.956e+00 6.397e+00 -1.087 0.2812
## CrewSkill 1.210e+01 5.142e+00 2.354 0.0218 *
## MgrSkill 4.970e-01 5.456e+00 0.091 0.9277
## ServQual -4.126e-02 1.806e-01 -0.228 0.8201
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.95 on 61 degrees of freedom
## Multiple R-squared: 0.3303, Adjusted R-squared: 0.1876
## F-statistic: 2.314 on 13 and 61 DF, p-value: 0.01435
# Fit Comp on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses Comp instead -- confirm it is intended.
Comp <- lm(formula = Comp ~ ., data = store)
summary(object = Comp)
##
## Call:
## lm(formula = Comp ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4631 -0.5820 -0.1754 0.4074 4.7140
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.906e+00 2.075e+00 -0.919 0.361968
## store 1.284e-03 6.332e-03 0.203 0.839918
## Sales 6.755e-07 1.131e-06 0.597 0.552651
## Profit -1.332e-05 3.756e-06 -3.546 0.000759 ***
## MTenure 1.025e-02 2.751e-03 3.727 0.000426 ***
## CTenure 5.389e-03 8.407e-03 0.641 0.523867
## Pop 2.153e-06 3.050e-05 0.071 0.943966
## Visibility 7.649e-02 1.753e-01 0.436 0.664089
## PedCount 4.156e-01 1.854e-01 2.241 0.028653 *
## Res 1.819e+00 7.353e-01 2.474 0.016165 *
## Hours24 1.031e+00 4.045e-01 2.549 0.013327 *
## CrewSkill -9.684e-02 3.536e-01 -0.274 0.785085
## MgrSkill 1.067e+00 3.325e-01 3.211 0.002115 **
## ServQual 3.704e-03 1.189e-02 0.311 0.756577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.051 on 61 degrees of freedom
## Multiple R-squared: 0.4706, Adjusted R-squared: 0.3578
## F-statistic: 4.172 on 13 and 61 DF, p-value: 6.431e-05
# Fit PedCount on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses PedCount instead -- confirm it is intended.
PedCount <- lm(formula = PedCount ~ ., data = store)
summary(object = PedCount)
##
## Call:
## lm(formula = PedCount ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3216 -0.3949 0.0517 0.4661 1.3962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.473e+00 1.313e+00 2.645 0.01038 *
## store -4.781e-03 4.159e-03 -1.150 0.25481
## Sales 5.535e-07 7.496e-07 0.738 0.46312
## Profit 4.607e-06 2.673e-06 1.724 0.08986 .
## MTenure -3.347e-03 1.977e-03 -1.693 0.09560 .
## CTenure -1.373e-02 5.315e-03 -2.583 0.01222 *
## Pop 3.993e-05 1.958e-05 2.039 0.04578 *
## Comp 1.831e-01 8.168e-02 2.241 0.02865 *
## Visibility -1.234e-01 1.154e-01 -1.069 0.28915
## Res -1.413e+00 4.788e-01 -2.951 0.00448 **
## Hours24 -7.873e-01 2.638e-01 -2.985 0.00408 **
## CrewSkill 2.765e-01 2.321e-01 1.191 0.23809
## MgrSkill -1.595e-01 2.377e-01 -0.671 0.50466
## ServQual -1.138e-02 7.764e-03 -1.466 0.14777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6974 on 61 degrees of freedom
## Multiple R-squared: 0.5929, Adjusted R-squared: 0.5061
## F-statistic: 6.833 on 13 and 61 DF, p-value: 7.01e-08
# Fit Res on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses Res instead -- confirm it is intended.
Res <- lm(formula = Res ~ ., data = store)
summary(object = Res)
##
## Call:
## lm(formula = Res ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81339 -0.04105 0.00735 0.07665 0.24330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.085e+00 3.178e-01 3.414 0.00114 **
## store -9.573e-04 1.044e-03 -0.917 0.36293
## Sales -5.534e-09 1.883e-07 -0.029 0.97666
## Profit 9.472e-07 6.740e-07 1.405 0.16496
## MTenure -8.402e-04 4.945e-04 -1.699 0.09438 .
## CTenure -4.394e-03 1.282e-03 -3.426 0.00110 **
## Pop -6.827e-06 4.987e-06 -1.369 0.17603
## Comp 5.013e-02 2.026e-02 2.474 0.01617 *
## Visibility -6.716e-03 2.913e-02 -0.231 0.81844
## PedCount -8.843e-02 2.996e-02 -2.951 0.00448 **
## Hours24 -1.692e-01 6.723e-02 -2.516 0.01450 *
## CrewSkill 5.138e-02 5.836e-02 0.880 0.38207
## MgrSkill -5.881e-02 5.920e-02 -0.994 0.32439
## ServQual 1.075e-03 1.971e-03 0.545 0.58751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1745 on 61 degrees of freedom
## Multiple R-squared: 0.3553, Adjusted R-squared: 0.2179
## F-statistic: 2.586 on 13 and 61 DF, p-value: 0.006416
# Fit Hours24 on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses Hours24 instead -- confirm it is intended.
Hours24 <- lm(formula = Hours24 ~ ., data = store)
summary(object = Hours24)
##
## Call:
## lm(formula = Hours24 ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62502 -0.09926 0.04684 0.16918 0.51304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.690e-01 6.228e-01 1.074 0.286982
## store -1.415e-03 1.897e-03 -0.746 0.458489
## Sales 6.913e-07 3.297e-07 2.097 0.040185 *
## Profit 3.440e-07 1.241e-06 0.277 0.782502
## MTenure -3.206e-03 8.203e-04 -3.908 0.000236 ***
## CTenure -2.733e-03 2.514e-03 -1.087 0.281177
## Pop -1.959e-05 8.828e-06 -2.219 0.030221 *
## Comp 9.337e-02 3.663e-02 2.549 0.013327 *
## Visibility 1.236e-02 5.280e-02 0.234 0.815662
## PedCount -1.619e-01 5.423e-02 -2.985 0.004079 **
## Res -5.559e-01 2.209e-01 -2.516 0.014501 *
## CrewSkill 2.171e-01 1.028e-01 2.113 0.038692 *
## MgrSkill -1.182e-01 1.071e-01 -1.104 0.273963
## ServQual -3.119e-04 3.582e-03 -0.087 0.930881
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3162 on 61 degrees of freedom
## Multiple R-squared: 0.3948, Adjusted R-squared: 0.2659
## F-statistic: 3.062 on 13 and 61 DF, p-value: 0.001571
# Fit Visibility on every other column of store (`.` shorthand) and print the
# model summary.
# NOTE(review): the 2.m heading asks for Profit as the response; this model
# uses Visibility instead -- confirm it is intended.
Visibility <- lm(formula = Visibility ~ ., data = store)
summary(object = Visibility)
##
## Call:
## lm(formula = Visibility ~ ., data = store)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.55435 -0.37048 -0.00491 0.46334 1.70508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.983e+00 1.436e+00 2.774 0.00734 **
## store 1.406e-03 4.616e-03 0.305 0.76173
## Sales -1.610e-07 8.273e-07 -0.195 0.84638
## Profit 1.901e-06 2.999e-06 0.634 0.52844
## MTenure 1.325e-03 2.217e-03 0.598 0.55234
## CTenure 1.186e-03 6.150e-03 0.193 0.84776
## Pop 1.105e-05 2.220e-05 0.498 0.62060
## Comp 4.069e-02 9.324e-02 0.436 0.66409
## PedCount -1.491e-01 1.394e-01 -1.069 0.28915
## Res -1.296e-01 5.623e-01 -0.231 0.81844
## Hours24 7.263e-02 3.102e-01 0.234 0.81566
## CrewSkill -4.190e-01 2.524e-01 -1.660 0.10201
## MgrSkill -1.231e-01 2.617e-01 -0.470 0.63983
## ServQual 8.878e-03 8.607e-03 1.031 0.30638
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7665 on 61 degrees of freedom
## Multiple R-squared: 0.1368, Adjusted R-squared: -0.04714
## F-statistic: 0.7437 on 13 and 61 DF, p-value: 0.7135