Read and view the data

# Load the Store24 data set from the working directory. The original wrapped
# the file name in paste(..., sep=""), which is a no-op for a single literal.
store <- read.csv("Store24.csv")
# View() needs an interactive session, so skip it in batch / knit runs.
if (interactive()) {
  View(store)
}

2.c Get the summary of the dataset and compare it with Exhibit 3

library(psych)
# Per-column summary statistics for every variable in `store`.
print(summary(object = store))
##      store          Sales             Profit          MTenure      
##  Min.   : 1.0   Min.   : 699306   Min.   :122180   Min.   :  0.00  
##  1st Qu.:19.5   1st Qu.: 984579   1st Qu.:211004   1st Qu.:  6.67  
##  Median :38.0   Median :1127332   Median :265014   Median : 24.12  
##  Mean   :38.0   Mean   :1205413   Mean   :276314   Mean   : 45.30  
##  3rd Qu.:56.5   3rd Qu.:1362388   3rd Qu.:331314   3rd Qu.: 50.92  
##  Max.   :75.0   Max.   :2113089   Max.   :518998   Max.   :277.99  
##     CTenure              Pop             Comp          Visibility  
##  Min.   :  0.8871   Min.   : 1046   Min.   : 1.651   Min.   :2.00  
##  1st Qu.:  4.3943   1st Qu.: 5616   1st Qu.: 3.151   1st Qu.:3.00  
##  Median :  7.2115   Median : 8896   Median : 3.629   Median :3.00  
##  Mean   : 13.9315   Mean   : 9826   Mean   : 3.788   Mean   :3.08  
##  3rd Qu.: 17.2156   3rd Qu.:14104   3rd Qu.: 4.230   3rd Qu.:4.00  
##  Max.   :114.1519   Max.   :26519   Max.   :11.128   Max.   :5.00  
##     PedCount         Res          Hours24       CrewSkill    
##  Min.   :1.00   Min.   :0.00   Min.   :0.00   Min.   :2.060  
##  1st Qu.:2.00   1st Qu.:1.00   1st Qu.:1.00   1st Qu.:3.225  
##  Median :3.00   Median :1.00   Median :1.00   Median :3.500  
##  Mean   :2.96   Mean   :0.96   Mean   :0.84   Mean   :3.457  
##  3rd Qu.:4.00   3rd Qu.:1.00   3rd Qu.:1.00   3rd Qu.:3.655  
##  Max.   :5.00   Max.   :1.00   Max.   :1.00   Max.   :4.640  
##     MgrSkill        ServQual     
##  Min.   :2.957   Min.   : 57.90  
##  1st Qu.:3.344   1st Qu.: 78.95  
##  Median :3.589   Median : 89.47  
##  Mean   :3.638   Mean   : 87.15  
##  3rd Qu.:3.925   3rd Qu.: 99.90  
##  Max.   :4.622   Max.   :100.00

2.d.1 To measure the mean and standard deviation of Profit.

# Mean of the Profit column.
with(store, mean(Profit))
## [1] 276313.6
# Standard deviation of the Profit column.
with(store, sd(Profit))
## [1] 89404.08

2.d.2 To measure the mean and standard deviation of MTenure.

# Mean of the MTenure column.
with(store, mean(MTenure))
## [1] 45.29644
# Standard deviation of the MTenure column.
with(store, sd(MTenure))
## [1] 57.67155

2.d.3 To measure the mean and standard deviation of CTenure.

# Mean of the CTenure column.
with(store, mean(CTenure))
## [1] 13.9315
# Standard deviation of the CTenure column.
with(store, sd(CTenure))
## [1] 17.69752

2.e Sorting and Subsetting data in R

# Sort the built-in mtcars data by mpg in both directions. Columns are
# referenced explicitly instead of attach()/detach(), which can silently mask
# other objects. View() needs an interactive session, so it is guarded.
if (interactive()) {
  View(mtcars)
}
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
if (interactive()) {
  View(newdata)
}
newdata[1:5, ] # see the first 5 rows
##                      mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
newdata1 <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
# Show the descending result; the original re-opened `newdata` here.
if (interactive()) {
  View(newdata1)
}

2.f Replication of Exhibit 1, shown in the case

# Rank stores by Profit, keeping the first five columns
# (store, Sales, Profit, MTenure, CTenure).
# Columns are referenced as store$... instead of attach(store): attaching
# produced a masking warning because the data frame and its first column are
# both called `store`.
# order(-Profit) sorts descending, so `most` holds the most profitable
# stores and `least` the least profitable ones; the original had the two
# orderings (and therefore the two listings) swapped.
most <- store[order(-store$Profit), 1:5]
if (interactive()) {
  View(most)
}

least <- store[order(store$Profit), 1:5]
if (interactive()) {
  View(least)
}

head(most, 10)  #10 most profitable stores
##    store   Sales Profit   MTenure    CTenure
## 74    74 1782957 518998 171.09720  29.519510
## 7      7 1809256 476355  62.53080   7.326488
## 9      9 2113089 474725 108.99350   6.061602
## 6      6 1703140 469050 149.93590  11.351130
## 44    44 1807740 439781 182.23640 114.151900
## 2      2 1619874 424007  86.22219   6.636550
## 45    45 1602362 410149  47.64565   9.166325
## 18    18 1704826 394039 239.96980  33.774130
## 11    11 1583446 389886  44.81977   2.036961
## 47    47 1665657 387853  12.84790   6.636550
head(least, 10)  #10 least profitable stores
##    store   Sales Profit     MTenure   CTenure
## 57    57  699306 122180  24.3485700  2.956879
## 66    66  879581 146058 115.2039000  3.876797
## 41    41  744211 147327  14.9180200 11.926080
## 55    55  925744 147672   6.6703910 18.365500
## 32    32  828918 149033  36.0792600  6.636550
## 13    13  857843 152513   0.6571813  1.577002
## 54    54  811190 159792   6.6703910  3.876797
## 52    52 1073008 169201  24.1185600  3.416838
## 61    61  716589 177046  21.8184200 13.305950
## 37    37 1202917 187765  23.1985000  1.347023

2.g To create a scatter plot of Profit vs. MTenure.

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot with MTenure on the x-axis and Profit on the y-axis.
scatterplot(x = store$MTenure, y = store$Profit)

2.h To draw a scatter plot of Profit vs. CTenure.

library(car)
# Scatter plot with CTenure on the x-axis and Profit on the y-axis.
scatterplot(x = store$CTenure, y = store$Profit)

2.i To construct a correlation matrix for all the variables in the dataset.

# Pairwise correlations between all columns of `store`, computed on rows
# with no missing values and displayed to two decimal places.
cor1 <- cor(x = store, use = "complete.obs")
print(round(cor1, digits = 2))
##            store Sales Profit MTenure CTenure   Pop  Comp Visibility
## store       1.00 -0.23  -0.20   -0.06    0.02 -0.29  0.03      -0.03
## Sales      -0.23  1.00   0.92    0.45    0.25  0.40 -0.24       0.13
## Profit     -0.20  0.92   1.00    0.44    0.26  0.43 -0.33       0.14
## MTenure    -0.06  0.45   0.44    1.00    0.24 -0.06  0.18       0.16
## CTenure     0.02  0.25   0.26    0.24    1.00  0.00 -0.07       0.07
## Pop        -0.29  0.40   0.43   -0.06    0.00  1.00 -0.27      -0.05
## Comp        0.03 -0.24  -0.33    0.18   -0.07 -0.27  1.00       0.03
## Visibility -0.03  0.13   0.14    0.16    0.07 -0.05  0.03       1.00
## PedCount   -0.22  0.42   0.45    0.06   -0.08  0.61 -0.15      -0.14
## Res        -0.03 -0.17  -0.16   -0.06   -0.34 -0.24  0.22       0.02
## Hours24     0.03  0.06  -0.03   -0.17    0.07 -0.22  0.13       0.05
## CrewSkill   0.05  0.16   0.16    0.10    0.26  0.28 -0.04      -0.20
## MgrSkill   -0.07  0.31   0.32    0.23    0.12  0.08  0.22       0.07
## ServQual   -0.32  0.39   0.36    0.18    0.08  0.12  0.02       0.21
##            PedCount   Res Hours24 CrewSkill MgrSkill ServQual
## store         -0.22 -0.03    0.03      0.05    -0.07    -0.32
## Sales          0.42 -0.17    0.06      0.16     0.31     0.39
## Profit         0.45 -0.16   -0.03      0.16     0.32     0.36
## MTenure        0.06 -0.06   -0.17      0.10     0.23     0.18
## CTenure       -0.08 -0.34    0.07      0.26     0.12     0.08
## Pop            0.61 -0.24   -0.22      0.28     0.08     0.12
## Comp          -0.15  0.22    0.13     -0.04     0.22     0.02
## Visibility    -0.14  0.02    0.05     -0.20     0.07     0.21
## PedCount       1.00 -0.28   -0.28      0.21     0.09    -0.01
## Res           -0.28  1.00   -0.09     -0.15    -0.03     0.09
## Hours24       -0.28 -0.09    1.00      0.11    -0.04     0.06
## CrewSkill      0.21 -0.15    0.11      1.00    -0.02    -0.03
## MgrSkill       0.09 -0.03   -0.04     -0.02     1.00     0.36
## ServQual      -0.01  0.09    0.06     -0.03     0.36     1.00

2.j To measure the correlation between Profit and MTenure, and between Profit and CTenure.

# Correlation between Profit and MTenure.
with(store, cor(Profit, MTenure))
## [1] 0.4388692
# Correlation between Profit and CTenure.
with(store, cor(Profit, CTenure))
## [1] 0.2576789

2.k To construct the following corrgram based on all variables in the dataset.

library(corrgram) 
# Palette generator: returns `ncol` colours interpolated across the four
# anchor colours listed below.
col.corrgram <- function(ncol) {
  colorRampPalette(c("darkgoldenrod4", "red",
                     "blue", "darkgreen"))(ncol)
}
# Corrgram of every column in `store`: shaded cells below the diagonal, pie
# glyphs above it, and variable names on the diagonal.
# The palette is handed to corrgram() through `col.regions`; in the original
# it was defined but never passed to the call. The title also loses a stray
# closing parenthesis.
corrgram(store, order = TRUE, lower.panel = panel.shade,
         upper.panel = panel.pie, text.panel = panel.txt,
         col.regions = col.corrgram,
         main = "Correlogram of Store variables")

2.i.12 Pearson’s correlation test on the correlation between Profit and MTenure.

# Two-sided Pearson test of zero correlation between Profit and MTenure,
# with the default settings spelled out.
cor.test(x = store$Profit, y = store$MTenure,
         alternative = "two.sided", method = "pearson", conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  store$Profit and store$MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2353497 0.6055175
## sample estimates:
##       cor 
## 0.4388692

2.i.13 Pearson’s correlation test on the correlation between Profit and CTenure.

# Two-sided Pearson test of zero correlation between Profit and CTenure,
# with the default settings spelled out.
cor.test(x = store$Profit, y = store$CTenure,
         alternative = "two.sided", method = "pearson", conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  store$Profit and store$CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03262507 0.45786339
## sample estimates:
##       cor 
## 0.2576789

2.m Run a regression of Profit on {MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24, Visibility}.

# Regression requested by the section heading: Profit explained by the eight
# listed predictors. The original code never fitted this model; it only
# regressed individual predictors on all remaining columns.
profit_model <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility,
  data = store
)
summary(profit_model)
# Auxiliary regression: MTenure on every other column of `store`.
# NOTE: the fitted model is stored under the same name as the response
# column, so a global `MTenure` now refers to the lm object.
MTenure <- lm(MTenure ~ ., data=store)
summary(MTenure)
## 
## Call:
## lm(formula = MTenure ~ ., data = store)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.548 -23.631  -7.398  15.860 140.351 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.496e+01  8.763e+01  -0.399 0.691343    
## store       -1.764e-01  2.651e-01  -0.665 0.508317    
## Sales        6.713e-05  4.687e-05   1.432 0.157168    
## Profit       2.949e-04  1.691e-04   1.744 0.086193 .  
## CTenure     -6.240e-02  3.542e-01  -0.176 0.860753    
## Pop         -3.239e-03  1.212e-03  -2.672 0.009651 ** 
## Comp         1.809e+01  4.853e+00   3.727 0.000426 ***
## Visibility   4.393e+00  7.352e+00   0.598 0.552341    
## PedCount    -1.341e+01  7.919e+00  -1.693 0.095597 .  
## Res         -5.379e+01  3.165e+01  -1.699 0.094380 .  
## Hours24     -6.246e+01  1.598e+01  -3.908 0.000236 ***
## CrewSkill    2.283e+01  1.457e+01   1.567 0.122218    
## MgrSkill    -1.142e+01  1.503e+01  -0.760 0.450133    
## ServQual    -2.112e-01  4.992e-01  -0.423 0.673747    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.14 on 61 degrees of freedom
## Multiple R-squared:  0.5171, Adjusted R-squared:  0.4142 
## F-statistic: 5.025 on 13 and 61 DF,  p-value: 6.308e-06
# Regress CTenure on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
CTenure <- lm(formula = CTenure ~ ., data = store)
print(summary(CTenure))
## 
## Call:
## lm(formula = CTenure ~ ., data = store)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.165  -6.648  -1.298   5.372  66.397 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.125e+01  3.168e+01   0.355   0.7238   
## store       -4.502e-02  9.597e-02  -0.469   0.6407   
## Sales        5.481e-06  1.721e-05   0.319   0.7512   
## Profit       6.566e-05  6.205e-05   1.058   0.2941   
## MTenure     -8.149e-03  4.626e-02  -0.176   0.8608   
## Pop         -4.035e-04  4.601e-04  -0.877   0.3839   
## Comp         1.242e+00  1.937e+00   0.641   0.5239   
## Visibility   5.136e-01  2.664e+00   0.193   0.8478   
## PedCount    -7.181e+00  2.780e+00  -2.583   0.0122 * 
## Res         -3.673e+01  1.072e+01  -3.426   0.0011 **
## Hours24     -6.956e+00  6.397e+00  -1.087   0.2812   
## CrewSkill    1.210e+01  5.142e+00   2.354   0.0218 * 
## MgrSkill     4.970e-01  5.456e+00   0.091   0.9277   
## ServQual    -4.126e-02  1.806e-01  -0.228   0.8201   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.95 on 61 degrees of freedom
## Multiple R-squared:  0.3303, Adjusted R-squared:  0.1876 
## F-statistic: 2.314 on 13 and 61 DF,  p-value: 0.01435
# Regress Comp on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
Comp <- lm(formula = Comp ~ ., data = store)
print(summary(Comp))
## 
## Call:
## lm(formula = Comp ~ ., data = store)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4631 -0.5820 -0.1754  0.4074  4.7140 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.906e+00  2.075e+00  -0.919 0.361968    
## store        1.284e-03  6.332e-03   0.203 0.839918    
## Sales        6.755e-07  1.131e-06   0.597 0.552651    
## Profit      -1.332e-05  3.756e-06  -3.546 0.000759 ***
## MTenure      1.025e-02  2.751e-03   3.727 0.000426 ***
## CTenure      5.389e-03  8.407e-03   0.641 0.523867    
## Pop          2.153e-06  3.050e-05   0.071 0.943966    
## Visibility   7.649e-02  1.753e-01   0.436 0.664089    
## PedCount     4.156e-01  1.854e-01   2.241 0.028653 *  
## Res          1.819e+00  7.353e-01   2.474 0.016165 *  
## Hours24      1.031e+00  4.045e-01   2.549 0.013327 *  
## CrewSkill   -9.684e-02  3.536e-01  -0.274 0.785085    
## MgrSkill     1.067e+00  3.325e-01   3.211 0.002115 ** 
## ServQual     3.704e-03  1.189e-02   0.311 0.756577    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.051 on 61 degrees of freedom
## Multiple R-squared:  0.4706, Adjusted R-squared:  0.3578 
## F-statistic: 4.172 on 13 and 61 DF,  p-value: 6.431e-05
# Regress PedCount on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
PedCount <- lm(formula = PedCount ~ ., data = store)
print(summary(PedCount))
## 
## Call:
## lm(formula = PedCount ~ ., data = store)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3216 -0.3949  0.0517  0.4661  1.3962 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  3.473e+00  1.313e+00   2.645  0.01038 * 
## store       -4.781e-03  4.159e-03  -1.150  0.25481   
## Sales        5.535e-07  7.496e-07   0.738  0.46312   
## Profit       4.607e-06  2.673e-06   1.724  0.08986 . 
## MTenure     -3.347e-03  1.977e-03  -1.693  0.09560 . 
## CTenure     -1.373e-02  5.315e-03  -2.583  0.01222 * 
## Pop          3.993e-05  1.958e-05   2.039  0.04578 * 
## Comp         1.831e-01  8.168e-02   2.241  0.02865 * 
## Visibility  -1.234e-01  1.154e-01  -1.069  0.28915   
## Res         -1.413e+00  4.788e-01  -2.951  0.00448 **
## Hours24     -7.873e-01  2.638e-01  -2.985  0.00408 **
## CrewSkill    2.765e-01  2.321e-01   1.191  0.23809   
## MgrSkill    -1.595e-01  2.377e-01  -0.671  0.50466   
## ServQual    -1.138e-02  7.764e-03  -1.466  0.14777   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6974 on 61 degrees of freedom
## Multiple R-squared:  0.5929, Adjusted R-squared:  0.5061 
## F-statistic: 6.833 on 13 and 61 DF,  p-value: 7.01e-08
# Regress Res on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
Res <- lm(formula = Res ~ ., data = store)
print(summary(Res))
## 
## Call:
## lm(formula = Res ~ ., data = store)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81339 -0.04105  0.00735  0.07665  0.24330 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.085e+00  3.178e-01   3.414  0.00114 **
## store       -9.573e-04  1.044e-03  -0.917  0.36293   
## Sales       -5.534e-09  1.883e-07  -0.029  0.97666   
## Profit       9.472e-07  6.740e-07   1.405  0.16496   
## MTenure     -8.402e-04  4.945e-04  -1.699  0.09438 . 
## CTenure     -4.394e-03  1.282e-03  -3.426  0.00110 **
## Pop         -6.827e-06  4.987e-06  -1.369  0.17603   
## Comp         5.013e-02  2.026e-02   2.474  0.01617 * 
## Visibility  -6.716e-03  2.913e-02  -0.231  0.81844   
## PedCount    -8.843e-02  2.996e-02  -2.951  0.00448 **
## Hours24     -1.692e-01  6.723e-02  -2.516  0.01450 * 
## CrewSkill    5.138e-02  5.836e-02   0.880  0.38207   
## MgrSkill    -5.881e-02  5.920e-02  -0.994  0.32439   
## ServQual     1.075e-03  1.971e-03   0.545  0.58751   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1745 on 61 degrees of freedom
## Multiple R-squared:  0.3553, Adjusted R-squared:  0.2179 
## F-statistic: 2.586 on 13 and 61 DF,  p-value: 0.006416
# Regress Hours24 on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
Hours24 <- lm(formula = Hours24 ~ ., data = store)
print(summary(Hours24))
## 
## Call:
## lm(formula = Hours24 ~ ., data = store)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62502 -0.09926  0.04684  0.16918  0.51304 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.690e-01  6.228e-01   1.074 0.286982    
## store       -1.415e-03  1.897e-03  -0.746 0.458489    
## Sales        6.913e-07  3.297e-07   2.097 0.040185 *  
## Profit       3.440e-07  1.241e-06   0.277 0.782502    
## MTenure     -3.206e-03  8.203e-04  -3.908 0.000236 ***
## CTenure     -2.733e-03  2.514e-03  -1.087 0.281177    
## Pop         -1.959e-05  8.828e-06  -2.219 0.030221 *  
## Comp         9.337e-02  3.663e-02   2.549 0.013327 *  
## Visibility   1.236e-02  5.280e-02   0.234 0.815662    
## PedCount    -1.619e-01  5.423e-02  -2.985 0.004079 ** 
## Res         -5.559e-01  2.209e-01  -2.516 0.014501 *  
## CrewSkill    2.171e-01  1.028e-01   2.113 0.038692 *  
## MgrSkill    -1.182e-01  1.071e-01  -1.104 0.273963    
## ServQual    -3.119e-04  3.582e-03  -0.087 0.930881    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3162 on 61 degrees of freedom
## Multiple R-squared:  0.3948, Adjusted R-squared:  0.2659 
## F-statistic: 3.062 on 13 and 61 DF,  p-value: 0.001571
# Regress Visibility on every other column of `store` and report the fit.
# The model object is stored under the same name as the response column.
Visibility <- lm(formula = Visibility ~ ., data = store)
print(summary(Visibility))
## 
## Call:
## lm(formula = Visibility ~ ., data = store)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55435 -0.37048 -0.00491  0.46334  1.70508 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  3.983e+00  1.436e+00   2.774  0.00734 **
## store        1.406e-03  4.616e-03   0.305  0.76173   
## Sales       -1.610e-07  8.273e-07  -0.195  0.84638   
## Profit       1.901e-06  2.999e-06   0.634  0.52844   
## MTenure      1.325e-03  2.217e-03   0.598  0.55234   
## CTenure      1.186e-03  6.150e-03   0.193  0.84776   
## Pop          1.105e-05  2.220e-05   0.498  0.62060   
## Comp         4.069e-02  9.324e-02   0.436  0.66409   
## PedCount    -1.491e-01  1.394e-01  -1.069  0.28915   
## Res         -1.296e-01  5.623e-01  -0.231  0.81844   
## Hours24      7.263e-02  3.102e-01   0.234  0.81566   
## CrewSkill   -4.190e-01  2.524e-01  -1.660  0.10201   
## MgrSkill    -1.231e-01  2.617e-01  -0.470  0.63983   
## ServQual     8.878e-03  8.607e-03   1.031  0.30638   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7665 on 61 degrees of freedom
## Multiple R-squared:  0.1368, Adjusted R-squared:  -0.04714 
## F-statistic: 0.7437 on 13 and 61 DF,  p-value: 0.7135