TASK 4c - Setting the working directory and summarising the data

# Work from the folder that holds the case data, load the Store24 data set
# into `store`, and print a per-column summary.
setwd(dir = "C:/Users/alouk/Downloads")
store <- read.csv(file = "Store24.csv")
summary(object = store)
##      store          Sales             Profit          MTenure      
##  Min.   : 1.0   Min.   : 699306   Min.   :122180   Min.   :  0.00  
##  1st Qu.:19.5   1st Qu.: 984579   1st Qu.:211004   1st Qu.:  6.67  
##  Median :38.0   Median :1127332   Median :265014   Median : 24.12  
##  Mean   :38.0   Mean   :1205413   Mean   :276314   Mean   : 45.30  
##  3rd Qu.:56.5   3rd Qu.:1362388   3rd Qu.:331314   3rd Qu.: 50.92  
##  Max.   :75.0   Max.   :2113089   Max.   :518998   Max.   :277.99  
##     CTenure              Pop             Comp          Visibility  
##  Min.   :  0.8871   Min.   : 1046   Min.   : 1.651   Min.   :2.00  
##  1st Qu.:  4.3943   1st Qu.: 5616   1st Qu.: 3.151   1st Qu.:3.00  
##  Median :  7.2115   Median : 8896   Median : 3.629   Median :3.00  
##  Mean   : 13.9315   Mean   : 9826   Mean   : 3.788   Mean   :3.08  
##  3rd Qu.: 17.2156   3rd Qu.:14104   3rd Qu.: 4.230   3rd Qu.:4.00  
##  Max.   :114.1519   Max.   :26519   Max.   :11.128   Max.   :5.00  
##     PedCount         Res          Hours24       CrewSkill    
##  Min.   :1.00   Min.   :0.00   Min.   :0.00   Min.   :2.060  
##  1st Qu.:2.00   1st Qu.:1.00   1st Qu.:1.00   1st Qu.:3.225  
##  Median :3.00   Median :1.00   Median :1.00   Median :3.500  
##  Mean   :2.96   Mean   :0.96   Mean   :0.84   Mean   :3.457  
##  3rd Qu.:4.00   3rd Qu.:1.00   3rd Qu.:1.00   3rd Qu.:3.655  
##  Max.   :5.00   Max.   :1.00   Max.   :1.00   Max.   :4.640  
##     MgrSkill        ServQual     
##  Min.   :2.957   Min.   : 57.90  
##  1st Qu.:3.344   1st Qu.: 78.95  
##  Median :3.589   Median : 89.47  
##  Mean   :3.638   Mean   : 87.15  
##  3rd Qu.:3.925   3rd Qu.: 99.90  
##  Max.   :4.622   Max.   :100.00

TASK 4d - Mean and standard deviation of Profit, MTenure and CTenure

# Profit: mean and standard deviation
mean(store[["Profit"]])
## [1] 276313.6
sd(store[["Profit"]])
## [1] 89404.08
# MTenure: mean and standard deviation
mean(store[["MTenure"]])
## [1] 45.29644
sd(store[["MTenure"]])
## [1] 57.67155
# CTenure: mean and standard deviation
mean(store[["CTenure"]])
## [1] 13.9315
sd(store[["CTenure"]])
## [1] 17.69752

TASK 4e - Sorting and Subsetting data in R

# Sort the built-in mtcars data by mpg, ascending and then descending.
# Columns are referenced explicitly (mtcars$mpg) rather than through
# attach()/detach(), so nothing is placed on the search path and no other
# object can mask `mpg`.
# View() needs a GUI data viewer, so it is only called in interactive sessions.
if (interactive()) View(mtcars)
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
if (interactive()) View(newdata)
head(newdata, 5) # see the first 5 rows
##                      mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
newdata <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
if (interactive()) View(newdata)

TASK 4f- Replicate Exhibit 1 shown in the case, using R

# Ten most profitable stores
# attach() is kept because other parts of this script may refer to the
# columns of `store` by bare name; this block itself no longer relies on it.
attach(store)
## The following object is masked _by_ .GlobalEnv:
## 
##     store
# Order rows by descending Profit, using the column explicitly so the lookup
# does not depend on what is on the search path.
newdata1 <- store[
  order(-store$Profit),
  c("store", "Profit", "MTenure", "CTenure")
]
head(newdata1, 10)
##    store Profit   MTenure    CTenure
## 74    74 518998 171.09720  29.519510
## 7      7 476355  62.53080   7.326488
## 9      9 474725 108.99350   6.061602
## 6      6 469050 149.93590  11.351130
## 44    44 439781 182.23640 114.151900
## 2      2 424007  86.22219   6.636550
## 45    45 410149  47.64565   9.166325
## 18    18 394039 239.96980  33.774130
## 11    11 389886  44.81977   2.036961
## 47    47 387853  12.84790   6.636550
# Ten least profitable stores
# Order rows by ascending Profit; the column is taken from `store` explicitly
# so this does not depend on attach(store) having been run.
newdata2 <- store[
  order(store$Profit),
  c("store", "Profit", "MTenure", "CTenure")
]
head(newdata2, 10)
##    store Profit     MTenure   CTenure
## 57    57 122180  24.3485700  2.956879
## 66    66 146058 115.2039000  3.876797
## 41    41 147327  14.9180200 11.926080
## 55    55 147672   6.6703910 18.365500
## 32    32 149033  36.0792600  6.636550
## 13    13 152513   0.6571813  1.577002
## 54    54 159792   6.6703910  3.876797
## 52    52 169201  24.1185600  3.416838
## 61    61 177046  21.8184200 13.305950
## 37    37 187765  23.1985000  1.347023

TASK 4g - Scatter Plots

library(car)
# Scatterplot of Profit against MTenure. The variables are looked up in
# `store` via the data argument instead of relying on attach(store).
scatterplot(Profit ~ MTenure, data = store,
            main = "Scatterplot of Profit vs Mtenure", pch = 19)

TASK 4h - Scatter Plots (contd.)

# Scatterplot of Profit against CTenure. The variables are looked up in
# `store` via the data argument instead of relying on attach(store).
scatterplot(Profit ~ CTenure, data = store,
            main = "Scatterplot of Profit vs Ctenure", pch = 19)

TASK 4i - Correlation Matrix

# Correlation matrix across every column of `store`, shown to two decimals.
round(cor(store), digits = 2)
##            store Sales Profit MTenure CTenure   Pop  Comp Visibility
## store       1.00 -0.23  -0.20   -0.06    0.02 -0.29  0.03      -0.03
## Sales      -0.23  1.00   0.92    0.45    0.25  0.40 -0.24       0.13
## Profit     -0.20  0.92   1.00    0.44    0.26  0.43 -0.33       0.14
## MTenure    -0.06  0.45   0.44    1.00    0.24 -0.06  0.18       0.16
## CTenure     0.02  0.25   0.26    0.24    1.00  0.00 -0.07       0.07
## Pop        -0.29  0.40   0.43   -0.06    0.00  1.00 -0.27      -0.05
## Comp        0.03 -0.24  -0.33    0.18   -0.07 -0.27  1.00       0.03
## Visibility -0.03  0.13   0.14    0.16    0.07 -0.05  0.03       1.00
## PedCount   -0.22  0.42   0.45    0.06   -0.08  0.61 -0.15      -0.14
## Res        -0.03 -0.17  -0.16   -0.06   -0.34 -0.24  0.22       0.02
## Hours24     0.03  0.06  -0.03   -0.17    0.07 -0.22  0.13       0.05
## CrewSkill   0.05  0.16   0.16    0.10    0.26  0.28 -0.04      -0.20
## MgrSkill   -0.07  0.31   0.32    0.23    0.12  0.08  0.22       0.07
## ServQual   -0.32  0.39   0.36    0.18    0.08  0.12  0.02       0.21
##            PedCount   Res Hours24 CrewSkill MgrSkill ServQual
## store         -0.22 -0.03    0.03      0.05    -0.07    -0.32
## Sales          0.42 -0.17    0.06      0.16     0.31     0.39
## Profit         0.45 -0.16   -0.03      0.16     0.32     0.36
## MTenure        0.06 -0.06   -0.17      0.10     0.23     0.18
## CTenure       -0.08 -0.34    0.07      0.26     0.12     0.08
## Pop            0.61 -0.24   -0.22      0.28     0.08     0.12
## Comp          -0.15  0.22    0.13     -0.04     0.22     0.02
## Visibility    -0.14  0.02    0.05     -0.20     0.07     0.21
## PedCount       1.00 -0.28   -0.28      0.21     0.09    -0.01
## Res           -0.28  1.00   -0.09     -0.15    -0.03     0.09
## Hours24       -0.28 -0.09    1.00      0.11    -0.04     0.06
## CrewSkill      0.21 -0.15    0.11      1.00    -0.02    -0.03
## MgrSkill       0.09 -0.03   -0.04     -0.02     1.00     0.36
## ServQual      -0.01  0.09    0.06     -0.03     0.36     1.00

TASK 4j - Correlations

# Correlation of Profit with MTenure (columns taken from `store` explicitly
# rather than from the attach()ed search path)
round(with(store, cor(Profit, MTenure)), 2)
## [1] 0.44
# Correlation of Profit with CTenure
round(with(store, cor(Profit, CTenure)), 2)
## [1] 0.26

TASK 4k - Corrgram

library(corrplot)
## corrplot 0.84 loaded
# Corrgram of all pairwise correlations in `store`: "shade" style for the
# lower triangle and "pie" style for the upper triangle.
corrplot.mixed(
  corr = cor(store),
  lower = "shade",
  upper = "pie",
  main = "Corrgram of store variables"
)

TASK 4l - Pearson’s Correlation Tests

# Run a Pearson's correlation test on the correlation between Profit and
# MTenure. What is the p-value?
# with() evaluates the call inside `store`, so the test does not depend on
# attach(store); the printed "data:" line stays "Profit and MTenure".
with(store, cor.test(Profit, MTenure))
## 
##  Pearson's product-moment correlation
## 
## data:  Profit and MTenure
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2353497 0.6055175
## sample estimates:
##       cor 
## 0.4388692
#The p-value is 8.193e-05
# Run a Pearson's correlation test on the correlation between Profit and
# CTenure. What is the p-value?
# with() evaluates the call inside `store`, so the test does not depend on
# attach(store); the printed "data:" line stays "Profit and CTenure".
with(store, cor.test(Profit, CTenure))
## 
##  Pearson's product-moment correlation
## 
## data:  Profit and CTenure
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03262507 0.45786339
## sample estimates:
##       cor 
## 0.2576789
#The p-value is 0.02562

TASK 4m - Regression Analysis

# Simple linear regression of Profit on MTenure. with() resolves the
# variables inside `store` instead of the attach()ed search path and keeps
# the printed Call as lm(formula = Profit ~ MTenure).
mten <- with(store, lm(Profit ~ MTenure))
summary(mten)
## 
## Call:
## lm(formula = Profit ~ MTenure)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -177817  -52029   -8635   50871  188316 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 245496.3    11906.4  20.619  < 2e-16 ***
## MTenure        680.3      163.0   4.173 8.19e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 80880 on 73 degrees of freedom
## Multiple R-squared:  0.1926, Adjusted R-squared:  0.1815 
## F-statistic: 17.41 on 1 and 73 DF,  p-value: 8.193e-05
# Simple linear regression of Profit on CTenure. with() resolves the
# variables inside `store` instead of the attach()ed search path and keeps
# the printed Call as lm(formula = Profit ~ CTenure).
cten <- with(store, lm(Profit ~ CTenure))
summary(cten)
## 
## Call:
## lm(formula = Profit ~ CTenure)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -139848  -64869   -9022   45057  222393 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 258178.4    12814.4  20.148   <2e-16 ***
## CTenure       1301.7      571.3   2.279   0.0256 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86970 on 73 degrees of freedom
## Multiple R-squared:  0.0664, Adjusted R-squared:  0.05361 
## F-statistic: 5.192 on 1 and 73 DF,  p-value: 0.02562
# Simple linear regression of Profit on Comp. with() resolves the variables
# inside `store` instead of the attach()ed search path and keeps the printed
# Call as lm(formula = Profit ~ Comp).
comp <- with(store, lm(Profit ~ Comp))
summary(comp)
## 
## Call:
## lm(formula = Profit ~ Comp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -172707  -65521  -24559   56628  209205 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   362702      30119  12.042  < 2e-16 ***
## Comp          -22807       7520  -3.033  0.00335 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 84830 on 73 degrees of freedom
## Multiple R-squared:  0.1119, Adjusted R-squared:  0.09975 
## F-statistic:   9.2 on 1 and 73 DF,  p-value: 0.003351
# Simple linear regression of Profit on Pop. with() resolves the variables
# inside `store` instead of the attach()ed search path and keeps the printed
# Call as lm(formula = Profit ~ Pop).
pop <- with(store, lm(Profit ~ Pop))
summary(pop)
## 
## Call:
## lm(formula = Profit ~ Pop)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -152198  -52285  -17228   43501  235602 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.123e+05  1.829e+04  11.611  < 2e-16 ***
## Pop         6.513e+00  1.598e+00   4.077 0.000115 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 81240 on 73 degrees of freedom
## Multiple R-squared:  0.1854, Adjusted R-squared:  0.1743 
## F-statistic: 16.62 on 1 and 73 DF,  p-value: 0.000115
# Simple linear regression of Profit on PedCount. with() resolves the
# variables inside `store` instead of the attach()ed search path and keeps
# the printed Call as lm(formula = Profit ~ PedCount).
pc <- with(store, lm(Profit ~ PedCount))
summary(pc)
## 
## Call:
## lm(formula = Profit ~ PedCount)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -131878  -57678   -1538   45741  200501 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   156254      29373   5.320 1.09e-06 ***
## PedCount       40561       9415   4.308 5.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 80370 on 73 degrees of freedom
## Multiple R-squared:  0.2027, Adjusted R-squared:  0.1918 
## F-statistic: 18.56 on 1 and 73 DF,  p-value: 5.057e-05
# Simple linear regression of Profit on Res. with() resolves the variables
# inside `store` instead of the attach()ed search path and keeps the printed
# Call as lm(formula = Profit ~ Res).
res <- with(store, lm(Profit ~ Res))
summary(res)
## 
## Call:
## lm(formula = Profit ~ Res)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -151243  -62419   -9467   57891  245575 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   345696      51305   6.738 3.18e-09 ***
## Res           -72273      52363  -1.380    0.172    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 88860 on 73 degrees of freedom
## Multiple R-squared:  0.02543,    Adjusted R-squared:  0.01208 
## F-statistic: 1.905 on 1 and 73 DF,  p-value: 0.1717
# Simple linear regression of Profit on Hours24. with() resolves the
# variables inside `store` instead of the attach()ed search path and keeps
# the printed Call as lm(formula = Profit ~ Hours24).
h24 <- with(store, lm(Profit ~ Hours24))
summary(h24)
## 
## Call:
## lm(formula = Profit ~ Hours24)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -153138  -64315  -11246   52884  237458 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   281540      25976   10.84   <2e-16 ***
## Hours24        -6222      28343   -0.22    0.827    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 89980 on 73 degrees of freedom
## Multiple R-squared:  0.0006598,  Adjusted R-squared:  -0.01303 
## F-statistic: 0.0482 on 1 and 73 DF,  p-value: 0.8268
# Simple linear regression of Profit on Visibility. with() resolves the
# variables inside `store` instead of the attach()ed search path and keeps
# the printed Call as lm(formula = Profit ~ Visibility).
vis <- with(store, lm(Profit ~ Visibility))
summary(vis)
## 
## Call:
## lm(formula = Profit ~ Visibility)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -152838  -63359  -10946   43839  243980 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   226431      43855   5.163 2.02e-06 ***
## Visibility     16196      13840   1.170    0.246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 89180 on 73 degrees of freedom
## Multiple R-squared:  0.01841,    Adjusted R-squared:  0.004966 
## F-statistic: 1.369 on 1 and 73 DF,  p-value: 0.2457

The explanatory variables whose beta-coefficients are statistically significant (p < 0.05) are: MTenure, CTenure, Comp, Pop and PedCount.

The explanatory variables whose beta-coefficients are not statistically significant (p > 0.05) are: Res, Hours24 and Visibility.

The expected change in Profit for a one-month increase in MTenure is 680.3 (the MTenure coefficient in the regression of Profit on MTenure).

The expected change in Profit for a one-month increase in CTenure is 1301.7 (the CTenure coefficient in the regression of Profit on CTenure).

Executive Summary

Based on the regression analysis, we can see that Profit is significantly dependent upon MTenure, CTenure, Comp, Pop and PedCount (p-value < 0.05).

Although its coefficient is statistically significant, Pop does not produce a large change in Profit per unit, since the expected change in Profit for a one-unit change in Pop is very low (about 6.51).