# Load the Store24 case data. The path is relative, so the CSV must be in the
# current working directory.
store24 <- read.csv("Store24.csv")
# View() opens a GUI data viewer, so it is only invoked in interactive sessions.
if (interactive()) {
  View(store24)
}

data frame

# NOTE(review): a bare `attach` (no parentheses, no argument) only prints the
# source of base::attach(), as the echoed output below shows; it does not
# attach any data frame to the search path.
attach
## function (what, pos = 2L, name = deparse(substitute(what)), warn.conflicts = TRUE) 
## {
##     checkConflicts <- function(env) {
##         dont.mind <- c("last.dump", "last.warning", ".Last.value", 
##             ".Random.seed", ".Last.lib", ".onDetach", ".packageName", 
##             ".noGenerics", ".required", ".no_S3_generics", ".requireCachedGenerics")
##         sp <- search()
##         for (i in seq_along(sp)) {
##             if (identical(env, as.environment(i))) {
##                 db.pos <- i
##                 break
##             }
##         }
##         ob <- names(as.environment(db.pos))
##         if (.isMethodsDispatchOn()) {
##             these <- ob[startsWith(ob, ".__T__")]
##             gen <- gsub(".__T__(.*):([^:]+)", "\\\\1", these)
##             from <- gsub(".__T__(.*):([^:]+)", "\\\\2", these)
##             gen <- gen[from != ".GlobalEnv"]
##             ob <- ob[!(ob %in% gen)]
##         }
##         ipos <- seq_along(sp)[-c(db.pos, match(c("Autoloads", 
##             "CheckExEnv"), sp, 0L))]
##         for (i in ipos) {
##             obj.same <- match(names(as.environment(i)), ob, nomatch = 0L)
##             if (any(obj.same > 0L)) {
##                 same <- ob[obj.same]
##                 same <- same[!(same %in% dont.mind)]
##                 Classobjs <- which(startsWith(same, ".__"))
##                 if (length(Classobjs)) 
##                   same <- same[-Classobjs]
##                 same.isFn <- function(where) vapply(same, exists, 
##                   NA, where = where, mode = "function", inherits = FALSE)
##                 same <- same[same.isFn(i) == same.isFn(db.pos)]
##                 if (length(same)) {
##                   pkg <- if (sum(sp == sp[i]) > 1L) 
##                     sprintf("%s (pos = %d)", sp[i], i)
##                   else sp[i]
##                   message(.maskedMsg(sort(same), pkg, by = i < 
##                     db.pos), domain = NA)
##                 }
##             }
##         }
##     }
##     if (pos == 1L) {
##         warning("*** 'pos=1' is not possible; setting 'pos=2' for now.\\n", 
##             "*** Note that 'pos=1' will give an error in the future")
##         pos <- 2L
##     }
##     if (is.character(what) && (length(what) == 1L)) {
##         if (!file.exists(what)) 
##             stop(gettextf("file '%s' not found", what), domain = NA)
##         if (missing(name)) 
##             name <- paste0("file:", what)
##         value <- .Internal(attach(NULL, pos, name))
##         load(what, envir = as.environment(pos))
##     }
##     else value <- .Internal(attach(what, pos, name))
##     if (warn.conflicts && !exists(".conflicts.OK", envir = value, 
##         inherits = FALSE)) {
##         checkConflicts(value)
##     }
##     if (length(names(value)) && .isMethodsDispatchOn()) 
##         methods::cacheMetaData(value, TRUE)
##     invisible(value)
## }
## <bytecode: 0x00000000066b1198>
## <environment: namespace:base>
#summarize

library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Per-column summary (min, quartiles, mean, max) of the store data.
summary(object = store24)
##      store          Sales             Profit          MTenure      
##  Min.   : 1.0   Min.   : 699306   Min.   :122180   Min.   :  0.00  
##  1st Qu.:19.5   1st Qu.: 984579   1st Qu.:211004   1st Qu.:  6.67  
##  Median :38.0   Median :1127332   Median :265014   Median : 24.12  
##  Mean   :38.0   Mean   :1205413   Mean   :276314   Mean   : 45.30  
##  3rd Qu.:56.5   3rd Qu.:1362388   3rd Qu.:331314   3rd Qu.: 50.92  
##  Max.   :75.0   Max.   :2113089   Max.   :518998   Max.   :277.99  
##     CTenure              Pop             Comp          Visibility  
##  Min.   :  0.8871   Min.   : 1046   Min.   : 1.651   Min.   :2.00  
##  1st Qu.:  4.3943   1st Qu.: 5616   1st Qu.: 3.151   1st Qu.:3.00  
##  Median :  7.2115   Median : 8896   Median : 3.629   Median :3.00  
##  Mean   : 13.9315   Mean   : 9826   Mean   : 3.788   Mean   :3.08  
##  3rd Qu.: 17.2156   3rd Qu.:14104   3rd Qu.: 4.230   3rd Qu.:4.00  
##  Max.   :114.1519   Max.   :26519   Max.   :11.128   Max.   :5.00  
##     PedCount         Res          Hours24       CrewSkill    
##  Min.   :1.00   Min.   :0.00   Min.   :0.00   Min.   :2.060  
##  1st Qu.:2.00   1st Qu.:1.00   1st Qu.:1.00   1st Qu.:3.225  
##  Median :3.00   Median :1.00   Median :1.00   Median :3.500  
##  Mean   :2.96   Mean   :0.96   Mean   :0.84   Mean   :3.457  
##  3rd Qu.:4.00   3rd Qu.:1.00   3rd Qu.:1.00   3rd Qu.:3.655  
##  Max.   :5.00   Max.   :1.00   Max.   :1.00   Max.   :4.640  
##     MgrSkill        ServQual     
##  Min.   :2.957   Min.   : 57.90  
##  1st Qu.:3.344   1st Qu.: 78.95  
##  Median :3.589   Median : 89.47  
##  Mean   :3.638   Mean   : 87.15  
##  3rd Qu.:3.925   3rd Qu.: 99.90  
##  Max.   :4.622   Max.   :100.00

describing

# Descriptive statistics for every column, using describe() from psych.
psych::describe(store24)
##            vars  n       mean        sd     median    trimmed       mad
## store         1 75      38.00     21.79      38.00      38.00     28.17
## Sales         2 75 1205413.12 304531.31 1127332.00 1182031.25 288422.04
## Profit        3 75  276313.61  89404.08  265014.00  270260.34  90532.00
## MTenure       4 75      45.30     57.67      24.12      33.58     29.67
## CTenure       5 75      13.93     17.70       7.21      10.60      6.14
## Pop           6 75    9825.59   5911.67    8896.00    9366.07   7266.22
## Comp          7 75       3.79      1.31       3.63       3.66      0.82
## Visibility    8 75       3.08      0.75       3.00       3.07      0.00
## PedCount      9 75       2.96      0.99       3.00       2.97      1.48
## Res          10 75       0.96      0.20       1.00       1.00      0.00
## Hours24      11 75       0.84      0.37       1.00       0.92      0.00
## CrewSkill    12 75       3.46      0.41       3.50       3.47      0.34
## MgrSkill     13 75       3.64      0.41       3.59       3.62      0.45
## ServQual     14 75      87.15     12.61      89.47      88.62     15.61
##                  min        max      range  skew kurtosis       se
## store           1.00      75.00      74.00  0.00    -1.25     2.52
## Sales      699306.00 2113089.00 1413783.00  0.71    -0.09 35164.25
## Profit     122180.00  518998.00  396818.00  0.62    -0.21 10323.49
## MTenure         0.00     277.99     277.99  2.01     3.90     6.66
## CTenure         0.89     114.15     113.26  3.52    15.00     2.04
## Pop          1046.00   26519.00   25473.00  0.62    -0.23   682.62
## Comp            1.65      11.13       9.48  2.48    11.31     0.15
## Visibility      2.00       5.00       3.00  0.25    -0.38     0.09
## PedCount        1.00       5.00       4.00  0.00    -0.52     0.11
## Res             0.00       1.00       1.00 -4.60    19.43     0.02
## Hours24         0.00       1.00       1.00 -1.82     1.32     0.04
## CrewSkill       2.06       4.64       2.58 -0.43     1.64     0.05
## MgrSkill        2.96       4.62       1.67  0.27    -0.53     0.05
## ServQual       57.90     100.00      42.10 -0.66    -0.72     1.46

2(d).1 Use R to measure the mean and standard deviation of Profit.

# Mean of Profit, followed by its standard deviation.
with(store24, mean(Profit))
## [1] 276313.6
with(store24, sd(Profit))
## [1] 89404.08

2(d).2 Use R to measure the mean and standard deviation of MTenure.

# Mean of MTenure, followed by its standard deviation.
with(store24, mean(MTenure))
## [1] 45.29644
with(store24, sd(MTenure))
## [1] 57.67155

2(d).3 Use R to measure the mean and standard deviation of CTenure.

# Mean of CTenure, followed by its standard deviation.
with(store24, mean(CTenure))
## [1] 13.9315
with(store24, sd(CTenure))
## [1] 17.69752

2(e) Understand how sorting and subsetting can be done using R

# Sorting and subsetting demo on the built-in mtcars data set.
# The mpg column is referenced explicitly (mtcars$mpg) instead of through
# attach()/detach(), so the search path is never modified and the sort key
# cannot be masked by another object called `mpg`.
# View() opens a GUI data viewer, so it is only invoked in interactive sessions.
if (interactive()) {
  View(mtcars)
}
newdata <- mtcars[order(mtcars$mpg), ] # sort by mpg (ascending)
if (interactive()) {
  View(newdata)
}
newdata[1:5, ] # see the first 5 rows
##                      mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
newdata <- mtcars[order(-mtcars$mpg), ] # sort by mpg (descending)
if (interactive()) {
  View(newdata)
}

TASK 2f- Replicate Exhibit 1 shown in the case, using R

Q4 Use R to print the {StoreID, Sales, Profit, MTenure, CTenure} of the top 10 most profitable stores.

# Q4: the ten most profitable stores.
# attach() is kept only because later statements in this file still reference
# Profit, MTenure and CTenure unqualified; the sort below names its column
# explicitly so it does not depend on the search path.
attach(store24)
# The sorted copy keeps the name `store` because the correlation tests further
# down read store$Profit, store$MTenure and store$CTenure.
store <- store24[order(-store24$Profit), ]
# View() opens a GUI data viewer, so it is only invoked in interactive sessions.
if (interactive()) {
  View(store)
}
# Columns are selected by name rather than by position, and head() does not
# pad with NA rows if the data has fewer than ten stores.
head(store[, c("store", "Sales", "Profit", "MTenure", "CTenure")], n = 10)
##    store   Sales Profit   MTenure    CTenure
## 74    74 1782957 518998 171.09720  29.519510
## 7      7 1809256 476355  62.53080   7.326488
## 9      9 2113089 474725 108.99350   6.061602
## 6      6 1703140 469050 149.93590  11.351130
## 44    44 1807740 439781 182.23640 114.151900
## 2      2 1619874 424007  86.22219   6.636550
## 45    45 1602362 410149  47.64565   9.166325
## 18    18 1704826 394039 239.96980  33.774130
## 11    11 1583446 389886  44.81977   2.036961
## 47    47 1665657 387853  12.84790   6.636550

Q5 Use R to print the {StoreID, Sales, Profit, MTenure, CTenure} of the bottom 10 least profitable stores.

# Q5: the ten least profitable stores.
# The sort key is referenced as store24$Profit so the result does not depend
# on store24 having been attach()ed.
store1 <- store24[order(store24$Profit), ]
# View() opens a GUI data viewer, so it is only invoked in interactive sessions.
if (interactive()) {
  View(store1)
}
# Columns are selected by name rather than by position, and head() does not
# pad with NA rows if the data has fewer than ten stores.
head(store1[, c("store", "Sales", "Profit", "MTenure", "CTenure")], n = 10)
##    store   Sales Profit     MTenure   CTenure
## 57    57  699306 122180  24.3485700  2.956879
## 66    66  879581 146058 115.2039000  3.876797
## 41    41  744211 147327  14.9180200 11.926080
## 55    55  925744 147672   6.6703910 18.365500
## 32    32  828918 149033  36.0792600  6.636550
## 13    13  857843 152513   0.6571813  1.577002
## 54    54  811190 159792   6.6703910  3.876797
## 52    52 1073008 169201  24.1185600  3.416838
## 61    61  716589 177046  21.8184200 13.305950
## 37    37 1202917 187765  23.1985000  1.347023

2(g)Scatter plot

Q6 Use R to draw a scatter plot of Profit vs. MTenure.

library(car)
## Warning: package 'car' was built under R version 3.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Q6: Profit (y) against manager tenure (x). The previous call plotted
# CTenure, which duplicated the Q7 plot instead of answering Q6.
scatterplot(x = store24$MTenure, y = store24$Profit)

#2(h)Scatter plot #Q7 Use R to draw a scatter plot of Profit vs. CTenure.

library(car)
# Q7: Profit on the vertical axis against CTenure on the horizontal axis.
scatterplot(y = store24$Profit, x = store24$CTenure)

2(i)Correlation Matrix

Q8 Use R to construct a Correlation Matrix for all the variables in the dataset. (Display the numbers up to 2 decimal places)

# Pairwise correlations between all columns; `matt` keeps full precision and
# only the displayed copy is rounded to two decimals.
matt <- cor(x = store24)
print(round(matt, digits = 2))
##            store Sales Profit MTenure CTenure   Pop  Comp Visibility
## store       1.00 -0.23  -0.20   -0.06    0.02 -0.29  0.03      -0.03
## Sales      -0.23  1.00   0.92    0.45    0.25  0.40 -0.24       0.13
## Profit     -0.20  0.92   1.00    0.44    0.26  0.43 -0.33       0.14
## MTenure    -0.06  0.45   0.44    1.00    0.24 -0.06  0.18       0.16
## CTenure     0.02  0.25   0.26    0.24    1.00  0.00 -0.07       0.07
## Pop        -0.29  0.40   0.43   -0.06    0.00  1.00 -0.27      -0.05
## Comp        0.03 -0.24  -0.33    0.18   -0.07 -0.27  1.00       0.03
## Visibility -0.03  0.13   0.14    0.16    0.07 -0.05  0.03       1.00
## PedCount   -0.22  0.42   0.45    0.06   -0.08  0.61 -0.15      -0.14
## Res        -0.03 -0.17  -0.16   -0.06   -0.34 -0.24  0.22       0.02
## Hours24     0.03  0.06  -0.03   -0.17    0.07 -0.22  0.13       0.05
## CrewSkill   0.05  0.16   0.16    0.10    0.26  0.28 -0.04      -0.20
## MgrSkill   -0.07  0.31   0.32    0.23    0.12  0.08  0.22       0.07
## ServQual   -0.32  0.39   0.36    0.18    0.08  0.12  0.02       0.21
##            PedCount   Res Hours24 CrewSkill MgrSkill ServQual
## store         -0.22 -0.03    0.03      0.05    -0.07    -0.32
## Sales          0.42 -0.17    0.06      0.16     0.31     0.39
## Profit         0.45 -0.16   -0.03      0.16     0.32     0.36
## MTenure        0.06 -0.06   -0.17      0.10     0.23     0.18
## CTenure       -0.08 -0.34    0.07      0.26     0.12     0.08
## Pop            0.61 -0.24   -0.22      0.28     0.08     0.12
## Comp          -0.15  0.22    0.13     -0.04     0.22     0.02
## Visibility    -0.14  0.02    0.05     -0.20     0.07     0.21
## PedCount       1.00 -0.28   -0.28      0.21     0.09    -0.01
## Res           -0.28  1.00   -0.09     -0.15    -0.03     0.09
## Hours24       -0.28 -0.09    1.00      0.11    -0.04     0.06
## CrewSkill      0.21 -0.15    0.11      1.00    -0.02    -0.03
## MgrSkill       0.09 -0.03   -0.04     -0.02     1.00     0.36
## ServQual      -0.01  0.09    0.06     -0.03     0.36     1.00

2(j)Correlations

Q9 Use R to measure the correlation between Profit and MTenure. (Display the numbers up to 2 Decimal places)

# Q9: correlation between Profit and MTenure, shown to two decimals.
# Columns are taken from store24 explicitly instead of relying on the data
# frame having been attach()ed earlier.
ab <- cor(store24$Profit, store24$MTenure)
round(ab, 2)
## [1] 0.44

Q10 Use R to measure the correlation between Profit and CTenure. (Display the numbers up to 2 decimal places)

# Q10: correlation between Profit and CTenure, shown to two decimals.
# Columns are taken from store24 explicitly instead of relying on the data
# frame having been attach()ed earlier.
ab <- cor(store24$Profit, store24$CTenure)
round(ab, 2)
## [1] 0.26

2(k)

Q11 Use R to construct the following Corrgram based on all variables in the dataset.

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Q11: corrgram over all store24 columns, kept in their original order
# (order = FALSE), with panel.shade below the diagonal, panel.pie above it
# and panel.txt as the text panel.
corrgram(
  store24,
  main = "Corrgram of store variables intercorrelations",
  order = FALSE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie
)

#2(l) Pearson’s Correlation Tests

Q12 Run a Pearson’s Correlation test on the correlation between Profit and MTenure. What is the p-value?

# Q12: Pearson test of Profit vs. MTenure, run on the original data frame
# rather than on `store`, the profit-sorted copy created for Q4. Both columns
# are reordered together in that copy, so the statistics are unchanged; only
# the echoed "data:" label differs.
cor.test(x = store24$MTenure, y = store24$Profit)
## 
##  Pearson's product-moment correlation
## 
## data:  store24$MTenure and store24$Profit
## t = 4.1731, df = 73, p-value = 8.193e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2353497 0.6055175
## sample estimates:
##       cor 
## 0.4388692

Q13 Run a Pearson’s Correlation test on the correlation between Profit and CTenure. What is the p-value?

# Q13: Pearson test of Profit vs. CTenure, run on the original data frame
# rather than on `store`, the profit-sorted copy created for Q4. Both columns
# are reordered together in that copy, so the statistics are unchanged; only
# the echoed "data:" label differs.
cor.test(x = store24$CTenure, y = store24$Profit)
## 
##  Pearson's product-moment correlation
## 
## data:  store24$CTenure and store24$Profit
## t = 2.2786, df = 73, p-value = 0.02562
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03262507 0.45786339
## sample estimates:
##       cor 
## 0.2576789

2m - Regression Analysis

Q14 Run a regression of Profit on {MTenure, CTenure, Comp, Pop, PedCount, Res, Hours24, Visibility}.

# Q14: linear regression of Profit on the two tenure variables and the six
# site variables; the fitted model is stored in `fit` for the later questions.
fit <- lm(
  Profit ~ MTenure + CTenure + Comp + Pop + PedCount + Res + Hours24 +
    Visibility,
  data = store24
)
summary(object = fit)
## 
## Call:
## lm(formula = Profit ~ MTenure + CTenure + Comp + Pop + PedCount + 
##     Res + Hours24 + Visibility, data = store24)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -105789  -35946   -7069   33780  112390 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7610.041  66821.994   0.114 0.909674    
## MTenure        760.993    127.086   5.988 9.72e-08 ***
## CTenure        944.978    421.687   2.241 0.028400 *  
## Comp        -25286.887   5491.937  -4.604 1.94e-05 ***
## Pop              3.667      1.466   2.501 0.014890 *  
## PedCount     34087.359   9073.196   3.757 0.000366 ***
## Res          91584.675  39231.283   2.334 0.022623 *  
## Hours24      63233.307  19641.114   3.219 0.001994 ** 
## Visibility   12625.447   9087.620   1.389 0.169411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56970 on 66 degrees of freedom
## Multiple R-squared:  0.6379, Adjusted R-squared:  0.594 
## F-statistic: 14.53 on 8 and 66 DF,  p-value: 5.382e-12

2(n)Based on TASK 2m, answer the following questions:

Q15 List the explanatory variable(s) whose beta-coefficients are statistically significant (p < 0.05).

# Q15: p-values of the explanatory variables significant at the 5% level.
# The coefficient table is extracted once with coef(), which avoids the
# partially matched `$coef` and a second summary() call. The p-value column
# is selected by name. The intercept is dropped because it is not an
# explanatory variable, and the comparison is strict to match "p < 0.05".
p_values <- coef(summary(fit))[, "Pr(>|t|)"]
p_values <- p_values[names(p_values) != "(Intercept)"]
p_values[p_values < 0.05]
##      MTenure      CTenure         Comp          Pop     PedCount 
## 9.715897e-08 2.839955e-02 1.938381e-05 1.489046e-02 3.664408e-04 
##          Res      Hours24 
## 2.262320e-02 1.993586e-03

Q16 List the explanatory variable(s) whose beta-coefficients are not statistically significant (p > 0.05).

# Q16: p-values of the explanatory variables NOT significant at the 5% level.
# The coefficient table is extracted once with coef(), which avoids the
# partially matched `$coef` and a second summary() call. The intercept is
# dropped because the question asks about explanatory variables only.
p_values <- coef(summary(fit))[, "Pr(>|t|)"]
p_values <- p_values[names(p_values) != "(Intercept)"]
p_values[p_values > 0.05]
## Visibility 
##  0.1694106

Based on task2(m)

Q17 What is the expected change in the Profit at a store, if the Manager’s tenure, i.e. number of months of experience with Store24, increases by one month?

# Q17: estimated MTenure coefficient from `fit`, rounded to a whole number.
round(coef(fit)[["MTenure"]], digits = 0)
## [1] 761

Q18 What is the expected change in the Profit at a store, if the Crew’s tenure, i.e. number of months of experience with Store24, increases by one month?

# Q18: estimated CTenure coefficient from `fit`, rounded to a whole number.
round(coef(fit)[["CTenure"]], digits = 0)
## [1] 945

Based on task 2(n)

Q19 “Executive Summary”

1 The means of Profit, MTenure and CTenure are 276313.6, 45.30 and 13.93 respectively.

2 The standard deviations of Profit, MTenure and CTenure are 89404.08, 57.67 and 17.70 respectively.

3 We can do sorting and subsetting using r

4 The minimum profit occurs at store ID 57 (122180) and the maximum profit at store ID 74 (518998).

5 The correlation between Profit and MTenure is 0.44, and between Profit and CTenure it is 0.26.

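The Pearson correlation tests give the same estimates (0.4389 and 0.2577), with p-values of 8.193e-05 and 0.02562 respectively, so both correlations are statistically significant at the 5% level.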