Reading the dataset

# Load the marketing campaign CSV. stringsAsFactors is set explicitly because
# read.csv() stopped converting strings to factors by default in R 4.0.0, and
# the later steps rely on MarketSize being a factor.
MarCamp.df <- read.csv(
  "WA_Fn-UseC_-Marketing-Campaign-Eff-UseC_-FastF.csv",
  stringsAsFactors = TRUE
)
# Show column names, types and a preview of the values.
str(MarCamp.df)
## 'data.frame':    548 obs. of  7 variables:
##  $ MarketID        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ MarketSize      : Factor w/ 3 levels "Large","Medium",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ LocationID      : int  1 1 1 1 2 2 2 2 3 3 ...
##  $ AgeOfStore      : int  4 4 4 4 5 5 5 5 12 12 ...
##  $ Promotion       : int  3 3 3 3 2 2 2 2 1 1 ...
##  $ week            : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ SalesInThousands: num  33.7 35.7 29 39.2 27.8 ...
# Number of rows and columns in the data frame.
dim(MarCamp.df)
## [1] 548   7
## Converting the Promotion, week and MarketID columns to factors from integers

# Promotion, week and MarketID are categorical codes, so store them as factors
# instead of integers, then re-inspect the structure.
MarCamp.df <- transform(
  MarCamp.df,
  Promotion = as.factor(Promotion),
  week = as.factor(week),
  MarketID = as.factor(MarketID)
)
str(MarCamp.df)
## 'data.frame':    548 obs. of  7 variables:
##  $ MarketID        : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ MarketSize      : Factor w/ 3 levels "Large","Medium",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ LocationID      : int  1 1 1 1 2 2 2 2 3 3 ...
##  $ AgeOfStore      : int  4 4 4 4 5 5 5 5 12 12 ...
##  $ Promotion       : Factor w/ 3 levels "1","2","3": 3 3 3 3 2 2 2 2 1 1 ...
##  $ week            : Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
##  $ SalesInThousands: num  33.7 35.7 29 39.2 27.8 ...

Descriptive statistics (min, max, median etc) of each variable

# Per-column summary: level counts for factors, quartiles and mean for
# numeric columns.
summary(MarCamp.df)
##     MarketID    MarketSize    LocationID      AgeOfStore     Promotion
##  3      : 88   Large :168   Min.   :  1.0   Min.   : 1.000   1:172    
##  10     : 80   Medium:320   1st Qu.:216.0   1st Qu.: 4.000   2:188    
##  5      : 60   Small : 60   Median :504.0   Median : 7.000   3:188    
##  6      : 60                Mean   :479.7   Mean   : 8.504            
##  7      : 60                3rd Qu.:708.0   3rd Qu.:12.000            
##  1      : 52                Max.   :920.0   Max.   :28.000            
##  (Other):148                                                          
##  week    SalesInThousands
##  1:137   Min.   :17.34   
##  2:137   1st Qu.:42.55   
##  3:137   Median :50.20   
##  4:137   Mean   :53.47   
##          3rd Qu.:60.48   
##          Max.   :99.65   
## 
library(psych)
# Per-column descriptive statistics (n, mean, sd, median, skew, kurtosis, ...).
# Factor columns appear with a trailing * in the printed table.
describe(MarCamp.df)
##                  vars   n   mean     sd median trimmed    mad   min    max
## MarketID*           1 548   5.72   2.88    6.0    5.76   4.45  1.00  10.00
## MarketSize*         2 548   1.80   0.61    2.0    1.75   0.00  1.00   3.00
## LocationID          3 548 479.66 287.97  504.0  483.96 421.06  1.00 920.00
## AgeOfStore          4 548   8.50   6.64    7.0    7.63   5.93  1.00  28.00
## Promotion*          5 548   2.03   0.81    2.0    2.04   1.48  1.00   3.00
## week*               6 548   2.50   1.12    2.5    2.50   1.48  1.00   4.00
## SalesInThousands    7 548  53.47  16.76   50.2   52.02  12.76 17.34  99.65
##                   range  skew kurtosis    se
## MarketID*          9.00 -0.02    -1.18  0.12
## MarketSize*        2.00  0.14    -0.53  0.03
## LocationID       919.00 -0.02    -1.16 12.30
## AgeOfStore        27.00  1.04     0.35  0.28
## Promotion*         2.00 -0.05    -1.48  0.03
## week*              3.00  0.00    -1.37  0.05
## SalesInThousands  82.31  0.80     0.14  0.72

Effect of the three promotion strategies on sales

# Horizontal box plot of sales for each promotion strategy. Because the plot
# is horizontal, sales run along the x axis and the strategies along the y axis.
boxplot(
  SalesInThousands ~ Promotion,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "yellow"),
  main = "Plot of Promotion Strategy vs Sales",
  xlab = "Sales in Thousands",
  ylab = "Promotion Strategy"
)

Effect of Age of Store on Sales

# Scatter plot of sales (y axis) against the age of the store (x axis).
plot(
  SalesInThousands ~ AgeOfStore,
  data = MarCamp.df,
  main = "Visualization of Sales wrt Age of store",
  xlab = "Age Of Store",
  ylab = "Sales in Thousands"
)

Effect of Market Size on Sales

# Horizontal box plot of sales for each market size. MarketSize has three
# levels, so exactly three fill colours are supplied (the three that were
# actually drawn before; the extra ones were never used).
boxplot(
  SalesInThousands ~ MarketSize,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "peachpuff"),
  main = "Plot of Market Size and Sales",
  xlab = "Sales In Thousands",
  ylab = "Market Size"
)

Effect of Location ID on Promotional Strategy

# Horizontal box plot of the LocationID values that fall under each promotion
# strategy. Promotion has three levels, so exactly three fill colours are
# supplied (the three that were actually drawn before).
boxplot(
  LocationID ~ Promotion,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "peachpuff"),
  main = "Plot of Promotion Strategy and Location ID",
  xlab = "Location ID",
  ylab = "Promotion Strategy"
)

Effect of Market Size on Sales

library(lattice)
# One histogram panel of sales per market size.
histogram(
  ~ SalesInThousands | MarketSize,
  data = MarCamp.df
)

Relationship between Market Size and Promotional Strategy

library(lattice)
# One panel per market size showing how its rows are split across the
# promotion strategies.
histogram(
  ~ Promotion | MarketSize,
  data = MarCamp.df
)

Percentage of Sales wrt market size and Promotion Strategies

# Histogram of percentages: sales distribution for every Promotion x
# MarketSize combination, one panel each on a 3 x 3 grid. `type` is left
# unset here (the counts variant below passes type = "count").
histogram(
  ~ SalesInThousands | Promotion + MarketSize,
  data = MarCamp.df,
  layout = c(3, 3),
  col = c("burlywood", "darkolivegreen", "red", "yellow", "peachpuff", "blue")
)

Relation between market size, Promotion Strategies and week

# Histogram of counts: number of rows per week for every Promotion x
# MarketSize combination, one panel each on a 3 x 3 grid.
histogram(
  ~ week | Promotion + MarketSize,
  data = MarCamp.df,
  type = "count",
  layout = c(3, 3),
  col = c("burlywood", "darkolivegreen", "red", "yellow", "peachpuff", "blue")
)

Mean Sales wrt Market Size, Promotion, Age of Store and week

# Mean sales for each market size.
aggregate(SalesInThousands ~ MarketSize, data = MarCamp.df, FUN = mean)
##   MarketSize SalesInThousands
## 1      Large         70.11673
## 2     Medium         43.98534
## 3      Small         57.40933
# Mean sales for each promotion strategy.
aggregate(SalesInThousands ~ Promotion, data = MarCamp.df, FUN = mean)
##   Promotion SalesInThousands
## 1         1         58.09901
## 2         2         47.32941
## 3         3         55.36447
# Mean sales for each distinct store age.
aggregate(SalesInThousands ~ AgeOfStore, data = MarCamp.df, FUN = mean)
##    AgeOfStore SalesInThousands
## 1           1         58.41562
## 2           2         59.17950
## 3           3         60.22750
## 4           4         53.43773
## 5           5         48.81864
## 6           6         51.36667
## 7           7         52.12875
## 8           8         50.47575
## 9           9         48.99607
## 10         10         39.31375
## 11         11         57.15937
## 12         12         47.48292
## 13         13         59.64250
## 14         14         49.06333
## 15         15         42.67375
## 16         17         49.93750
## 17         18         50.71000
## 18         19         63.63800
## 19         20         60.20250
## 20         22         59.68833
## 21         23         65.09750
## 22         24         51.14083
## 23         25         45.42500
## 24         27         52.39250
## 25         28         52.28500
# Mean sales for each week of the campaign.
aggregate(SalesInThousands ~ week, data = MarCamp.df, FUN = mean)
##   week SalesInThousands
## 1    1         53.79058
## 2    2         53.38657
## 3    3         53.47460
## 4    4         53.21307

Combined mean sales and age of store wrt market size and promotional strategies

# Mean sales and mean store age, side by side, for each market size.
aggregate(
  cbind(SalesInThousands, AgeOfStore) ~ MarketSize,
  data = MarCamp.df,
  FUN = mean
)
##   MarketSize SalesInThousands AgeOfStore
## 1      Large         70.11673   7.142857
## 2     Medium         43.98534   8.787500
## 3      Small         57.40933  10.800000
# Mean sales and mean store age, side by side, for each promotion strategy.
aggregate(
  cbind(SalesInThousands, AgeOfStore) ~ Promotion,
  data = MarCamp.df,
  FUN = mean
)
##   Promotion SalesInThousands AgeOfStore
## 1         1         58.09901   8.279070
## 2         2         47.32941   7.978723
## 3         3         55.36447   9.234043

Number of observations (one row per store per week) wrt Promotion strategies and market size in One way Contingency Tables

# Row count for each promotion strategy.
table(Promotion = MarCamp.df$Promotion)
## Promotion
##   1   2   3 
## 172 188 188
# Row count for each market size.
table(MarketSize = MarCamp.df$MarketSize)
## MarketSize
##  Large Medium  Small 
##    168    320     60
# Share of rows under each promotion strategy, as percentages.
prop.table(table(Promotion = MarCamp.df$Promotion)) * 100
## Promotion
##        1        2        3 
## 31.38686 34.30657 34.30657
# Share of rows in each market size, as percentages.
prop.table(table(MarketSize = MarCamp.df$MarketSize)) * 100
## MarketSize
##    Large   Medium    Small 
## 30.65693 58.39416 10.94891

Number of observations (one row per store per week) wrt Promotion strategies and market size in two way Contingency Tables

# Two-way frequency table: market size (rows) by promotion strategy (columns).
mytable1 <- xtabs(~ MarketSize + Promotion, data = MarCamp.df)
print(mytable1)
##           Promotion
## MarketSize   1   2   3
##     Large   56  64  48
##     Medium  96 108 116
##     Small   20  16  24
# Same table with row and column totals appended as a "Sum" margin.
addmargins(mytable1)
##           Promotion
## MarketSize   1   2   3 Sum
##     Large   56  64  48 168
##     Medium  96 108 116 320
##     Small   20  16  24  60
##     Sum    172 188 188 548
library(gmodels)
# Detailed cross-tabulation: each cell reports the count, its chi-square
# contribution, and its share of the row, column and table totals.
# The column expressions are passed as written because they are echoed as the
# row/column labels in the printed table.
CrossTable(MarCamp.df$MarketSize, MarCamp.df$Promotion)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  548 
## 
##  
##                       | MarCamp.df$Promotion 
## MarCamp.df$MarketSize |         1 |         2 |         3 | Row Total | 
## ----------------------|-----------|-----------|-----------|-----------|
##                 Large |        56 |        64 |        48 |       168 | 
##                       |     0.203 |     0.703 |     1.611 |           | 
##                       |     0.333 |     0.381 |     0.286 |     0.307 | 
##                       |     0.326 |     0.340 |     0.255 |           | 
##                       |     0.102 |     0.117 |     0.088 |           | 
## ----------------------|-----------|-----------|-----------|-----------|
##                Medium |        96 |       108 |       116 |       320 | 
##                       |     0.196 |     0.029 |     0.352 |           | 
##                       |     0.300 |     0.338 |     0.362 |     0.584 | 
##                       |     0.558 |     0.574 |     0.617 |           | 
##                       |     0.175 |     0.197 |     0.212 |           | 
## ----------------------|-----------|-----------|-----------|-----------|
##                 Small |        20 |        16 |        24 |        60 | 
##                       |     0.072 |     1.021 |     0.567 |           | 
##                       |     0.333 |     0.267 |     0.400 |     0.109 | 
##                       |     0.116 |     0.085 |     0.128 |           | 
##                       |     0.036 |     0.029 |     0.044 |           | 
## ----------------------|-----------|-----------|-----------|-----------|
##          Column Total |       172 |       188 |       188 |       548 | 
##                       |     0.314 |     0.343 |     0.343 |           | 
## ----------------------|-----------|-----------|-----------|-----------|
## 
## 

Number of stores wrt Promotion strategies, market size and week in three way Contingency Tables

# Three-way frequency table: market size by week, one slice per promotion.
mytable2 <- xtabs(~ MarketSize + week + Promotion, data = MarCamp.df)
print(mytable2)
## , , Promotion = 1
## 
##           week
## MarketSize  1  2  3  4
##     Large  14 14 14 14
##     Medium 24 24 24 24
##     Small   5  5  5  5
## 
## , , Promotion = 2
## 
##           week
## MarketSize  1  2  3  4
##     Large  16 16 16 16
##     Medium 27 27 27 27
##     Small   4  4  4  4
## 
## , , Promotion = 3
## 
##           week
## MarketSize  1  2  3  4
##     Large  12 12 12 12
##     Medium 29 29 29 29
##     Small   6  6  6  6
# Flatten the three-way table into a single grid: MarketSize and week as
# rows, Promotion as columns.
ftable(mytable2) # Compact 3-way Table
##                 Promotion  1  2  3
## MarketSize week                   
## Large      1              14 16 12
##            2              14 16 12
##            3              14 16 12
##            4              14 16 12
## Medium     1              24 27 29
##            2              24 27 29
##            3              24 27 29
##            4              24 27 29
## Small      1               5  4  6
##            2               5  4  6
##            3               5  4  6
##            4               5  4  6

Correlation test between all variables (factors converted to integer codes)

library(psych)
# Convert the factor columns back to numbers so that a correlation matrix can
# be computed. as.integer() on a factor returns its internal level codes, so
# the numerically labelled factors go through as.character() first to recover
# the values stored in their labels.
MarCamp.df$Promotion <- as.integer(as.character(MarCamp.df$Promotion))
MarCamp.df$week <- as.integer(as.character(MarCamp.df$week))
MarCamp.df$MarketID <- as.integer(as.character(MarCamp.df$MarketID))
# MarketSize has text labels, so its level codes are used directly. Factor
# levels are sorted alphabetically, giving 1-Large, 2-Medium, 3-Small.
MarCamp.df$MarketSize <- as.integer(MarCamp.df$MarketSize)
str(MarCamp.df)
## 'data.frame':    548 obs. of  7 variables:
##  $ MarketID        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ MarketSize      : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ LocationID      : int  1 1 1 1 2 2 2 2 3 3 ...
##  $ AgeOfStore      : int  4 4 4 4 5 5 5 5 12 12 ...
##  $ Promotion       : int  3 3 3 3 2 2 2 2 1 1 ...
##  $ week            : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ SalesInThousands: num  33.7 35.7 29 39.2 27.8 ...
# Pairwise correlations, with p-values, across every column of the data frame.
# NOTE(review): MarketID, LocationID, Promotion and MarketSize are category
# codes at this point, so their coefficients need careful interpretation.
corr.test(MarCamp.df, use="complete")
## Call:corr.test(x = MarCamp.df, use = "complete")
## Correlation matrix 
##                  MarketID MarketSize LocationID AgeOfStore Promotion  week
## MarketID             1.00      -0.26       1.00      -0.05     -0.05  0.00
## MarketSize          -0.26       1.00      -0.27       0.16      0.06  0.00
## LocationID           1.00      -0.27       1.00      -0.05     -0.05  0.00
## AgeOfStore          -0.05       0.16      -0.05       1.00      0.06  0.00
## Promotion           -0.05       0.06      -0.05       0.06      1.00  0.00
## week                 0.00       0.00       0.00       0.00      0.00  1.00
## SalesInThousands    -0.19      -0.45      -0.19      -0.03     -0.06 -0.01
##                  SalesInThousands
## MarketID                    -0.19
## MarketSize                  -0.45
## LocationID                  -0.19
## AgeOfStore                  -0.03
## Promotion                   -0.06
## week                        -0.01
## SalesInThousands             1.00
## Sample Size 
## [1] 548
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                  MarketID MarketSize LocationID AgeOfStore Promotion week
## MarketID             0.00       0.00       0.00       1.00      1.00  1.0
## MarketSize           0.00       0.00       0.00       0.00      1.00  1.0
## LocationID           0.00       0.00       0.00       1.00      1.00  1.0
## AgeOfStore           0.24       0.00       0.24       0.00      1.00  1.0
## Promotion            0.28       0.19       0.24       0.16      0.00  1.0
## week                 1.00       1.00       1.00       1.00      1.00  0.0
## SalesInThousands     0.00       0.00       0.00       0.51      0.17  0.8
##                  SalesInThousands
## MarketID                        0
## MarketSize                      0
## LocationID                      0
## AgeOfStore                      1
## Promotion                       1
## week                            1
## SalesInThousands                0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

Construct a Corrgram based on all variables in the dataset.

library(corrgram)
# Correlogram of every column, kept in data-frame order: shaded cells below
# the diagonal, pie glyphs above it, min/max values on the diagonal.
corrgram(
  MarCamp.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of dataset variables"
)

Construct a scatter plot matrix of the dataset.

# Converting from Integer to Factor
# NOTE(review): MarketSize holds integer codes at this point, so as.factor()
# yields levels "1", "2", "3" rather than the original size labels.
MarCamp.df$Promotion <- as.factor(MarCamp.df$Promotion)
MarCamp.df$week <- as.factor(MarCamp.df$week)
MarCamp.df$MarketID <- as.factor(MarCamp.df$MarketID)
MarCamp.df$MarketSize <- as.factor(MarCamp.df$MarketSize)
library(car)
# Scatter plot matrix of all seven columns, with histograms requested for the
# diagonal panels.
# NOTE(review): the accepted form of `diagonal` may differ between car
# releases — confirm against the installed version.
scatterplotMatrix(formula = ~ MarketID + MarketSize + LocationID + AgeOfStore +
                  Promotion + week + SalesInThousands, cex=0.6, data=MarCamp.df, 
                  diagonal="histogram")