# Load the fast-food marketing campaign data set.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, while this script relies on MarketSize
# being a factor (level counts in summary(), integer codes for the
# correlation matrix further down).
MarCamp.df <- read.csv(
  "WA_Fn-UseC_-Marketing-Campaign-Eff-UseC_-FastF.csv",
  stringsAsFactors = TRUE
)
# Show the type and the first few values of every column.
str(MarCamp.df)
## 'data.frame': 548 obs. of 7 variables:
## $ MarketID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ MarketSize : Factor w/ 3 levels "Large","Medium",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LocationID : int 1 1 1 1 2 2 2 2 3 3 ...
## $ AgeOfStore : int 4 4 4 4 5 5 5 5 12 12 ...
## $ Promotion : int 3 3 3 3 2 2 2 2 1 1 ...
## $ week : int 1 2 3 4 1 2 3 4 1 2 ...
## $ SalesInThousands: num 33.7 35.7 29 39.2 27.8 ...
# Report the dimensions of the data frame (number of rows, number of columns).
dim(MarCamp.df)
## [1] 548 7
# Promotion, week and MarketID are categorical codes that were read in as
# integers; re-type all three as factors in a single step.
MarCamp.df <- transform(
  MarCamp.df,
  Promotion = as.factor(Promotion),
  week      = as.factor(week),
  MarketID  = as.factor(MarketID)
)
# Confirm the new column types.
str(MarCamp.df)
## 'data.frame': 548 obs. of 7 variables:
## $ MarketID : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ MarketSize : Factor w/ 3 levels "Large","Medium",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LocationID : int 1 1 1 1 2 2 2 2 3 3 ...
## $ AgeOfStore : int 4 4 4 4 5 5 5 5 12 12 ...
## $ Promotion : Factor w/ 3 levels "1","2","3": 3 3 3 3 2 2 2 2 1 1 ...
## $ week : Factor w/ 4 levels "1","2","3","4": 1 2 3 4 1 2 3 4 1 2 ...
## $ SalesInThousands: num 33.7 35.7 29 39.2 27.8 ...
# Per-column summary: level counts for the factor columns and
# min / quartiles / mean / max for the numeric columns.
summary(MarCamp.df)
## MarketID MarketSize LocationID AgeOfStore Promotion
## 3 : 88 Large :168 Min. : 1.0 Min. : 1.000 1:172
## 10 : 80 Medium:320 1st Qu.:216.0 1st Qu.: 4.000 2:188
## 5 : 60 Small : 60 Median :504.0 Median : 7.000 3:188
## 6 : 60 Mean :479.7 Mean : 8.504
## 7 : 60 3rd Qu.:708.0 3rd Qu.:12.000
## 1 : 52 Max. :920.0 Max. :28.000
## (Other):148
## week SalesInThousands
## 1:137 Min. :17.34
## 2:137 1st Qu.:42.55
## 3:137 Median :50.20
## 4:137 Mean :53.47
## 3rd Qu.:60.48
## Max. :99.65
##
# psych supplies describe().
library(psych)
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for every
# column. Rows flagged with "*" in the recorded output are the factor columns.
describe(MarCamp.df)
## vars n mean sd median trimmed mad min max
## MarketID* 1 548 5.72 2.88 6.0 5.76 4.45 1.00 10.00
## MarketSize* 2 548 1.80 0.61 2.0 1.75 0.00 1.00 3.00
## LocationID 3 548 479.66 287.97 504.0 483.96 421.06 1.00 920.00
## AgeOfStore 4 548 8.50 6.64 7.0 7.63 5.93 1.00 28.00
## Promotion* 5 548 2.03 0.81 2.0 2.04 1.48 1.00 3.00
## week* 6 548 2.50 1.12 2.5 2.50 1.48 1.00 4.00
## SalesInThousands 7 548 53.47 16.76 50.2 52.02 12.76 17.34 99.65
## range skew kurtosis se
## MarketID* 9.00 -0.02 -1.18 0.12
## MarketSize* 2.00 0.14 -0.53 0.03
## LocationID 919.00 -0.02 -1.16 12.30
## AgeOfStore 27.00 1.04 0.35 0.28
## Promotion* 2.00 -0.05 -1.48 0.03
## week* 3.00 0.00 -1.37 0.05
## SalesInThousands 82.31 0.80 0.14 0.72
# Horizontal box plots of sales, one box per promotion strategy.
boxplot(
  SalesInThousands ~ Promotion,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "yellow"),
  main = "Plot of Promotion Strategy vs Sales",
  xlab = "Sales in Thousands",
  ylab = "Promotion Strategy"
)
# Scatter plot of sales against the age of the store.
plot(
  SalesInThousands ~ AgeOfStore,
  data = MarCamp.df,
  main = "Visualization of Sales wrt Age of store",
  xlab = "Age Of Store",
  ylab = "Sales in Thousands"
)
# Horizontal box plots of sales, one box per market size.
boxplot(
  SalesInThousands ~ MarketSize,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "peachpuff", "yellow", "green", "pink"),
  main = "Plot of Market Size and Sales",
  xlab = "Sales In Thousands",
  ylab = "Market Size"
)
# Horizontal box plots of the location IDs assigned to each promotion strategy.
boxplot(
  LocationID ~ Promotion,
  data = MarCamp.df,
  horizontal = TRUE,
  col = c("red", "blue", "peachpuff", "yellow", "green", "pink"),
  main = "Plot of Promotion Strategy and Location ID",
  xlab = "Location ID",
  ylab = "Promotion Strategy"
)
# lattice supplies histogram(); attaching it once covers both calls below.
library(lattice)
# Distribution of sales, one panel per market size.
histogram(~ SalesInThousands | MarketSize, data = MarCamp.df)
# Distribution of promotion strategies, one panel per market size.
histogram(~ Promotion | MarketSize, data = MarCamp.df)
# Histogram (percentages) of sales, one panel per Promotion x MarketSize
# combination on a 3 x 3 grid. Passing type = "count" would plot counts
# instead.
histogram(
  ~ SalesInThousands | Promotion + MarketSize,
  data = MarCamp.df,
  layout = c(3, 3),
  col = c("burlywood", "darkolivegreen", "red", "yellow", "peachpuff", "blue")
)
# Histogram (counts) of weeks, one panel per Promotion x MarketSize
# combination on a 3 x 3 grid.
histogram(
  ~ week | Promotion + MarketSize,
  data = MarCamp.df,
  layout = c(3, 3),
  type = "count",
  col = c("burlywood", "darkolivegreen", "red", "yellow", "peachpuff", "blue")
)
# Mean sales for each market size.
aggregate(
  SalesInThousands ~ MarketSize,
  data = MarCamp.df,
  FUN = mean
)
## MarketSize SalesInThousands
## 1 Large 70.11673
## 2 Medium 43.98534
## 3 Small 57.40933
# Mean sales for each promotion strategy.
aggregate(
  SalesInThousands ~ Promotion,
  data = MarCamp.df,
  FUN = mean
)
## Promotion SalesInThousands
## 1 1 58.09901
## 2 2 47.32941
## 3 3 55.36447
# Mean sales for each distinct store age.
aggregate(
  SalesInThousands ~ AgeOfStore,
  data = MarCamp.df,
  FUN = mean
)
## AgeOfStore SalesInThousands
## 1 1 58.41562
## 2 2 59.17950
## 3 3 60.22750
## 4 4 53.43773
## 5 5 48.81864
## 6 6 51.36667
## 7 7 52.12875
## 8 8 50.47575
## 9 9 48.99607
## 10 10 39.31375
## 11 11 57.15937
## 12 12 47.48292
## 13 13 59.64250
## 14 14 49.06333
## 15 15 42.67375
## 16 17 49.93750
## 17 18 50.71000
## 18 19 63.63800
## 19 20 60.20250
## 20 22 59.68833
## 21 23 65.09750
## 22 24 51.14083
## 23 25 45.42500
## 24 27 52.39250
## 25 28 52.28500
# Mean sales for each week of the campaign.
aggregate(
  SalesInThousands ~ week,
  data = MarCamp.df,
  FUN = mean
)
## week SalesInThousands
## 1 1 53.79058
## 2 2 53.38657
## 3 3 53.47460
## 4 4 53.21307
# Mean sales and mean store age for each market size.
aggregate(
  cbind(SalesInThousands, AgeOfStore) ~ MarketSize,
  data = MarCamp.df,
  FUN = mean
)
## MarketSize SalesInThousands AgeOfStore
## 1 Large 70.11673 7.142857
## 2 Medium 43.98534 8.787500
## 3 Small 57.40933 10.800000
# Mean sales and mean store age for each promotion strategy.
aggregate(
  cbind(SalesInThousands, AgeOfStore) ~ Promotion,
  data = MarCamp.df,
  FUN = mean
)
## Promotion SalesInThousands AgeOfStore
## 1 1 58.09901 8.279070
## 2 2 47.32941 7.978723
## 3 3 55.36447 9.234043
# Number of observations under each promotion strategy.
table(Promotion = MarCamp.df$Promotion)
## Promotion
## 1 2 3
## 172 188 188
# Number of observations in each market size.
table(MarketSize = MarCamp.df$MarketSize)
## MarketSize
## Large Medium Small
## 168 320 60
# Share of observations under each promotion strategy, in percent.
prop.table(table(Promotion = MarCamp.df$Promotion)) * 100
## Promotion
## 1 2 3
## 31.38686 34.30657 34.30657
# Share of observations in each market size, in percent.
prop.table(table(MarketSize = MarCamp.df$MarketSize)) * 100
## MarketSize
## Large Medium Small
## 30.65693 58.39416 10.94891
# Two-way contingency table: rows are market sizes, columns are promotion
# strategies, cells are observation counts.
mytable1 <- xtabs(~ MarketSize+Promotion, data=MarCamp.df)
mytable1 # frequencies
## Promotion
## MarketSize 1 2 3
## Large 56 64 48
## Medium 96 108 116
## Small 20 16 24
# Same two-way table with row and column totals ("Sum") appended.
addmargins(mytable1)
## Promotion
## MarketSize 1 2 3 Sum
## Large 56 64 48 168
## Medium 96 108 116 320
## Small 20 16 24 60
## Sum 172 188 188 548
# gmodels supplies CrossTable().
library(gmodels)
# Cross-tabulate market size against promotion. Per the legend in the recorded
# output, each cell lists the count, its chi-square contribution and the row,
# column and table proportions.
CrossTable(MarCamp.df$MarketSize, MarCamp.df$Promotion)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 548
##
##
## | MarCamp.df$Promotion
## MarCamp.df$MarketSize | 1 | 2 | 3 | Row Total |
## ----------------------|-----------|-----------|-----------|-----------|
## Large | 56 | 64 | 48 | 168 |
## | 0.203 | 0.703 | 1.611 | |
## | 0.333 | 0.381 | 0.286 | 0.307 |
## | 0.326 | 0.340 | 0.255 | |
## | 0.102 | 0.117 | 0.088 | |
## ----------------------|-----------|-----------|-----------|-----------|
## Medium | 96 | 108 | 116 | 320 |
## | 0.196 | 0.029 | 0.352 | |
## | 0.300 | 0.338 | 0.362 | 0.584 |
## | 0.558 | 0.574 | 0.617 | |
## | 0.175 | 0.197 | 0.212 | |
## ----------------------|-----------|-----------|-----------|-----------|
## Small | 20 | 16 | 24 | 60 |
## | 0.072 | 1.021 | 0.567 | |
## | 0.333 | 0.267 | 0.400 | 0.109 |
## | 0.116 | 0.085 | 0.128 | |
## | 0.036 | 0.029 | 0.044 | |
## ----------------------|-----------|-----------|-----------|-----------|
## Column Total | 172 | 188 | 188 | 548 |
## | 0.314 | 0.343 | 0.343 | |
## ----------------------|-----------|-----------|-----------|-----------|
##
##
# Three-way contingency table of counts: MarketSize x week, with one slice per
# Promotion level.
mytable2 <- xtabs(~ MarketSize+week+Promotion, data=MarCamp.df)
# Print the table slice by slice.
mytable2
## , , Promotion = 1
##
## week
## MarketSize 1 2 3 4
## Large 14 14 14 14
## Medium 24 24 24 24
## Small 5 5 5 5
##
## , , Promotion = 2
##
## week
## MarketSize 1 2 3 4
## Large 16 16 16 16
## Medium 27 27 27 27
## Small 4 4 4 4
##
## , , Promotion = 3
##
## week
## MarketSize 1 2 3 4
## Large 12 12 12 12
## Medium 29 29 29 29
## Small 6 6 6 6
# Flatten the three-way table into a single two-dimensional layout:
# MarketSize and week index the rows, Promotion indexes the columns.
ftable(mytable2) # Compact 3-way Table
## Promotion 1 2 3
## MarketSize week
## Large 1 14 16 12
## 2 14 16 12
## 3 14 16 12
## 4 14 16 12
## Medium 1 24 27 29
## 2 24 27 29
## 3 24 27 29
## 4 24 27 29
## Small 1 5 4 6
## 2 5 4 6
## 3 5 4 6
## 4 5 4 6
library(psych)
# Temporarily convert the factor columns to integers so that every column is
# numeric for the correlation matrix.
# as.integer(as.character(f)) recovers the numeric labels themselves; a bare
# as.integer(f) returns internal level codes, which only equal the labels
# while the levels happen to be 1..k in order.
MarCamp.df$Promotion <- as.integer(as.character(MarCamp.df$Promotion))
MarCamp.df$week <- as.integer(as.character(MarCamp.df$week))
MarCamp.df$MarketID <- as.integer(as.character(MarCamp.df$MarketID))
# MarketSize holds text labels, so it is coded through an explicit level order
# (this also works when the column is character rather than factor).
# The order is alphabetical: 1 = Large, 2 = Medium, 3 = Small. It is NOT
# ordered by size, so the sign of any correlation involving MarketSize must be
# read against this coding.
MarCamp.df$MarketSize <- as.integer(
  factor(MarCamp.df$MarketSize, levels = c("Large", "Medium", "Small"))
)
# Confirm that every column is now integer or numeric.
str(MarCamp.df)
## 'data.frame': 548 obs. of 7 variables:
## $ MarketID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ MarketSize : int 2 2 2 2 2 2 2 2 2 2 ...
## $ LocationID : int 1 1 1 1 2 2 2 2 3 3 ...
## $ AgeOfStore : int 4 4 4 4 5 5 5 5 12 12 ...
## $ Promotion : int 3 3 3 3 2 2 2 2 1 1 ...
## $ week : int 1 2 3 4 1 2 3 4 1 2 ...
## $ SalesInThousands: num 33.7 35.7 29 39.2 27.8 ...
# Correlation matrix with p-values across all (now numeric) columns.
# NOTE(review): MarketID and LocationID look like identifiers and MarketSize is
# a nominal code, so their coefficients are presumably not meaningful as linear
# associations - confirm before interpreting them.
corr.test(MarCamp.df, use="complete")
## Call:corr.test(x = MarCamp.df, use = "complete")
## Correlation matrix
## MarketID MarketSize LocationID AgeOfStore Promotion week
## MarketID 1.00 -0.26 1.00 -0.05 -0.05 0.00
## MarketSize -0.26 1.00 -0.27 0.16 0.06 0.00
## LocationID 1.00 -0.27 1.00 -0.05 -0.05 0.00
## AgeOfStore -0.05 0.16 -0.05 1.00 0.06 0.00
## Promotion -0.05 0.06 -0.05 0.06 1.00 0.00
## week 0.00 0.00 0.00 0.00 0.00 1.00
## SalesInThousands -0.19 -0.45 -0.19 -0.03 -0.06 -0.01
## SalesInThousands
## MarketID -0.19
## MarketSize -0.45
## LocationID -0.19
## AgeOfStore -0.03
## Promotion -0.06
## week -0.01
## SalesInThousands 1.00
## Sample Size
## [1] 548
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## MarketID MarketSize LocationID AgeOfStore Promotion week
## MarketID 0.00 0.00 0.00 1.00 1.00 1.0
## MarketSize 0.00 0.00 0.00 0.00 1.00 1.0
## LocationID 0.00 0.00 0.00 1.00 1.00 1.0
## AgeOfStore 0.24 0.00 0.24 0.00 1.00 1.0
## Promotion 0.28 0.19 0.24 0.16 0.00 1.0
## week 1.00 1.00 1.00 1.00 1.00 0.0
## SalesInThousands 0.00 0.00 0.00 0.51 0.17 0.8
## SalesInThousands
## MarketID 0
## MarketSize 0
## LocationID 0
## AgeOfStore 1
## Promotion 1
## week 1
## SalesInThousands 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# corrgram supplies corrgram() and the panel.* helpers used below.
library(corrgram)
# Correlogram of every column, kept in data-frame order: shaded cells below
# the diagonal, pie glyphs above it, min/max values on the diagonal.
corrgram(
  MarCamp.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of dataset variables"
)
# Convert the integer-coded columns back to factors.
MarCamp.df$Promotion <- as.factor(MarCamp.df$Promotion)
MarCamp.df$week <- as.factor(MarCamp.df$week)
MarCamp.df$MarketID <- as.factor(MarCamp.df$MarketID)
# Restore the text labels for MarketSize: a bare as.factor() on the integer
# codes would leave the levels as "1", "2", "3". The codes follow the
# alphabetical level order used when the column was converted to integer
# (1 = Large, 2 = Medium, 3 = Small).
MarCamp.df$MarketSize <- factor(
  MarCamp.df$MarketSize,
  levels = 1:3,
  labels = c("Large", "Medium", "Small")
)
# car supplies scatterplotMatrix().
library(car)
# Scatter-plot matrix of all variables with smaller plotting symbols
# (cex = 0.6) and histograms on the diagonal.
# car >= 3.0 expects the diagonal specification as a list; the older string
# form diagonal = "histogram" is ignored there (with a warning) and the default
# density panels are drawn instead.
# NOTE(review): several of these columns are factors at this point - confirm
# that scatterplotMatrix() treats them the way you intend.
scatterplotMatrix(
  formula = ~ MarketID + MarketSize + LocationID + AgeOfStore +
    Promotion + week + SalesInThousands,
  data = MarCamp.df,
  cex = 0.6,
  diagonal = list(method = "histogram")
)