Import Libraries

library(readr)
library(plyr)
library(corrplot)
## corrplot 0.92 loaded

Import the file

The Black Friday and Non-Black Friday data were merged into a single file, which is read into R here.

# Read the merged sales file, then recode the Product labels as the numeric
# codes 1-4 (Entertainment = 1, Movies = 2, Sports = 3, Kids = 4).
PBI <- read_csv("rmd.csv", show_col_types = FALSE)
PBI[["Product"]] <- as.numeric(
  revalue(
    PBI[["Product"]],
    c("Entertainment" = "1", "Movies" = "2", "Sports" = "3", "Kids" = "4")
  )
)

Dividing the data into two groups: Black Friday and Non-Black Friday

The Indicator field distinguishes Black Friday sales from rest-of-year sales. The data is split into these two groups, producing one data frame for each.

# Split the sales on Indicator: rows coded "0" go to BF, rows coded "1" to
# NBF. The negative indices then drop specific rows by their position within
# each subset.
# NOTE(review): presumably these positions are outliers found by inspection;
# confirm, since they silently point at other rows if the input file changes.
BF <- PBI[PBI[["Indicator"]] == "0", ][-c(59, 71), ]
NBF <- PBI[PBI[["Indicator"]] == "1", ][-c(41, 85, 58, 68), ]

Correlation Plots

Correlation plots are drawn to see whether the numerical features are correlated with one another. This matters when testing whether the group means differ: if the correlations are high, a multivariate hypothesis test should be performed instead of separate univariate tests on each variable.

# Pairwise correlations between columns 4-6 of the Black Friday data.
# cor() already returns the correlation matrix. Running that matrix through
# cor() a second time correlates its columns with each other, so the plot
# would no longer show the correlations between the variables themselves.
corr_brew <- cor(BF[, 4:6])
# Same matrix under the name the plotting call (and any later code) uses.
mat <- corr_brew
corrplot(mat, order = "AOE", method = "color", addCoef.col = "gray")
Correlations between variables in Black friday

Correlations between variables in Black friday

# Pairwise correlations between columns 4-6 of the Non-Black Friday data.
# cor() already returns the correlation matrix. Running that matrix through
# cor() a second time correlates its columns with each other, so the plot
# would no longer show the correlations between the variables themselves.
corr_brew <- cor(NBF[, 4:6])
# Same matrix under the name the plotting call (and any later code) uses.
mat <- corr_brew
corrplot(mat, order = "AOE", method = "color", addCoef.col = "gray")
Correlations between variables in Non-Black friday

Correlations between variables in Non-Black friday

Because the correlations are high, both variables are considered jointly, and a multivariate hypothesis test (Hotelling’s T² test) is carried out to check whether a significant difference exists between the group means of Price_paid and Days_before_activation.

# Sample mean vectors of columns 5-6 (Price_paid, Days_before_activation),
# one per group, each stored as a 2 x 1 column matrix for the matrix algebra
# that follows.
y_bar_BF <- as.matrix(vapply(BF[, 5:6], mean, numeric(1)))
y_bar_BF
##                            [,1]
## Price_paid             64.65408
## Days_before_activation 33.85714
y_bar_NBF <- as.matrix(vapply(NBF[, 5:6], mean, numeric(1)))
y_bar_NBF
##                             [,1]
## Price_paid             44.728437
## Days_before_activation  2.302083
# Group sizes and number of response variables for the two-sample test.
# The sizes are read from the data frames instead of being typed in, so they
# stay in step with whatever row filtering produced BF and NBF.
n1 <- nrow(BF)
n2 <- nrow(NBF)
# Two responses are compared: Price_paid and Days_before_activation.
p <- 2

# Sample covariance matrices of the two responses in each group.
# BF and NBF have already had their excluded rows removed where they are
# built, so they are used as-is here. Applying the negative row indices again
# would drop further, different observations and leave the covariances based
# on fewer rows than the mean vectors and n1/n2.
# NOTE(review): the T2 and F values printed further down were recorded with
# the earlier covariance inputs; re-knit the document to refresh them.
S_BF <- cov(BF[, 5:6])
S_NBF <- cov(NBF[, 5:6])

# Pooled covariance, already scaled by (1/n1 + 1/n2) so that it is the
# covariance of the difference between the two mean vectors.
Sp <- (
  ((n1 - 1) * S_BF + (n2 - 1) * S_NBF) / (n1 + n2 - 2)
) * ((1 / n1) + (1 / n2))

# Hotelling's T^2: the quadratic form d' Sp^{-1} d, where d is the difference
# between the two group mean vectors.
mean_diff <- y_bar_BF - y_bar_NBF
T2 <- t(mean_diff) %*% solve(Sp) %*% mean_diff
T2
##          [,1]
## [1,] 9617.976
# Convert T^2 to an F statistic. Under the null hypothesis of equal mean
# vectors, (n1 + n2 - p - 1) / ((n1 + n2 - 2) * p) * T^2 follows an
# F distribution with p and n1 + n2 - p - 1 degrees of freedom.
# NOTE(review): the name `F` masks R's built-in shorthand for FALSE from this
# point on; it is kept because the rest of the script refers to it.
F <- ((n1 + n2 - p - 1) / ((n1 + n2 - 2) * p)) * T2
F
##          [,1]
## [1,] 4783.941
# Upper-tail p-value. The denominator degrees of freedom must match the
# scaling used above (n1 + n2 - p - 1), not the total sample size.
1 - pf(F, p, n1 + n2 - p - 1) # way less than 0.05 so we reject null that both means are equal.
##      [,1]
## [1,]    0

Histograms of the continuous variables

Black Friday

# Density-scaled histogram of Price_paid for the Black Friday group, with a
# normal curve (sample mean and sd) overlaid on the grid 0..240 for a visual
# check against normality.
hist(
  BF[["Price_paid"]],
  freq = FALSE,
  main = "Histogram of Price Paid by Black Friday Customers",
  xlab = "Price Paid"
)
lines(
  x = 0:240,
  y = dnorm(0:240, mean(BF[["Price_paid"]]), sd(BF[["Price_paid"]])),
  type = "l", lwd = 2, col = "maroon"
)
Histogram of Price_paid

Histogram of Price_paid

# Density-scaled histogram of Days_before_activation for the Black Friday
# group, with a normal curve (sample mean and sd) overlaid on the grid 0..50.
hist(
  BF[["Days_before_activation"]],
  freq = FALSE,
  main = "Histogram of Days before Activation for Black Friday Customers",
  xlab = "Days before Activation"
)
lines(
  x = 0:50,
  y = dnorm(
    0:50,
    mean(BF[["Days_before_activation"]]),
    sd(BF[["Days_before_activation"]])
  ),
  type = "l", lwd = 2, col = "maroon"
)
Histogram of Days_before_activation

Histogram of Days_before_activation

Non-Black Friday

# Density-scaled histogram of Price_paid for the Non-Black Friday group, with
# a normal curve (sample mean and sd) overlaid on the grid 0..240.
hist(
  NBF[["Price_paid"]],
  freq = FALSE,
  main = "Histogram of Price Paid by Non Black Friday Customers",
  xlab = "Price Paid"
)
lines(
  x = 0:240,
  y = dnorm(0:240, mean(NBF[["Price_paid"]]), sd(NBF[["Price_paid"]])),
  type = "l", lwd = 2, col = "maroon"
)
Histogram of Price_paid

Histogram of Price_paid

# Density-scaled histogram of Days_before_activation for the Non-Black Friday
# group, with a normal curve (sample mean and sd) overlaid on the grid 0..50.
hist(
  NBF[["Days_before_activation"]],
  freq = FALSE,
  main = "Histogram of Days before Activation for Non Black Friday Customers",
  xlab = "Days before Activation"
)
lines(
  x = 0:50,
  y = dnorm(
    0:50,
    mean(NBF[["Days_before_activation"]]),
    sd(NBF[["Days_before_activation"]])
  ),
  type = "l", lwd = 2, col = "maroon"
)
Histogram of Days_before_activation

Histogram of Days_before_activation