# Load the CRM data set from the local data directory.
# NOTE(review): the absolute path is machine-specific; adjust before running
# on another machine.
setwd("C:/Users/vaibhav/Desktop/DataSets")
# stringsAsFactors = TRUE keeps `email` a factor on R >= 4.0 (where the
# default became FALSE); the factor counts in summary() and the
# factor-indexed plot colours further down depend on that.
abc.df <- read.csv("CRMData.csv", stringsAsFactors = TRUE)
# Open the data frame in the interactive data viewer.
View(abc.df)
Mean Online Spend
# Arithmetic mean of the online.spend column.
mean(abc.df[["online.spend"]])
## [1] 170.3182
Mean Offline Spend
# Arithmetic mean of the store.spend column.
mean(abc.df[["store.spend"]])
## [1] 47.5821
Summarizing data
# Per-column summary: quartiles and mean for numeric columns, level counts
# for factors, and NA counts where values are missing.
summary(object = abc.df)
## cust.id age credit.score email
## Min. : 1.0 Min. :19.34 Min. :543.0 no :186
## 1st Qu.: 250.8 1st Qu.:31.43 1st Qu.:691.7 yes:814
## Median : 500.5 Median :35.10 Median :725.5
## Mean : 500.5 Mean :34.92 Mean :725.5
## 3rd Qu.: 750.2 3rd Qu.:38.20 3rd Qu.:757.2
## Max. :1000.0 Max. :51.86 Max. :880.8
##
## distance.to.store online.visits online.trans online.spend
## Min. : 0.2136 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.3383 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.00
## Median : 7.1317 Median : 6.00 Median : 2.000 Median : 37.03
## Mean : 14.6553 Mean : 28.29 Mean : 8.385 Mean : 170.32
## 3rd Qu.: 16.6589 3rd Qu.: 31.00 3rd Qu.: 9.000 3rd Qu.: 177.89
## Max. :267.0864 Max. :606.00 Max. :169.000 Max. :3593.03
##
## store.trans store.spend sat.service sat.selection
## Min. : 0.000 Min. : 0.00 Min. :1.00 Min. :1.000
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.:3.00 1st Qu.:2.000
## Median : 1.000 Median : 30.05 Median :3.00 Median :2.000
## Mean : 1.323 Mean : 47.58 Mean :3.07 Mean :2.401
## 3rd Qu.: 2.000 3rd Qu.: 66.49 3rd Qu.:4.00 3rd Qu.:3.000
## Max. :12.000 Max. :705.66 Max. :5.00 Max. :5.000
## NA's :341 NA's :341
Visualizing Online vs Offline Data
# Side-by-side histograms of online and in-store spend.
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into later plots.
old.par <- par(mfrow = c(1, 2))
hist(abc.df$online.spend, col = "red", breaks = 50)
hist(abc.df$store.spend, col = "Blue", breaks = 50)
par(old.par)
Boxplots for the spend online vs offline
# Side-by-side horizontal boxplots of online and in-store spend.
# The previous graphics settings are saved and restored so the two-panel
# layout only applies to this pair of plots.
old.par <- par(mfrow = c(1, 2))
boxplot(abc.df$online.spend, horizontal = TRUE,
        xlab = "Online Spend", main = "Online spend")
boxplot(abc.df$store.spend, horizontal = TRUE,
        xlab = "Offline Spend", main = "Offline Spend")
par(old.par)
Checking Online and Offline Spend Simultaneously (log-log scatter plot, by email status)
# Scatter plot of online vs. in-store spend on log-log axes, with point
# colour and symbol keyed by the `email` column.
# as.factor() guarantees integer-code indexing into my.col / my.pch even if
# `email` was read in as a character column.
email.f <- as.factor(abc.df$email)
my.col <- c("black", "red")
my.pch <- c(1, 19)
# +1 keeps zero spends plottable on logarithmic axes.
plot(x = abc.df$online.spend + 1, y = abc.df$store.spend + 1, log = "xy",
     col = my.col[email.f], pch = my.pch[email.f],
     main = "Online vs Offline")
# One legend entry per email level. The levels must be pasted inside
# paste(), and col/pch are supplied so each entry shows its plotting symbol.
legend(x = "topright", legend = paste("email:", levels(email.f)),
       col = my.col, pch = my.pch)
Checking correlation between distance and offline spend
# Pearson correlation test between distance to store and in-store spend.
cor.test(x = abc.df$distance.to.store, y = abc.df$store.spend)
##
## Pearson's product-moment correlation
##
## data: abc.df$distance.to.store and abc.df$store.spend
## t = -7.8618, df = 998, p-value = 9.782e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2990114 -0.1822300
## sample estimates:
## cor
## -0.2414949
Making a correlation matrix
library(corrplot)
## corrplot 0.84 loaded
# Correlation plot of selected numeric columns. Columns are selected by name
# rather than by position so the selection survives column reordering.
# NOTE(review): store.spend and sat.selection are not in this set — confirm
# that is intended.
corr.vars <- c("credit.score", "distance.to.store", "online.visits",
               "online.trans", "online.spend", "store.trans", "sat.service")
# use = "complete.obs" drops every row that has an NA in any selected column.
corrplot(cor(abc.df[, corr.vars], use = "complete.obs"), method = "ellipse")
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Mixed correlation plot (ellipses in the upper triangle) over the same
# columns, selected by name rather than by position so the selection
# survives column reordering.
# NOTE(review): store.spend and sat.selection are not in this set — confirm
# that is intended.
mixed.vars <- c("credit.score", "distance.to.store", "online.visits",
                "online.trans", "online.spend", "store.trans", "sat.service")
# use = "complete.obs" drops every row that has an NA in any selected column.
corrplot.mixed(cor(abc.df[, mixed.vars], use = "complete.obs"),
               upper = "ellipse", tl.pos = "lt")
Regression
# Simple linear regression of in-store spend on distance to store.
# Bare column names plus `data =` (instead of abc.df$... inside the formula)
# give readable coefficient names and let predict() accept `newdata`.
fit <- lm(store.spend ~ distance.to.store, data = abc.df)
summary(fit)
##
## Call:
## lm(formula = store.spend ~ distance.to.store, data = abc.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.76 -43.12 -17.80 17.54 648.44
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.06911 2.47656 23.447 < 2e-16 ***
## distance.to.store -0.71558 0.09102 -7.862 9.78e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 65.98 on 998 degrees of freedom
## Multiple R-squared: 0.05832, Adjusted R-squared: 0.05738
## F-statistic: 61.81 on 1 and 998 DF, p-value: 9.782e-15
# Simple linear regression of online spend on distance to store.
# Bare column names plus `data =` (instead of abc.df$... inside the formula)
# give readable coefficient names and let predict() accept `newdata`.
fit1 <- lm(online.spend ~ distance.to.store, data = abc.df)
summary(fit1)
##
## Call:
## lm(formula = online.spend ~ distance.to.store, data = abc.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -174.5 -170.8 -132.8 10.1 3420.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 174.5923 12.3072 14.186 <2e-16 ***
## distance.to.store -0.2916 0.4523 -0.645 0.519
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 327.9 on 998 degrees of freedom
## Multiple R-squared: 0.0004164, Adjusted R-squared: -0.0005852
## F-statistic: 0.4157 on 1 and 998 DF, p-value: 0.5192