# Load the CRM customer data set used by every step below.
# NOTE(review): this absolute path is machine-specific; adjust it (or move to
# a project-relative path) before running on another machine.
setwd("C:/Users/vaibhav/Desktop/DataSets")

# stringsAsFactors = TRUE keeps text columns such as `email` as factors on
# R >= 4.0, where the read.csv() default became FALSE. The recorded summary()
# output (counts of "no"/"yes") and the email-based colour/symbol lookup in
# the scatter plot both rely on `email` being a factor.
abc.df <- read.csv("CRMData.csv", stringsAsFactors = TRUE)

# The data viewer is only useful in an interactive session; skip it when the
# script is run non-interactively.
if (interactive()) {
  View(abc.df)
}

Mean Online Spend

# Average online spend across all customers.
mean(abc.df[["online.spend"]])
## [1] 170.3182

Mean Offline Spend

# Average in-store (offline) spend across all customers.
mean(abc.df[["store.spend"]])
## [1] 47.5821

Summarizing data

# Per-column summary of the whole data frame.
summary(object = abc.df)
##     cust.id            age         credit.score   email    
##  Min.   :   1.0   Min.   :19.34   Min.   :543.0   no :186  
##  1st Qu.: 250.8   1st Qu.:31.43   1st Qu.:691.7   yes:814  
##  Median : 500.5   Median :35.10   Median :725.5            
##  Mean   : 500.5   Mean   :34.92   Mean   :725.5            
##  3rd Qu.: 750.2   3rd Qu.:38.20   3rd Qu.:757.2            
##  Max.   :1000.0   Max.   :51.86   Max.   :880.8            
##                                                            
##  distance.to.store  online.visits     online.trans      online.spend    
##  Min.   :  0.2136   Min.   :  0.00   Min.   :  0.000   Min.   :   0.00  
##  1st Qu.:  3.3383   1st Qu.:  0.00   1st Qu.:  0.000   1st Qu.:   0.00  
##  Median :  7.1317   Median :  6.00   Median :  2.000   Median :  37.03  
##  Mean   : 14.6553   Mean   : 28.29   Mean   :  8.385   Mean   : 170.32  
##  3rd Qu.: 16.6589   3rd Qu.: 31.00   3rd Qu.:  9.000   3rd Qu.: 177.89  
##  Max.   :267.0864   Max.   :606.00   Max.   :169.000   Max.   :3593.03  
##                                                                         
##   store.trans      store.spend      sat.service   sat.selection  
##  Min.   : 0.000   Min.   :  0.00   Min.   :1.00   Min.   :1.000  
##  1st Qu.: 0.000   1st Qu.:  0.00   1st Qu.:3.00   1st Qu.:2.000  
##  Median : 1.000   Median : 30.05   Median :3.00   Median :2.000  
##  Mean   : 1.323   Mean   : 47.58   Mean   :3.07   Mean   :2.401  
##  3rd Qu.: 2.000   3rd Qu.: 66.49   3rd Qu.:4.00   3rd Qu.:3.000  
##  Max.   :12.000   Max.   :705.66   Max.   :5.00   Max.   :5.000  
##                                    NA's   :341    NA's   :341

Visualizing online vs offline spend

# Two panels side by side (1 row, 2 columns): histogram of online spend on
# the left, in-store spend on the right, 50 suggested breaks each.
par(mfrow = c(1, 2))
hist(abc.df$online.spend, breaks = 50, col = "red")
hist(abc.df$store.spend, breaks = 50, col = "Blue")

Boxplots for the spend online vs offline

# Two panels side by side: horizontal boxplots of online and in-store spend.
par(mfrow = c(1, 2))
boxplot(
  abc.df$online.spend,
  main = "Online spend",
  xlab = "Online Spend",
  horizontal = TRUE
)
boxplot(
  abc.df$store.spend,
  main = "Offline Spend",
  xlab = "Offline Spend",
  horizontal = TRUE
)

Checking online and offline spend simultaneously

# Scatter plot of online vs. in-store spend on log-log axes, with colour and
# plotting symbol chosen by e-mail status.
my.col <- c("black", "red")
my.pch <- c(1, 19)

# The lookup vectors are indexed by factor codes. as.factor() leaves an
# existing factor unchanged and also covers the case where read.csv() left
# the column as character (the default on R >= 4.0).
my.email <- as.factor(abc.df$email)

# +1 shifts zero spends so they can be drawn on a log scale.
plot(
  x = abc.df$online.spend + 1,
  y = abc.df$store.spend + 1,
  log = "xy",
  col = my.col[my.email],
  pch = my.pch[my.email],
  main = "Online vs Offline"
)

# One legend entry per e-mail level. The level names belong inside paste();
# they were previously passed as a separate positional argument, so the
# legend text was only "email:". col/pch are supplied so the legend shows
# the same colours and symbols as the points.
legend(
  x = "topright",
  legend = paste("email:", levels(my.email)),
  col = my.col,
  pch = my.pch
)

Checking correlation between distance and offline spend

# Pearson correlation test between distance to store and in-store spend.
cor.test(
  x = abc.df$distance.to.store,
  y = abc.df$store.spend,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  abc.df$distance.to.store and abc.df$store.spend
## t = -7.8618, df = 998, p-value = 9.782e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2990114 -0.1822300
## sample estimates:
##        cor 
## -0.2414949

Making a correlation matrix

library(corrplot)
## corrplot 0.84 loaded
# Correlation plot (ellipses) for columns 3, 5-9 and 11. Going by the
# summary() output above these are credit.score, distance.to.store,
# online.visits, online.trans, online.spend, store.trans and sat.service.
# use = "complete.obs" drops rows with a missing value in any of them.
corrplot(
  cor(abc.df[, c(3, 5:9, 11)], use = "complete.obs"),
  method = "ellipse"
)

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Mixed correlation plot of the same columns (3, 5-9, 11), with ellipses in
# the upper triangle and text labels on the left and top.
# NOTE(review): corrplot.mixed() comes from the corrplot package; the gplots
# package loaded just above does not appear to be used by this call - confirm
# whether it is needed.
corrplot.mixed(
  cor(abc.df[, c(3, 5:9, 11)], use = "complete.obs"),
  upper = "ellipse",
  tl.pos = "lt"
)

Regression of store and online spend on distance to store

# Simple linear regression of in-store spend on distance to store.
# NOTE(review): the formula references abc.df$... directly rather than using
# `data = abc.df`; kept as-is so the Call and coefficient labels match the
# recorded output, but predict() with newdata will not work on this fit.
fit <- lm(formula = abc.df$store.spend ~ abc.df$distance.to.store)
summary(object = fit)
## 
## Call:
## lm(formula = abc.df$store.spend ~ abc.df$distance.to.store)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57.76 -43.12 -17.80  17.54 648.44 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              58.06911    2.47656  23.447  < 2e-16 ***
## abc.df$distance.to.store -0.71558    0.09102  -7.862 9.78e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 65.98 on 998 degrees of freedom
## Multiple R-squared:  0.05832,    Adjusted R-squared:  0.05738 
## F-statistic: 61.81 on 1 and 998 DF,  p-value: 9.782e-15
# Simple linear regression of online spend on distance to store.
# NOTE(review): the formula references abc.df$... directly rather than using
# `data = abc.df`; kept as-is so the Call and coefficient labels match the
# recorded output, but predict() with newdata will not work on this fit.
fit1 <- lm(formula = abc.df$online.spend ~ abc.df$distance.to.store)
summary(object = fit1)
## 
## Call:
## lm(formula = abc.df$online.spend ~ abc.df$distance.to.store)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -174.5 -170.8 -132.8   10.1 3420.9 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              174.5923    12.3072  14.186   <2e-16 ***
## abc.df$distance.to.store  -0.2916     0.4523  -0.645    0.519    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 327.9 on 998 degrees of freedom
## Multiple R-squared:  0.0004164,  Adjusted R-squared:  -0.0005852 
## F-statistic: 0.4157 on 1 and 998 DF,  p-value: 0.5192