Reading the Data

# Load the experiment data. stringsAsFactors = TRUE keeps `buckt` and `slr_id`
# as factors on R >= 4.0 too (where read.csv() no longer converts strings by
# default), matching the structure printed below and the factor-based plot
# later in the script.
df <- read.csv("test_dad.csv", stringsAsFactors = TRUE)

# Keep the four analysis columns by name rather than by position, so a
# reordered CSV cannot silently select the wrong fields.
df <- df[, c("buckt", "slr_id", "pre_GMV", "pos_GMV")]
str(df)

## 'data.frame':    9346 obs. of  4 variables:
##  $ buckt  : Factor w/ 2 levels "control","test": 1 1 2 2 2 2 2 1 2 2 ...
##  $ slr_id : Factor w/ 9346 levels "seller_1","seller_10",..: 1 1112 2223 3334 4445 5556 6667 7778 8889 2 ...
##  $ pre_GMV: num  0 0 0 0 99.2 ...
##  $ pos_GMV: num  0 0 0 0 2.12 4.32 6.31 6.95 8.19 8.42 ...

Merging pre and post GMV (row-wise maximum of the two periods)

# Row-wise maximum of the pre- and post-period GMV. pmax() is vectorised, so
# it avoids apply() coercing the data frame to a matrix and calling max()
# once per row.
df$gmv <- pmax(df$pre_GMV, df$pos_GMV)
head(df)

# Pre-period GMV against seller id, with post-period GMV overlaid in
# palette colour 2.
plot(df$slr_id, df$pre_GMV, type = "l")
points(df$slr_id, df$pos_GMV, col = 2, type = "l")

Dividing the data into control and test groups

# Split the sellers by experiment bucket: one data frame per bucket label.
df_control <- df[df[["buckt"]] == "control", ]
df_test <- df[df[["buckt"]] == "test", ]

# Quick look at the first rows of the test bucket.
head(df_test)

Analyze pre GMV trends of control

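This summarises the mean and standard deviation of pre-period GMV per 5% quantile bin; the lines that would add 95% limits (mean ± 1.96 × sd) are currently commented out.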

# Bin the control sellers into 5%-quantile bands of pre-period GMV and
# summarise each band by its mean and standard deviation.
# `probs` is written out in full; the previous `p =` only worked through
# partial argument matching.
brks <- quantile(df_control$pre_GMV, probs = seq(0, 1, 0.05))

# NOTE(review): cut() builds right-closed intervals, so observations equal to
# the lowest break (the minimum) get NA unless include.lowest = TRUE is set.
# Confirm whether dropping them is intended.
df_control$cats <- cut(df_control$pre_GMV, breaks = brks)

tbl1 <- data.frame(tapply(df_control$pre_GMV, df_control$cats, mean))
colnames(tbl1) <- "mean"
tbl1$sd <- tapply(df_control$pre_GMV, df_control$cats, sd)

# Approximate 95% limits (mean +/- 1.96 * sd), currently disabled:
# tbl1$ci1 <- round(tbl1$mean - 1.96 * tbl1$sd, 2)
# tbl1$ci2 <- round(tbl1$mean + 1.96 * tbl1$sd, 2)

tbl1

Analyze post GMV trends of control

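This summarises the mean and standard deviation of post-period GMV per quantile bin; the lines that would add 95% limits (mean ± 1.96 × sd) are currently commented out.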

# Bin the control sellers by post-period GMV and summarise each band by its
# mean and standard deviation.
# `probs` is written out in full; the previous `p =` only worked through
# partial argument matching.
brks <- quantile(df_control$pos_GMV, probs = seq(0, 1, 0.05))

# Only breaks 7..21 (the 30% quantile upwards) are used, so sellers at or
# below the 30% quantile fall outside every bin and get NA.
# NOTE(review): presumably the lower quantiles are tied, which cut() would
# reject as non-unique breaks -- confirm.
# This overwrites the `cats` column created for the pre-period analysis.
df_control$cats <- cut(df_control$pos_GMV, breaks = brks[7:21])

tbl2 <- data.frame(tapply(df_control$pos_GMV, df_control$cats, mean))
colnames(tbl2) <- "mean"
tbl2$sd <- tapply(df_control$pos_GMV, df_control$cats, sd)

# Approximate 95% limits (mean +/- 1.96 * sd), currently disabled:
# tbl2$ci1 <- round(tbl2$mean - 1.96 * tbl2$sd, 2)
# tbl2$ci2 <- round(tbl2$mean + 1.96 * tbl2$sd, 2)

tbl2

# Stacked horizontal bars of the per-bin mean GMV: post-period (colour 2) and
# pre-period (colour 4).
# tbl2 has fewer bins than tbl1 because it was cut on brks[7:21] only. A plain
# rbind() of the two columns recycled tbl2$mean (with a "not a multiple of
# vector length" warning) and drew post-period segments for bins that do not
# exist. The shorter vector is therefore padded explicitly; zeros are used
# rather than NA because an NA would blank the whole stacked bar.
# NOTE(review): the padding goes at the front on the assumption that the
# shorter table covers the upper quantile bins -- confirm the alignment.
local({
  post_mean <- c(tbl2$mean)
  pre_mean <- c(tbl1$mean)
  n_bins <- max(length(post_mean), length(pre_mean))
  pad_front <- function(x) c(rep(0, n_bins - length(x)), x)

  barplot(
    height = rbind(pad_front(post_mean), pad_front(pre_mean)),
    horiz = TRUE,
    col = c(2, 4)
  )
})

# Index plot of the post-period bin means from tbl2.
plot(tbl2$mean)

# Number of sellers (all buckets) whose pre-period GMV falls in each bin
# defined by breaks 7..21 of `brks`.
tabl3 <- table(cut(df$pre_GMV, breaks = brks[7:21]))

# Bin counts with the post-period bin means overlaid in colour 4.
plot(tabl3, ylim = c(0, 64000))
points(tbl2$mean, type = "b", col = 4)

Barplots of average GMV for the pre and post periods

# Re-read the full data set (all columns) for the pre/post comparison.
df <- read.csv("test_dad.csv")

# Common 5%-quantile breaks over the pooled pre- and post-period GMV, so both
# periods are binned on the same scale. The two lowest breaks (0% and 5%) are
# dropped. `probs` is written out in full instead of the partially matched
# `p =`.
cbrks <- quantile(c(df$pre_GMV, df$pos_GMV), probs = seq(0, 1, 0.05))
cbrks <- cbrks[-(1:2)]

# ---- control and test combined ----
df$pos_cat <- cut(df$pos_GMV, breaks = cbrks)
df$pre_cat <- cut(df$pre_GMV, breaks = cbrks)

# Mean pre- and post-period GMV, both grouped by the seller's pre-period bin.
# (The earlier `tbl <- data.frame(cbrks)` was overwritten immediately and has
# been removed.)
tbl <- data.frame(tapply(df$pre_GMV, df$pre_cat, mean))
colnames(tbl) <- "pre_mean"
tbl$pos_mean <- tapply(df$pos_GMV, df$pre_cat, mean)
tbl

barplot(
  height = t(tbl), col = 1:2, beside = TRUE, axis.lty = 2,
  main = "average GMV pre and post periods (both control and test groups)"
)

legend(
  "topleft",
  c("pre_GMV", "post_GMV"),
  col = c(1, 2),
  lty = c(1, 1), lwd = c(2, 2)
)

###---------------------------------------

Separate Barplots for Control Group

##------------only control group ----------
# Restrict to the control bucket. This overwrites `df`, so later sections
# must re-read the CSV to get the other bucket back.
df <- df[df$buckt == "control", ]

# Re-bin on the shared breaks `cbrks`.
df$pos_cat <- cut(df$pos_GMV, breaks = cbrks)
df$pre_cat <- cut(df$pre_GMV, breaks = cbrks)

# Mean pre- and post-period GMV, both grouped by the seller's pre-period bin.
# (The earlier `tbl <- data.frame(cbrks)` was overwritten immediately and has
# been removed.)
tbl <- data.frame(tapply(df$pre_GMV, df$pre_cat, mean))
colnames(tbl) <- "pre_mean"
tbl$pos_mean <- tapply(df$pos_GMV, df$pre_cat, mean)
tbl

barplot(
  height = t(tbl), col = c(1, 4), beside = TRUE, axis.lty = 2,
  main = "average GMV pre and post periods (control groups)"
)

legend(
  "topleft",
  c("pre_GMV", "post_GMV"),
  col = c(1, 4),
  lty = c(1, 1), lwd = c(2, 2)
)

###---------------------------------------

Barplot for Test Group

##------------only test group ----------
# Re-read the CSV (the control section overwrote `df`) and keep only the
# test bucket.
df <- read.csv("test_dad.csv")
df <- df[df$buckt == "test", ]

# Re-bin on the shared breaks `cbrks`.
df$pos_cat <- cut(df$pos_GMV, breaks = cbrks)
df$pre_cat <- cut(df$pre_GMV, breaks = cbrks)

# Mean pre- and post-period GMV, both grouped by the seller's pre-period bin.
# (The earlier `tbl <- data.frame(cbrks)` was overwritten immediately and has
# been removed.)
tbl <- data.frame(tapply(df$pre_GMV, df$pre_cat, mean))
colnames(tbl) <- "pre_mean"
tbl$pos_mean <- tapply(df$pos_GMV, df$pre_cat, mean)
tbl

barplot(
  height = t(tbl), col = c(1, 3), beside = TRUE, axis.lty = 2,
  main = "average GMV pre and post periods (test group)"
)

legend(
  "topleft",
  c("pre_GMV", "post_GMV"),
  col = c(1, 3),
  lty = c(1, 1), lwd = c(2, 2)
)

###---------------------------------------

Customer Project

Reading the Data

merging pre and post GMV

Dividing the data in control and test

Analyze pre GMV trends of control

This will give 95% confidence limits for the GMV

Analyze post GMV trends of control

This will give 95% confidence limits for the GMV

barplots for pre and post periods of average GMV

Separate Barplots for Control Group

Barplot for Test Group