Reading the Data

# Load the experiment export and keep only columns 1, 2, 5 and 6, then
# print the structure of the trimmed data frame.
df <- read.csv("test_dad.csv")[, c(1, 2, 5, 6)]
str(df)

## 'data.frame':    9346 obs. of  4 variables:
##  $ buckt  : Factor w/ 2 levels "control","test": 1 1 2 2 2 2 2 1 2 2 ...
##  $ slr_id : Factor w/ 9346 levels "seller_1","seller_10",..: 1 1112 2223 3334 4445 5556 6667 7778 8889 2 ...
##  $ pre_GMV: num  0 0 0 0 99.2 ...
##  $ pos_GMV: num  0 0 0 0 2.12 4.32 6.31 6.95 8.19 8.42 ...

Dividing the data into control and test groups

# Split the rows by experiment bucket and preview the test group.
df_control <- df[df[["buckt"]] == "control", ]
df_test <- df[df[["buckt"]] == "test", ]
head(df_test)

Analyze pre GMV trends of control

This gives, for each 5% quantile bin of pre-period GMV, the mean GMV and an approximate 95% range (mean ± 1.96 standard deviations).

# Summarise pre-period GMV of the control group per quantile bin.
#
# Cut points are the 0%, 5%, ..., 100% quantiles of pre_GMV. unique() guards
# against tied quantiles, which would make cut() stop with
# "'breaks' are not unique".
brks <- unique(quantile(df_control$pre_GMV, probs = seq(0, 1, 0.05)))

# cut() builds (a, b] intervals by default, so rows equal to the smallest
# break would become NA and drop out of every summary below.
# include.lowest = TRUE keeps them in the first bin.
df_control$cats <- cut(
  df_control$pre_GMV,
  breaks = brks,
  include.lowest = TRUE
)

# One row per bin: mean and standard deviation of pre_GMV.
tbl1 <- data.frame(tapply(df_control$pre_GMV, df_control$cats, mean))
names(tbl1) <- "mean"
tbl1$sd <- tapply(df_control$pre_GMV, df_control$cats, sd)

# Lower / upper limits: mean -/+ 1.96 standard deviations, rounded to 2 dp.
tbl1$ci1 <- round(tbl1$mean - 1.96 * tbl1$sd, 2)
tbl1$ci2 <- round(tbl1$mean + 1.96 * tbl1$sd, 2)

tbl1

Analyze post GMV trends of control

This gives, for each quantile bin of post-period GMV, the mean GMV and an approximate 95% range (mean ± 1.96 standard deviations).

# Summarise post-period GMV of the control group per quantile bin.
#
# Cut points are the 0%, 5%, ..., 100% quantiles of pos_GMV. Tied quantiles
# are collapsed with unique() instead of selecting a fixed index range
# (previously brks[7:21]), so the binning does not depend on how many
# quantiles happen to coincide in one particular data set.
brks <- unique(quantile(df_control$pos_GMV, probs = seq(0, 1, 0.05)))

# include.lowest = TRUE keeps rows equal to the smallest break in the first
# bin; with cut()'s default (a, b] intervals they would become NA and be
# left out of the summaries below.
# NOTE(review): this adds the minimum-GMV rows to the first bin's statistics;
# confirm that is the intended population.
df_control$cats <- cut(
  df_control$pos_GMV,
  breaks = brks,
  include.lowest = TRUE
)

# One row per bin: mean and standard deviation of pos_GMV.
tbl2 <- data.frame(tapply(df_control$pos_GMV, df_control$cats, mean))
names(tbl2) <- "mean"
tbl2$sd <- tapply(df_control$pos_GMV, df_control$cats, sd)

# Lower / upper limits: mean -/+ 1.96 standard deviations, rounded to 2 dp.
tbl2$ci1 <- round(tbl2$mean - 1.96 * tbl2$sd, 2)
tbl2$ci2 <- round(tbl2$mean + 1.96 * tbl2$sd, 2)

tbl2

# Horizontal bar chart of the per-bin mean GMV: post-period (colour 2) and
# pre-period (colour 4).
#
# rbind() silently recycles the shorter vector when the two tables have a
# different number of bins, which draws repeated values as if they were data.
# Pad the shorter series explicitly instead. Padding uses 0 rather than NA so
# the other series' segment of the stacked bar is still drawn.
# NOTE(review): padding is added at the front, which assumes the missing bins
# are the lowest quantile bands (collapsed by tied quantiles) - confirm.
invisible(local({
  post_mean <- as.vector(tbl2$mean)
  pre_mean <- as.vector(tbl1$mean)
  n_bars <- max(length(post_mean), length(pre_mean))
  pad_front <- function(x) c(rep(0, n_bars - length(x)), x)
  barplot(
    height = rbind(pad_front(post_mean), pad_front(pre_mean)),
    horiz = TRUE,
    col = c(2, 4)
  )
}))

## Warning in rbind(tbl2$mean, tbl1$mean): number of columns of result is not
## a multiple of vector length (arg 1)