Reading the Data
# Load the experiment export and keep only columns 1, 2, 5 and 6.
df <- read.csv("test_dad.csv")[, c(1, 2, 5, 6)]
# Print the structure of the trimmed data frame.
str(df)
## 'data.frame': 9346 obs. of 4 variables:
## $ buckt : Factor w/ 2 levels "control","test": 1 1 2 2 2 2 2 1 2 2 ...
## $ slr_id : Factor w/ 9346 levels "seller_1","seller_10",..: 1 1112 2223 3334 4445 5556 6667 7778 8889 2 ...
## $ pre_GMV: num 0 0 0 0 99.2 ...
## $ pos_GMV: num 0 0 0 0 2.12 4.32 6.31 6.95 8.19 8.42 ...
Dividing the data into control and test groups
# Split the sellers by experiment bucket.
df_test <- df[df$buckt == "test", ]
df_control <- df[df$buckt == "control", ]
# Preview the first rows of the test group.
head(df_test)
Analyze pre GMV trends of control
For each 5% quantile bucket of control pre GMV, this gives the mean and a mean +/- 1.96 * sd band (an approximate 95% range for the GMV)
# Bucket control sellers into 5%-wide quantile bands of pre-period GMV and
# summarise each band by its mean, standard deviation and a
# mean +/- 1.96 * sd band.
#
# `probs` is spelled out instead of relying on partial matching of `p`.
# unique() guards against tied quantiles (e.g. many sellers sharing the same
# GMV), which would make cut() stop with "'breaks' are not unique".
brks <- unique(quantile(df_control$pre_GMV, probs = seq(0, 1, 0.05)))
# include.lowest = TRUE keeps the smallest observation in the first band;
# by default cut() builds (a, b] intervals, so the minimum would become NA
# and silently drop out of the summary.
df_control$cats <- cut(
  df_control$pre_GMV,
  breaks = brks,
  include.lowest = TRUE
)
tbl1 <- data.frame(tapply(df_control$pre_GMV, df_control$cats, mean))
names(tbl1) <- "mean"
tbl1$sd <- tapply(df_control$pre_GMV, df_control$cats, sd)
# NOTE(review): ci1/ci2 are mean -/+ 1.96 * sd, i.e. a spread of the values
# in each band, not a confidence interval of the band mean (that would use
# sd / sqrt(n)). Confirm which one is intended.
tbl1$ci1 <- round(tbl1$mean - 1.96 * tbl1$sd, 2)
tbl1$ci2 <- round(tbl1$mean + 1.96 * tbl1$sd, 2)
tbl1
Analyze post GMV trends of control
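For each 5% quantile bucket of control post GMV above the 30th percentile, this gives the mean and a mean +/- 1.96 * sd band (an approximate 95% range for the GMV)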
# Quantile breaks of control post-period GMV in 5% steps (21 values).
brks <- quantile(df_control$pos_GMV, probs = seq(0, 1, 0.05))
# Only breaks 7..21 (30th to 100th percentile) are used. cut() builds
# (a, b] intervals by default, so values at or below the 30th-percentile
# break fall outside every interval and get NA.
# NOTE(review): presumably done to skip tied low quantiles - confirm.
df_control$cats <- cut(df_control$pos_GMV, breaks = brks[7:21])
# Per-bucket mean and standard deviation, with a mean -/+ 1.96 * sd band.
tbl2 <- data.frame(tapply(df_control$pos_GMV, df_control$cats, mean))
names(tbl2) <- "mean"
tbl2$sd <- tapply(df_control$pos_GMV, df_control$cats, sd)
tbl2$ci1 <- round(tbl2$mean - 1.96 * tbl2$sd, 2)
tbl2$ci2 <- round(tbl2$mean + 1.96 * tbl2$sd, 2)
tbl2
# Plot post (first row, col 2) against pre (second row, col 4) bucket means.
# tbl2 is built from a subset of the quantile breaks, so it can have fewer
# buckets than tbl1; rbind() would then recycle the shorter vector and pair
# up unrelated buckets. Both tables end at the 100th percentile, so keep the
# trailing `n_common` buckets of each to line up the same percentile bands.
n_common <- min(nrow(tbl1), nrow(tbl2))
barplot(
  height = rbind(tail(tbl2$mean, n_common), tail(tbl1$mean, n_common)),
  horiz = TRUE,
  col = c(2, 4)
)
