library(stargazer)
library(ggplot2)
library(dplyr)
library(nnet)
library(modelsummary)
library(tidyr)
library(fastDummies)
library(lubridate)
load('~/Documents/pp/Smartick New Data/Smartick New Data/data_20221018/Routine Formation.RData')
ses2017.gp.pvt = ses2017.gp %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2)) #quartile
stargazer(data.frame(ses2017.gp.pvt),summary=F,type='text')
##
## ==============================================================
## month_contract nusers avg.usage q1 q2 q3 retention
## --------------------------------------------------------------
## 1 -1 2,722 0.790 0.690 0.860 0.930 1
## 2 1 2,730 0.730 0.600 0.770 0.900 1
## 3 2 2,578 0.630 0.430 0.670 0.870 0.940
## 4 3 2,459 0.550 0.300 0.600 0.830 0.900
## 5 4 2,001 0.570 0.330 0.630 0.830 0.730
## 6 5 1,847 0.560 0.330 0.600 0.830 0.680
## 7 6 1,799 0.530 0.270 0.570 0.800 0.660
## 8 7 1,575 0.550 0.300 0.590 0.800 0.580
## 9 8 1,507 0.540 0.270 0.570 0.800 0.550
## 10 9 1,469 0.510 0.230 0.560 0.800 0.540
## 11 10 1,297 0.520 0.270 0.530 0.800 0.480
## 12 11 1,276 0.520 0.230 0.530 0.800 0.470
## 13 12 1,274 0.540 0.300 0.570 0.800 0.470
## 14 99 1,124 0.500 0.300 0.500 0.710 0.410
## --------------------------------------------------------------
data.frame(ses2017.gp.pvt) %>% ggplot(aes(x=month_contract))+ geom_errorbar(aes(ymin=q1,ymax=q3),alpha=.7,width=.1) + geom_line(aes(y=q2,group=1)) + ylab('Quartile 1,Median, Quartile 3') + ggtitle('Monthly Usage Rates Over Time Across Individuals') #+ geom_line(aes(y=avg.usage,group=1))

data.frame(ses2017.gp.pvt) %>% ggplot(aes(x=month_contract)) + geom_point(aes(y=retention))+ geom_line(aes(y=retention,group=1)) + ylab('Retention Rates') + ggtitle('Retention Rates Over Time') + theme_classic() + geom_text(aes(y=retention,label=retention),vjust=-1) #+ geom_line(aes(y=avg.usage,group=1))

ses2017.gp.monthly = ses2017[ses2017$math_tarifa==101,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.monthly = ses2017.gp.monthly[ses2017.gp.monthly$nses>=8,]
ses2017.gp.monthly.pvt = ses2017.gp.monthly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))
stargazer(data.frame(ses2017.gp.monthly.pvt),summary=F,type='text')
##
## ==============================================================
## month_contract nusers avg.usage q1 q2 q3 retention
## --------------------------------------------------------------
## 1 -1 941 0.750 0.640 0.800 0.930 1
## 2 1 941 0.670 0.500 0.700 0.870 1
## 3 2 789 0.560 0.300 0.600 0.830 0.840
## 4 3 671 0.500 0.200 0.530 0.800 0.710
## 5 4 591 0.500 0.230 0.530 0.800 0.630
## 6 5 478 0.520 0.270 0.530 0.800 0.510
## 7 6 416 0.530 0.300 0.550 0.800 0.440
## 8 7 383 0.510 0.280 0.530 0.770 0.410
## 9 8 361 0.510 0.230 0.530 0.800 0.380
## 10 9 318 0.530 0.300 0.570 0.770 0.340
## 11 10 294 0.530 0.240 0.530 0.800 0.310
## 12 11 285 0.540 0.300 0.570 0.800 0.300
## 13 12 277 0.560 0.370 0.570 0.800 0.290
## 14 99 283 0.490 0.310 0.470 0.700 0.300
## --------------------------------------------------------------
ses2017.gp.quaterly = ses2017[ses2017$math_tarifa==102,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.quaterly = ses2017.gp.quaterly[ses2017.gp.quaterly$nses>=8,]
ses2017.gp.quaterly.pvt = ses2017.gp.quaterly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))
stargazer(data.frame(ses2017.gp.quaterly.pvt),summary=F,type='text')
##
## ==============================================================
## month_contract nusers avg.usage q1 q2 q3 retention
## --------------------------------------------------------------
## 1 -1 1,449 0.800 0.710 0.870 0.930 1
## 2 1 1,452 0.740 0.630 0.790 0.900 1
## 3 2 1,452 0.630 0.430 0.670 0.870 1
## 4 3 1,451 0.540 0.300 0.570 0.800 1
## 5 4 1,073 0.580 0.330 0.630 0.830 0.740
## 6 5 1,032 0.540 0.300 0.570 0.800 0.710
## 7 6 1,046 0.500 0.200 0.530 0.770 0.720
## 8 7 855 0.540 0.270 0.570 0.830 0.590
## 9 8 809 0.530 0.270 0.570 0.800 0.560
## 10 9 814 0.490 0.170 0.530 0.800 0.560
## 11 10 666 0.530 0.270 0.550 0.800 0.460
## 12 11 654 0.530 0.230 0.570 0.800 0.450
## 13 12 660 0.550 0.320 0.600 0.830 0.450
## 14 99 610 0.490 0.280 0.490 0.700 0.420
## --------------------------------------------------------------
ses2017.gp.yearly = ses2017[ses2017$math_tarifa==103,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.yearly = ses2017.gp.yearly[ses2017.gp.yearly$nses>=8,]
ses2017.gp.yearly.pvt = ses2017.gp.yearly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))
stargazer(data.frame(ses2017.gp.yearly.pvt),summary=F,type='text')
##
## ==============================================================
## month_contract nusers avg.usage q1 q2 q3 retention
## --------------------------------------------------------------
## 1 -1 332 0.850 0.770 0.930 1 0.990
## 2 1 337 0.830 0.770 0.870 0.970 1
## 3 2 337 0.760 0.630 0.800 0.930 1
## 4 3 337 0.720 0.600 0.770 0.900 1
## 5 4 337 0.690 0.530 0.770 0.900 1
## 6 5 337 0.660 0.500 0.700 0.870 1
## 7 6 337 0.630 0.430 0.700 0.870 1
## 8 7 337 0.610 0.400 0.670 0.870 1
## 9 8 337 0.580 0.370 0.630 0.830 1
## 10 9 337 0.540 0.300 0.570 0.800 1
## 11 10 337 0.510 0.230 0.570 0.800 1
## 12 11 337 0.490 0.200 0.500 0.800 1
## 13 12 337 0.510 0.200 0.570 0.800 1
## 14 99 231 0.520 0.350 0.550 0.730 0.690
## --------------------------------------------------------------
#2335 individuals
temp.ids = ses2017.gp$alumno[ses2017.gp$month_contract==3 & ses2017.gp$nses>24]
ses_m3 = ses2017[ses2017$month_contract %in% 1:3 & ses2017$alumno %in% temp.ids,]
ses_m3.gp = ses_m3 %>% group_by(alumno) %>% summarise(n=n(),usage=sum(estado=='REALIZADA')/n)
summary(ses_m3.gp$usage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.4778 0.6889 0.6496 0.8444 1.0000
hist(ses_m3.gp$usage)

ses_m3.gp$usage_type= ifelse(ses_m3.gp$usage>=.69,'high','low')
ses_m3$dow = wday(ses_m3$fecha)
ses_m3$week = ses_m3$day_contract %/% 7
ses_m3 = ses_m3 %>% left_join(ses_m3.gp[,c('alumno','usage_type')],by='alumno')
ses_m3 %>% group_by(dow=factor(dow),week=factor(week),usage_type) %>% summarise(n=n(),usage=mean(estado=='REALIZADA')) %>% ggplot(aes(x=week,y=dow,fill=usage)) +geom_tile(col='white',alpha = .8) + facet_wrap(~usage_type,nrow=2) +scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0.5, limit = c(0,1), space = "Lab",name="usage rate") + # Change gradient color Pearson\nCorrelation
theme_minimal()+ coord_fixed()
## `summarise()` has grouped output by 'dow', 'week'. You can override using the
## `.groups` argument.
