library(stargazer)
library(ggplot2)
library(dplyr)
library(nnet)
library(modelsummary)
library(tidyr)
library(fastDummies)
library(lubridate)
load('~/Documents/pp/Smartick New Data/Smartick New Data/data_20221018/Routine Formation.RData')
ses2017.gp.pvt = ses2017.gp %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))  #quartile

stargazer(data.frame(ses2017.gp.pvt),summary=F,type='text')
## 
## ==============================================================
##    month_contract nusers avg.usage  q1    q2    q3   retention
## --------------------------------------------------------------
## 1        -1       2,722    0.790   0.690 0.860 0.930     1    
## 2        1        2,730    0.730   0.600 0.770 0.900     1    
## 3        2        2,578    0.630   0.430 0.670 0.870   0.940  
## 4        3        2,459    0.550   0.300 0.600 0.830   0.900  
## 5        4        2,001    0.570   0.330 0.630 0.830   0.730  
## 6        5        1,847    0.560   0.330 0.600 0.830   0.680  
## 7        6        1,799    0.530   0.270 0.570 0.800   0.660  
## 8        7        1,575    0.550   0.300 0.590 0.800   0.580  
## 9        8        1,507    0.540   0.270 0.570 0.800   0.550  
## 10       9        1,469    0.510   0.230 0.560 0.800   0.540  
## 11       10       1,297    0.520   0.270 0.530 0.800   0.480  
## 12       11       1,276    0.520   0.230 0.530 0.800   0.470  
## 13       12       1,274    0.540   0.300 0.570 0.800   0.470  
## 14       99       1,124    0.500   0.300 0.500 0.710   0.410  
## --------------------------------------------------------------
data.frame(ses2017.gp.pvt) %>% ggplot(aes(x=month_contract))+ geom_errorbar(aes(ymin=q1,ymax=q3),alpha=.7,width=.1) + geom_line(aes(y=q2,group=1)) + ylab('Quartile 1,Median, Quartile 3') + ggtitle('Monthly Usage Rates Over Time Across Individuals') #+ geom_line(aes(y=avg.usage,group=1))

data.frame(ses2017.gp.pvt) %>% ggplot(aes(x=month_contract)) + geom_point(aes(y=retention))+ geom_line(aes(y=retention,group=1)) + ylab('Retention Rates') + ggtitle('Retention Rates Over Time') + theme_classic() + geom_text(aes(y=retention,label=retention),vjust=-1) #+ geom_line(aes(y=avg.usage,group=1))

ses2017.gp.monthly = ses2017[ses2017$math_tarifa==101,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.monthly = ses2017.gp.monthly[ses2017.gp.monthly$nses>=8,]
ses2017.gp.monthly.pvt = ses2017.gp.monthly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))

stargazer(data.frame(ses2017.gp.monthly.pvt),summary=F,type='text')
## 
## ==============================================================
##    month_contract nusers avg.usage  q1    q2    q3   retention
## --------------------------------------------------------------
## 1        -1        941     0.750   0.640 0.800 0.930     1    
## 2        1         941     0.670   0.500 0.700 0.870     1    
## 3        2         789     0.560   0.300 0.600 0.830   0.840  
## 4        3         671     0.500   0.200 0.530 0.800   0.710  
## 5        4         591     0.500   0.230 0.530 0.800   0.630  
## 6        5         478     0.520   0.270 0.530 0.800   0.510  
## 7        6         416     0.530   0.300 0.550 0.800   0.440  
## 8        7         383     0.510   0.280 0.530 0.770   0.410  
## 9        8         361     0.510   0.230 0.530 0.800   0.380  
## 10       9         318     0.530   0.300 0.570 0.770   0.340  
## 11       10        294     0.530   0.240 0.530 0.800   0.310  
## 12       11        285     0.540   0.300 0.570 0.800   0.300  
## 13       12        277     0.560   0.370 0.570 0.800   0.290  
## 14       99        283     0.490   0.310 0.470 0.700   0.300  
## --------------------------------------------------------------
ses2017.gp.quaterly = ses2017[ses2017$math_tarifa==102,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.quaterly = ses2017.gp.quaterly[ses2017.gp.quaterly$nses>=8,]
ses2017.gp.quaterly.pvt = ses2017.gp.quaterly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))

stargazer(data.frame(ses2017.gp.quaterly.pvt),summary=F,type='text')
## 
## ==============================================================
##    month_contract nusers avg.usage  q1    q2    q3   retention
## --------------------------------------------------------------
## 1        -1       1,449    0.800   0.710 0.870 0.930     1    
## 2        1        1,452    0.740   0.630 0.790 0.900     1    
## 3        2        1,452    0.630   0.430 0.670 0.870     1    
## 4        3        1,451    0.540   0.300 0.570 0.800     1    
## 5        4        1,073    0.580   0.330 0.630 0.830   0.740  
## 6        5        1,032    0.540   0.300 0.570 0.800   0.710  
## 7        6        1,046    0.500   0.200 0.530 0.770   0.720  
## 8        7         855     0.540   0.270 0.570 0.830   0.590  
## 9        8         809     0.530   0.270 0.570 0.800   0.560  
## 10       9         814     0.490   0.170 0.530 0.800   0.560  
## 11       10        666     0.530   0.270 0.550 0.800   0.460  
## 12       11        654     0.530   0.230 0.570 0.800   0.450  
## 13       12        660     0.550   0.320 0.600 0.830   0.450  
## 14       99        610     0.490   0.280 0.490 0.700   0.420  
## --------------------------------------------------------------
ses2017.gp.yearly = ses2017[ses2017$math_tarifa==103,] %>% group_by(alumno,month_contract) %>% summarise(nses=n(),uses=sum(estado=='REALIZADA'),usagerate=uses/nses,max_d=max(day_contract)) %>% group_by(alumno) %>% mutate(max_d= max(max_d),uses_tot=sum(uses),nses_tot=sum(nses))
## `summarise()` has grouped output by 'alumno'. You can override using the
## `.groups` argument.
ses2017.gp.yearly = ses2017.gp.yearly[ses2017.gp.yearly$nses>=8,]
ses2017.gp.yearly.pvt = ses2017.gp.yearly %>% group_by(month_contract) %>% summarise(nusers=n(),avg.usage=round(mean(usagerate),2),q1=round(quantile(usagerate,.25),2),q2= round(quantile(usagerate,.5),2),q3 = round(quantile(usagerate,.75),2)) %>% mutate(retention=round(nusers/max(nusers),2))

stargazer(data.frame(ses2017.gp.yearly.pvt),summary=F,type='text')
## 
## ==============================================================
##    month_contract nusers avg.usage  q1    q2    q3   retention
## --------------------------------------------------------------
## 1        -1        332     0.850   0.770 0.930   1     0.990  
## 2        1         337     0.830   0.770 0.870 0.970     1    
## 3        2         337     0.760   0.630 0.800 0.930     1    
## 4        3         337     0.720   0.600 0.770 0.900     1    
## 5        4         337     0.690   0.530 0.770 0.900     1    
## 6        5         337     0.660   0.500 0.700 0.870     1    
## 7        6         337     0.630   0.430 0.700 0.870     1    
## 8        7         337     0.610   0.400 0.670 0.870     1    
## 9        8         337     0.580   0.370 0.630 0.830     1    
## 10       9         337     0.540   0.300 0.570 0.800     1    
## 11       10        337     0.510   0.230 0.570 0.800     1    
## 12       11        337     0.490   0.200 0.500 0.800     1    
## 13       12        337     0.510   0.200 0.570 0.800     1    
## 14       99        231     0.520   0.350 0.550 0.730   0.690  
## --------------------------------------------------------------
#2335 individuals
temp.ids = ses2017.gp$alumno[ses2017.gp$month_contract==3 & ses2017.gp$nses>24]
ses_m3 = ses2017[ses2017$month_contract %in% 1:3 & ses2017$alumno %in% temp.ids,] 
ses_m3.gp = ses_m3 %>% group_by(alumno) %>% summarise(n=n(),usage=sum(estado=='REALIZADA')/n)
summary(ses_m3.gp$usage)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.4778  0.6889  0.6496  0.8444  1.0000
hist(ses_m3.gp$usage)

ses_m3.gp$usage_type= ifelse(ses_m3.gp$usage>=.69,'high','low')
ses_m3$dow = wday(ses_m3$fecha)
ses_m3$week = ses_m3$day_contract %/% 7 
ses_m3 = ses_m3 %>% left_join(ses_m3.gp[,c('alumno','usage_type')],by='alumno')
ses_m3 %>% group_by(dow=factor(dow),week=factor(week),usage_type) %>% summarise(n=n(),usage=mean(estado=='REALIZADA')) %>% ggplot(aes(x=week,y=dow,fill=usage)) +geom_tile(col='white',alpha = .8) + facet_wrap(~usage_type,nrow=2) +scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0.5, limit = c(0,1), space = "Lab",name="usage rate") + # Change gradient color Pearson\nCorrelation
  theme_minimal()+  coord_fixed()
## `summarise()` has grouped output by 'dow', 'week'. You can override using the
## `.groups` argument.