Active users by cohort
#Import data
Data <- read_excel("C:\\Users\\lenovo\\Desktop\\Data.xlsx")
#Process
##Identify first join and last out
First_join <- Data %>%
arrange(CUSTOMER_ID, MONTH_ID) %>%
distinct(CUSTOMER_ID, .keep_all = TRUE) %>%
dplyr::rename(First = MONTH_ID)
Last_out <- Data %>%
arrange(CUSTOMER_ID, desc(MONTH_ID)) %>%
distinct(CUSTOMER_ID, .keep_all = TRUE) %>%
dplyr::rename(Last = MONTH_ID)
Data1 <- Data %>%
arrange(CUSTOMER_ID, MONTH_ID) %>%
distinct(CUSTOMER_ID) %>%
left_join(First_join, by = "CUSTOMER_ID") %>%
left_join(Last_out, by = "CUSTOMER_ID") %>%
mutate(Cohort = First,
First = as_date(str_c(First, "01")),
Last = as_date(str_c(Last, "01"))) %>%
mutate(Month_dif = 12*((year(Last)-year(First))) + (month(Last)-month(First)))
##Cohort
Cohort <- ddply(Data1,.(Cohort),summarize,
M01 = sum(Month_dif >= 0),
M02 = sum(Month_dif >= 1),
M03 = sum(Month_dif >= 2),
M04 = sum(Month_dif >= 3),
M05 = sum(Month_dif >= 4),
M06 = sum(Month_dif >= 5),
M07 = sum(Month_dif >= 6),
M08 = sum(Month_dif >= 7),
M09 = sum(Month_dif >= 8),
M10 = sum(Month_dif >= 9),
M11 = sum(Month_dif >= 10),
M12 = sum(Month_dif >= 11),
M13 = sum(Month_dif >= 12),
M14 = sum(Month_dif >= 13),
M15 = sum(Month_dif >= 14))
Cohort
|
M01
|
M02
|
M03
|
M04
|
M05
|
M06
|
M07
|
M08
|
M09
|
M10
|
M11
|
M12
|
M13
|
M14
|
M15
|
201401
|
11716
|
11375
|
11255
|
11070
|
10886
|
10668
|
10441
|
10232
|
10034
|
9805
|
9559
|
9266
|
9000
|
8748
|
8558
|
201402
|
0
|
251
|
190
|
188
|
182
|
181
|
178
|
173
|
167
|
164
|
159
|
151
|
144
|
137
|
127
|
201403
|
0
|
0
|
339
|
297
|
290
|
288
|
284
|
284
|
275
|
265
|
261
|
253
|
248
|
244
|
233
|
201404
|
0
|
0
|
0
|
386
|
348
|
344
|
340
|
334
|
326
|
321
|
312
|
304
|
294
|
284
|
278
|
201405
|
0
|
0
|
0
|
0
|
300
|
280
|
277
|
271
|
268
|
264
|
259
|
250
|
243
|
235
|
229
|
201406
|
0
|
0
|
0
|
0
|
0
|
352
|
310
|
306
|
302
|
293
|
288
|
279
|
269
|
258
|
248
|
201407
|
0
|
0
|
0
|
0
|
0
|
0
|
376
|
291
|
284
|
282
|
278
|
268
|
258
|
252
|
247
|
201408
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
284
|
259
|
254
|
250
|
245
|
236
|
227
|
219
|
201409
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
273
|
255
|
249
|
247
|
238
|
230
|
220
|
201410
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
302
|
279
|
277
|
269
|
264
|
258
|
201411
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
338
|
273
|
264
|
260
|
249
|
201412
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
293
|
269
|
264
|
260
|
201501
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
249
|
238
|
231
|
201502
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
188
|
183
|
201503
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
198
|

Retention rate by cohort
Cohort
|
M01
|
M02
|
M03
|
M04
|
M05
|
M06
|
M07
|
M08
|
M09
|
M10
|
M11
|
M12
|
M13
|
M14
|
M15
|
201401
|
100
|
97.09
|
96.07
|
94.49
|
92.92
|
91.05
|
89.12
|
87.33
|
85.64
|
83.69
|
81.59
|
79.09
|
76.82
|
74.67
|
73.05
|
201402
|
100
|
75.70
|
74.90
|
72.51
|
72.11
|
70.92
|
68.92
|
66.53
|
65.34
|
63.35
|
60.16
|
57.37
|
54.58
|
50.60
|
0.00
|
201403
|
100
|
87.61
|
85.55
|
84.96
|
83.78
|
83.78
|
81.12
|
78.17
|
76.99
|
74.63
|
73.16
|
71.98
|
68.73
|
0.00
|
0.00
|
201404
|
100
|
90.16
|
89.12
|
88.08
|
86.53
|
84.46
|
83.16
|
80.83
|
78.76
|
76.17
|
73.58
|
72.02
|
0.00
|
0.00
|
0.00
|
201405
|
100
|
93.33
|
92.33
|
90.33
|
89.33
|
88.00
|
86.33
|
83.33
|
81.00
|
78.33
|
76.33
|
0.00
|
0.00
|
0.00
|
0.00
|
201406
|
100
|
88.07
|
86.93
|
85.80
|
83.24
|
81.82
|
79.26
|
76.42
|
73.30
|
70.45
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201407
|
100
|
77.39
|
75.53
|
75.00
|
73.94
|
71.28
|
68.62
|
67.02
|
65.69
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201408
|
100
|
91.20
|
89.44
|
88.03
|
86.27
|
83.10
|
79.93
|
77.11
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201409
|
100
|
93.41
|
91.21
|
90.48
|
87.18
|
84.25
|
80.59
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201410
|
100
|
92.38
|
91.72
|
89.07
|
87.42
|
85.43
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201411
|
100
|
80.77
|
78.11
|
76.92
|
73.67
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201412
|
100
|
91.81
|
90.10
|
88.74
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201501
|
100
|
95.58
|
92.77
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201502
|
100
|
97.34
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
201503
|
100
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
0.00
|
Dynamics chart
Retention <- Retention[,-2] #remove M01 data because it is always 100%
#Dynamics analysis chart
Cohort.chart1 <- melt(Retention, id.vars = 'Cohort')
colnames(Cohort.chart1) <- c('Cohort', 'Month', 'Retention')
Cohort.chart1 <- Cohort.chart1 %>%
filter(Retention != 0) %>%
mutate(Cohort = as.factor(Cohort))
p <- ggplot(Cohort.chart1, aes(x=Month, y=Retention, group=Cohort, colour=Cohort)) +
geom_line(size = 2, alpha = 1/2) +
geom_point(size = 2, alpha = 1) +
#geom_smooth(aes(group=1), method = 'loess', size=2, colour='red', se=FALSE) +
labs(title="Cohorts retention ratio dynamics")
p

Cycle chart
