Cohort Analysis-Retention Rate

suppressWarnings(library(dplyr))

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

suppressWarnings(library(ggplot2))
library(reshape2)

# Client counts per cohort (rows) and month (columns M01..M12).
# Row k has zeros up to month k-1 and its first non-zero count in month k,
# so the non-zero values form an upper-triangular table.
cohort.clients <- data.frame(
  cohort = sprintf("Cohort%02d", 1:12),
  M01 = c(11000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  M02 = c(1900, 10000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  M03 = c(1400, 2000, 11500, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  M04 = c(1100, 1300, 2400, 13200, 0, 0, 0, 0, 0, 0, 0, 0),
  M05 = c(1000, 1100, 1400, 2400, 11100, 0, 0, 0, 0, 0, 0, 0),
  M06 = c(900, 900, 1200, 1600, 1900, 10300, 0, 0, 0, 0, 0, 0),
  M07 = c(850, 900, 1100, 1300, 1300, 1900, 13000, 0, 0, 0, 0, 0),
  M08 = c(850, 850, 1000, 1200, 1100, 1300, 1900, 11500, 0, 0, 0, 0),
  M09 = c(800, 800, 950, 1100, 1100, 1250, 1000, 1200, 11000, 0, 0, 0),
  M10 = c(800, 780, 900, 1050, 1050, 1200, 900, 1200, 1900, 13200, 0, 0),
  M11 = c(750, 750, 900, 1000, 1000, 1180, 800, 1100, 1150, 2000, 11300, 0),
  M12 = c(740, 700, 870, 1000, 900, 1100, 700, 1050, 1025, 1300, 1800, 20000)
)

# Left-align every cohort: after this loop column M01 of cohort.clients.r holds
# each cohort's first non-zero month, M02 the month after it, and so on.
# Only the zeros *before* the first non-zero month are removed; a zero that
# occurs later in a cohort's life stays in place, and the columns freed on the
# right are filled with zeros.
cohort.clients.r <- cohort.clients # work on a copy, the original stays intact
# Month columns = everything except the first (`cohort`) column; the chart
# code further down relies on the same column layout.
month.idx <- seq_len(ncol(cohort.clients.r))[-1]
totmonths <- length(month.idx)
for (i in seq_len(nrow(cohort.clients.r))) { # seq_len() is safe for 0 rows
  counts <- unlist(cohort.clients.r[i, month.idx], use.names = FALSE)
  first.active <- which(counts != 0)[1] # NA when the whole row is zero
  if (is.na(first.active)) next # empty cohort: nothing to shift
  # Values from the first active month onward, then zero padding on the right.
  cohort.clients.r[i, month.idx] <- c(counts[first.active:totmonths],
                                      numeric(first.active - 1))
}
cohort.clients # prints the original (unshifted) table

##      cohort   M01   M02   M03   M04   M05   M06   M07   M08   M09   M10
## 1  Cohort01 11000  1900  1400  1100  1000   900   850   850   800   800
## 2  Cohort02     0 10000  2000  1300  1100   900   900   850   800   780
## 3  Cohort03     0     0 11500  2400  1400  1200  1100  1000   950   900
## 4  Cohort04     0     0     0 13200  2400  1600  1300  1200  1100  1050
## 5  Cohort05     0     0     0     0 11100  1900  1300  1100  1100  1050
## 6  Cohort06     0     0     0     0     0 10300  1900  1300  1250  1200
## 7  Cohort07     0     0     0     0     0     0 13000  1900  1000   900
## 8  Cohort08     0     0     0     0     0     0     0 11500  1200  1200
## 9  Cohort09     0     0     0     0     0     0     0     0 11000  1900
## 10 Cohort10     0     0     0     0     0     0     0     0     0 13200
## 11 Cohort11     0     0     0     0     0     0     0     0     0     0
## 12 Cohort12     0     0     0     0     0     0     0     0     0     0
##      M11   M12
## 1    750   740
## 2    750   700
## 3    900   870
## 4   1000  1000
## 5   1000   900
## 6   1180  1100
## 7    800   700
## 8   1100  1050
## 9   1150  1025
## 10  2000  1300
## 11 11300  1800
## 12     0 20000

# Retention ratio: each month column of the left-aligned table divided by the
# cohort's first-month count (column M01).
x <- cohort.clients.r[, -1, drop = FALSE] # all month columns, however many
y <- x[[1]] # first-month (M01) count per cohort
# Data frame / vector divides every column element-wise by y (row by row).
reten.r <- data.frame(cohort = cohort.clients.r$cohort, x / y)
reten.r <- reten.r[, -2] # drop M01: it is M01 / M01, i.e. always 100%
# Long format: one row per (cohort, month) pair.
cohort.chart1 <- melt(reten.r, id.vars = "cohort",
                      variable.name = "month", value.name = "retention")

# Zero ratios come from the zero padding after a cohort's last observed
# month; leave them out of the plot.
cohort.chart1 <- subset(cohort.chart1, retention != 0)

# One line per cohort, all cohorts in a single panel.
p <- ggplot(cohort.chart1,
            aes(x = month, y = retention, group = cohort, colour = cohort))
p + geom_line(size = 1, alpha = 1 / 2) +
  geom_point(size = 2, alpha = 1) +
  labs(title = "Cohorts Retention ratio dynamics")

# Cohort 12 is not shown because it only has its base month (M01), which is excluded from the chart.

# Retention ratio: each month column of the left-aligned table divided by the
# cohort's first-month count (column M01). Recomputed here so this chart does
# not depend on variables left over from an earlier chunk.
x <- cohort.clients.r[, -1, drop = FALSE] # all month columns, however many
y <- x[[1]] # first-month (M01) count per cohort
# Data frame / vector divides every column element-wise by y (row by row).
reten.r <- data.frame(cohort = cohort.clients.r$cohort, x / y)
reten.r <- reten.r[, -2] # drop M01: it is M01 / M01, i.e. always 100%
# Long format: one row per (cohort, month) pair.
cohort.chart1 <- melt(reten.r, id.vars = "cohort",
                      variable.name = "month", value.name = "retention")

# Zero ratios come from the zero padding after a cohort's last observed
# month; leave them out of the plot.
cohort.chart1 <- subset(cohort.chart1, retention != 0)

# Same series as a small-multiples chart: one panel per cohort, with the
# x-axis title, labels and ticks hidden.
p <- ggplot(cohort.chart1,
            aes(x = month, y = retention, group = cohort, colour = cohort))
p + geom_line(size = 1, alpha = 1 / 2) +
  geom_point(size = 2, alpha = 1) +
  facet_wrap(~ cohort) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  labs(title = "Cohorts Retention ratio")

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

# Cohort 12 is not shown because it only has its base month (M01), which is excluded from the chart.

Cohort Analysis-Retention Rate

JHONG TING HUANG

2017年2月27日