suppressWarnings(library(dplyr))
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
suppressWarnings(library(ggplot2))
library(reshape2)
# Client counts per cohort (rows) and calendar month (columns M01..M12).
# Cohort k has its first non-zero count in month k; earlier months are 0.
cohort.clients <- data.frame(
  cohort = sprintf("Cohort%02d", 1:12),
  M01 = c(11000, rep(0, 11)),
  M02 = c(1900, 10000, rep(0, 10)),
  M03 = c(1400, 2000, 11500, rep(0, 9)),
  M04 = c(1100, 1300, 2400, 13200, rep(0, 8)),
  M05 = c(1000, 1100, 1400, 2400, 11100, rep(0, 7)),
  M06 = c(900, 900, 1200, 1600, 1900, 10300, rep(0, 6)),
  M07 = c(850, 900, 1100, 1300, 1300, 1900, 13000, rep(0, 5)),
  M08 = c(850, 850, 1000, 1200, 1100, 1300, 1900, 11500, rep(0, 4)),
  M09 = c(800, 800, 950, 1100, 1100, 1250, 1000, 1200, 11000, rep(0, 3)),
  M10 = c(800, 780, 900, 1050, 1050, 1200, 900, 1200, 1900, 13200,
          rep(0, 2)),
  M11 = c(750, 750, 900, 1000, 1000, 1180, 800, 1100, 1150, 2000, 11300, 0),
  M12 = c(740, 700, 870, 1000, 900, 1100, 700, 1050, 1025, 1300, 1800,
          20000)
)
# Build a left-aligned copy of the cohort table: each row is shifted left so
# that its first non-zero month lands in the first month column, and the
# freed positions at the end of the row are filled with zeros.
# Column 1 is taken to be the cohort label; every other column is a month.
cohort.clients.r <- cohort.clients # work on a copy, keep the original intact
totcols <- ncol(cohort.clients.r) # total number of columns (label + months)
month.cols <- seq_len(totcols)[-1] # positions of the month columns
for (i in seq_len(nrow(cohort.clients.r))) { # seq_len() is safe for 0 rows
  vals <- unlist(cohort.clients.r[i, month.cols], use.names = FALSE)
  first.nonzero <- which(vals != 0)[1]
  # A row without any non-zero month has nothing to align; leave it as is.
  if (is.na(first.nonzero)) next
  lead <- first.nonzero - 1 # number of leading zeros to shift out
  if (lead > 0) {
    # Drop only the leading zeros. A zero that occurs after the cohort has
    # started is real data and must keep its position relative to the start.
    cohort.clients.r[i, month.cols] <- c(vals[-seq_len(lead)], rep(0, lead))
  }
}
# Print the original (unshifted) table; the left-aligned copy is cohort.clients.r.
cohort.clients
## cohort M01 M02 M03 M04 M05 M06 M07 M08 M09 M10
## 1 Cohort01 11000 1900 1400 1100 1000 900 850 850 800 800
## 2 Cohort02 0 10000 2000 1300 1100 900 900 850 800 780
## 3 Cohort03 0 0 11500 2400 1400 1200 1100 1000 950 900
## 4 Cohort04 0 0 0 13200 2400 1600 1300 1200 1100 1050
## 5 Cohort05 0 0 0 0 11100 1900 1300 1100 1100 1050
## 6 Cohort06 0 0 0 0 0 10300 1900 1300 1250 1200
## 7 Cohort07 0 0 0 0 0 0 13000 1900 1000 900
## 8 Cohort08 0 0 0 0 0 0 0 11500 1200 1200
## 9 Cohort09 0 0 0 0 0 0 0 0 11000 1900
## 10 Cohort10 0 0 0 0 0 0 0 0 0 13200
## 11 Cohort11 0 0 0 0 0 0 0 0 0 0
## 12 Cohort12 0 0 0 0 0 0 0 0 0 0
## M11 M12
## 1 750 740
## 2 750 700
## 3 900 870
## 4 1000 1000
## 5 1000 900
## 6 1180 1100
## 7 800 700
## 8 1100 1050
## 9 1150 1025
## 10 2000 1300
## 11 11300 1800
## 12 0 20000
# Retention ratio: each month's client count divided by the cohort's count in
# its first month (column 2 of the left-aligned table).
x <- cohort.clients.r[, -1] # month columns only (drop the cohort label)
y <- cohort.clients.r[, 2] # cohort size in its first month
# Data-frame arithmetic recycles y down every column, so row i is divided by
# y[i]; this avoids apply() and its data.frame -> matrix coercion.
reten.r <- x / y
reten.r <- data.frame(cohort = cohort.clients.r$cohort, reten.r)
reten.r <- reten.r[, -2] # drop the first month: it is always 100%
# Long format for ggplot: one row per (cohort, month) pair.
cohort.chart1 <- melt(reten.r, id.vars = "cohort")
colnames(cohort.chart1) <- c("cohort", "month", "retention")
# Zero ratios are the padding added by the left shift, not observations.
cohort.chart1 <- subset(cohort.chart1, retention != 0)
p <- ggplot(
  cohort.chart1,
  aes(x = month, y = retention, group = cohort, colour = cohort)
)
p + geom_line(size = 1, alpha = 1/2) +
  geom_point(size = 2, alpha = 1) +
  labs(title = "Cohorts Retention ratio dynamics")

# Cohort 12 is not shown because it only has its base month, which is dropped.
# Same retention ratios as the combined chart, drawn as one panel per cohort.
x <- cohort.clients.r[, -1] # month columns only (drop the cohort label)
y <- cohort.clients.r[, 2] # cohort size in its first month
# Data-frame arithmetic recycles y down every column, so row i is divided by
# y[i]; this avoids apply() and its data.frame -> matrix coercion.
reten.r <- x / y
reten.r <- data.frame(cohort = cohort.clients.r$cohort, reten.r)
reten.r <- reten.r[, -2] # drop the first month: it is always 100%
# Long format for ggplot: one row per (cohort, month) pair.
cohort.chart1 <- melt(reten.r, id.vars = "cohort")
colnames(cohort.chart1) <- c("cohort", "month", "retention")
# Zero ratios are the padding added by the left shift, not observations.
cohort.chart1 <- subset(cohort.chart1, retention != 0)
p <- ggplot(
  cohort.chart1,
  aes(x = month, y = retention, group = cohort, colour = cohort)
)
# NOTE(review): a cohort with a single remaining observation cannot form a
# line; that is presumably the source of the geom_path message below.
p + geom_line(size = 1, alpha = 1/2) +
  geom_point(size = 2, alpha = 1) +
  facet_wrap(~ cohort) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  labs(title = "Cohorts Retention ratio")
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

# Cohort 12 is not shown because it only has its base month, which is dropped.