1.Set the WD and read data# (1)Read data
setwd("~/Documents/MSXF/产品业务分析/逾期率分析/data/markov_chain")
roll_rate = read.csv("20160524_rollrate.csv",stringsAsFactors=F)
abnorm_contract = read.table("abnorm_contract",stringsAsFactors=F)
(2)delete the abnorm contract number
roll_rate = roll_rate[!(roll_rate[,"no_contract"] %in% abnorm_contract[,1]),]
rm(abnorm_contract)
(3)Make the type right####
roll_rate[,"checking_time"] = as.Date(roll_rate[,"checking_time"])
#roll_rate[,"first_stmt_date"] = as.Date(roll_rate[,"first_stmt_date"])
roll_rate[,"loan_code"] = as.character(roll_rate[,"loan_code"])
(4)Null will be replaced by NA, Replace NA with 0
roll_rate[,"term"] = as.numeric(roll_rate[,"term"])
roll_rate[,"dpd_curr"] = as.numeric(roll_rate[,"dpd_curr"])
roll_rate[,"dpd_curr"][is.na(roll_rate[,"dpd_curr"])] = 0
roll_rate[,"max_dpd_hist"] = as.numeric(roll_rate[,"max_dpd_hist"])
roll_rate[,"max_dpd_hist"][is.na(roll_rate[,"max_dpd_hist"])] = 0
2.Add two colunmn - status and status1###
##(1)Add status column represents the current status -- status
roll_rate[,"status"] = rep(NA,nrow(roll_rate))
roll_rate[32> roll_rate[,"dpd_curr"] & roll_rate[,"dpd_curr"]>=10,"status"] = "M1"
roll_rate[62> roll_rate[,"dpd_curr"] & roll_rate[,"dpd_curr"]>=32,"status"] = "M2"
roll_rate[91> roll_rate[,"dpd_curr"] & roll_rate[,"dpd_curr"]>=62,"status"] = "M3"
roll_rate[roll_rate[,"dpd_curr"]>=91,"status"] = "M3+"
roll_rate[roll_rate[,"dpd_curr"]<10,"status"] = "M0"
unique(roll_rate[,"status"])
## [1] "M0" "M3" "M1" "M2" "M3+"
table(roll_rate[,"status"])
##
## M0 M1 M2 M3 M3+
## 279125 20099 6181 3571 5902
(2)Add status column represents the next status – status1
roll_rate[,"status1"] = roll_rate[,"status"]
roll_rate[,"term1"] = roll_rate[,"term"]-1
(3)Left join and make the status1 is the next status
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
roll_rate = left_join(roll_rate[,1:12],roll_rate[,c(1,13,14)],by=c("no_contract"="no_contract","term"="term1"))
get data function
get_data_v = function(data,loan_code,vintage){
data = data[which(data$loan_code==loan_code &data$vintage==vintage),]
return(data)
}
#
head(get_data_v(roll_rate,1101,4))
## no_contract checking_time first_stmt_date apply_no
## 206 2015510725007643 2016-05-23 2016-01-27 09:20:42 20151226444221
## 280 2015522725007652 2016-05-23 2016-01-27 11:06:51 20151227444925
## 535 2015220524007649 2016-05-23 2016-01-27 10:25:07 20151226444397
## 683 2015612427007647 2016-05-23 2016-01-27 10:12:59 20151226444317
## 1299 2015130602007645 2016-05-23 2016-01-27 09:35:09 20151219424810
## 1300 2016513101007721 2016-05-23 2016-02-03 09:14:57 20160102459222
## loan_code vintage term system dpd_curr max_dpd_hist total_repay
## 206 1101 4 0 core -1 -1 0.00
## 280 1101 4 4 core 0 0 2880.64
## 535 1101 4 3 core 1 1 1815.54
## 683 1101 4 0 core -1 -1 0.00
## 1299 1101 4 1 core 2 2 754.11
## 1300 1101 4 0 core 0 0 3000.15
## status status1
## 206 M0 M0
## 280 M0 <NA>
## 535 M0 M0
## 683 M0 M0
## 1299 M0 M0
## 1300 M0 M0
4.Calculate the overdue rate of all customers (term0-term4) 4.1 term0 - term4
ma_term = function(data,term){
ma1 = table(data$status[which(data$term==term)],data$status1[which(data$term==term)])
return(ma1)
}
ma_term(data = get_data_v(roll_rate,1101,4),term=0)
##
## M0 M1
## M0 2103 379
Markov Function
Notice If the month changes, the alpha might change
mac_chain = function(data,loan_code,vintage,alpha){
data = get_data_v(data,loan_code,vintage)
if(vintage<4){
print("vintage must greater than 3")
break
}else{
#data
data = data[which(data$vintage==vintage ),]
#Alpha
alpha = alpha
#Prepare F1#
F1 = matrix(c(0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0),byrow=T,ncol=5,nrow=4,
dimnames=list(c("M0","M1","M2","M3"),c("M0","M1","M2","M3","M3+")))
#Prepare F2#
F2 = matrix(c(0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0),byrow=T,ncol=5,nrow=4,
dimnames=list(c("M0","M1","M2","M3"),c("M0","M1","M2","M3","M3+")))
#Prepare F3#
F3 = matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1),byrow=T,ncol=5,nrow=4,
dimnames=list(c("M0","M1","M2","M3"),c("M0","M1","M2","M3","M3+")))
###Term:1###
T1_6 = as.data.frame.matrix(ma_term(data,1)/rowSums(ma_term(data,1)))
T1_6[,c("M3","M3+")] = 0 ; T1_6[c("M2","M3"),] = 0
###Term:2###
T2_6 = as.data.frame.matrix(ma_term(data,2)/rowSums(ma_term(data,2)))
T2_6[,"M3+"] = 0; T2_6[c("M3"),] = 0
###Term:3###
T3_6 = as.data.frame.matrix(ma_term(data,3)/rowSums(ma_term(data,3)))
T3_6 = T3_6[-5,];T3_6
######AVG(T1,T2,T3) for the 3 terms######
T_com = (T1_6+T2_6+T3_6)/3
######AVG(F1,F2,F3) for the 3######
F_com = (F1+F2+F3)/3
###calculate the M###
M_com = (T_com-alpha*F_com)/(1-alpha); M_com ; M_com["M3+",] = c(0,0,0,0,1)
##Standardize##
M_com = M_com / rowSums(M_com)
M_com = as.matrix(M_com)}
return(M_com)
}
#
mac_chain(roll_rate,1101,4,0.2397)
## M0 M1 M2 M3 M3+
## M0 0.9947963 0.005203658 0.00000000 0.0000000 0.000000
## M1 0.3151146 0.291170731 0.39371467 0.0000000 0.000000
## M2 0.1169567 0.074708049 0.04259217 0.7657430 0.000000
## M3 0.1171131 0.018017401 0.01801740 0.0270261 0.819826
## M3+ 0.0000000 0.000000000 0.00000000 0.0000000 1.000000
Write up a prediction function
pred_roll_rate = function(data,loan_code,vintage,months,alpha){
M_com = mac_chain(data,loan_code,vintage,alpha)
data = get_data_v(data,loan_code,vintage)
data1 = c(table(data$status[which(data$term == max(data$term))]))
n=1
while(n<months){
n = n + 1
data1 = data1 %*% M_com
}
#rownames(data1) = toString(paste0("vintage_",vintage,sep=""))
rownames(data1) = format(Sys.Date() %m-% months(vintage),format = "%Y-%m")
return(data1)
}
###
pred_roll_rate(roll_rate,1101,vintage=6,months=6,alpha=0.2397)
## M0 M1 M2 M3 M3+
## 2015-11 1244.368 130.12 84.92985 75.40859 1508.173
Use/Apply vintage must be greater than 3
pred_roll_rate(roll_rate,1101,vintage=4,months=6,alpha=0.2397)
## M0 M1 M2 M3 M3+
## 2016-01 1955.341 17.10147 8.576941 9.39647 407.5838
pred_roll_rate(roll_rate,1101,vintage=5,months=6,alpha=0.2397)
## M0 M1 M2 M3 M3+
## 2015-12 464.1791 34.83368 19.34029 16.46109 301.1858
pred_roll_rate(roll_rate,1101,vintage=6,months=6,alpha=0.2397)
## M0 M1 M2 M3 M3+
## 2015-11 1244.368 130.12 84.92985 75.40859 1508.173
For vintage less than 3
T_term=function(data,term){
if(term==0){
temp = as.data.frame.matrix(ma_term(data,0)/rowSums(ma_term(data,0)))
temp[,c("M2","M3","M3+")] = 0 ; temp[c("M1","M2","M3"),] = 0 ; temp[c("M3+"),] = c(0,0,0,0,0)
return(temp)
}else if(term==1){
temp = as.data.frame.matrix(ma_term(data,1)/rowSums(ma_term(data,1)))
temp[,c("M3","M3+")] = 0 ; temp[c("M2","M3"),] = 0 ; temp[c("M3+"),] = c(0,0,0,0,0)
return(temp)
}else if(term==2){
temp = as.data.frame.matrix(ma_term(data,2)/rowSums(ma_term(data,2)))
temp[c("M3","M3+"),] = 0
if(!("M3+"%in%colnames(temp))){
temp[,"M3+"] = 0
}
#temp[3,c("M3+")] = 0
return(temp)
}else if(term==3){
temp = as.data.frame.matrix(ma_term(data,3)/rowSums(ma_term(data,3)))
temp[3,c("M3+")] = 0
return(temp)
}else{
print("term must be less than 4");
break;
}
}
Predition:vintage1~3, term1–>term4
#vintage 1:
pred_roll_rate_1_3 = function(data,loan_code,vintage,alpha,months){
if(loan_code!=1101){
print("loan_code must be 1101")
}else{
M_com = (mac_chain(data,loan_code,4,alpha) + mac_chain(data,loan_code,5,alpha) + mac_chain(data,loan_code,6,alpha))/3
if(vintage==1){
data1 = get_data_v(data,loan_code,1)
T1 = T_term(get_data_v(data,loan_code,1),0)
a2 = c(1,0,0,0,0)%*%as.matrix(alpha*T1+(1-alpha)*M_com)
num = length(which(data1$term==0))
n = 1
while(n<months){
n = n + 1
a2 = a2 %*% M_com
}
vin1_pre = num*a2
rownames(vin1_pre) = format(Sys.Date() %m-% months(1),format = "%Y-%m")
result = vin1_pre
}
##vintage 2:
else if(vintage==2){
data2 = get_data_v(data,loan_code,2)
T1 = T_term(data2,0)
a2 = c(1,0,0,0,0)%*%as.matrix(alpha*T1+(1-alpha)*M_com)
#a2
T2 = T_term(data2,1)
a3 = a2%*%as.matrix(alpha*T2+(1-alpha)*M_com)
num = length(which(data2$term==0))
months=6
n = 1
while(n<months){
n = n + 1
a3 = a3 %*% M_com
}
vin2_pre = num*a3
rownames(vin2_pre) = format(Sys.Date() %m-% months(2),format = "%Y-%m")
result = vin2_pre
}
##vintage 3:
else if(vintage==3){
data3 = get_data_v(data,loan_code,3)
T1 = T_term(data3,0)
a2 = c(1,0,0,0,0)%*%as.matrix(alpha*T1+(1-alpha)*M_com)
#a3
T2 = T_term(data3,1)
a3 = a2%*%as.matrix(alpha*T2+(1-alpha)*M_com)
#a4
T3 = T_term(data3,2)
a4 = a3%*%as.matrix(alpha*T3+(1-alpha)*M_com)
num = length(which(data3$term==0))
n = 1
while(n<months){
n = n + 1
a4 = a4 %*% M_com
}
vin3_pre = num*a4
rownames(vin3_pre) = format(Sys.Date() %m-% months(3),format = "%Y-%m")
result = vin3_pre
}else{
print("vintage must be less than 4")
break
}}
return(result)
}
vintage: 1 ~ 3
pred_roll_rate_1_3(roll_rate,1101,1,0.2397,months=6)
## M0 M1 M2 M3 M3+
## 2016-04 12624.9 772.6451 412.61 340.0277 980.8195
pred_roll_rate_1_3(roll_rate,1101,2,0.2397,months=6)
## M0 M1 M2 M3 M3+
## 2016-03 5187.182 317.8564 170.0601 140.7051 569.1965
pred_roll_rate_1_3(roll_rate,1101,3,0.2397,months=6)
## M0 M1 M2 M3 M3+
## 2016-02 2969.612 182.004 97.40465 80.63737 414.3425
Combine 6 vintage into a dataframe
pred_roll_rate_com = rbind(pred_roll_rate_1_3(roll_rate,1101,1,0.2397,months=6),pred_roll_rate_1_3(roll_rate,1101,2,0.2397,months=6),pred_roll_rate_1_3(roll_rate,1101,3,0.2397,months=6),pred_roll_rate(roll_rate,1101,vintage=4,months=6,alpha=0.2397),pred_roll_rate(roll_rate,1101,vintage=5,months=6,alpha=0.2397),pred_roll_rate(roll_rate,1101,vintage=6,months=6,alpha=0.2397))
pred_roll_rate_com = as.data.frame(pred_roll_rate_com)
###Inser the total row and colums
pred_roll_rate_com $total = rowSums(pred_roll_rate_com )
pred_roll_rate_com ["total",] = colSums(pred_roll_rate_com )
##rate
pred_roll_rate_com_r = as.data.frame(pred_roll_rate_com/rowSums(pred_roll_rate_com[,-6]))
Print the result
pred_roll_rate_com
## M0 M1 M2 M3 M3+ total
## 2016-04 12624.8976 772.64514 412.610044 340.02768 980.8195 15131
## 2016-03 5187.1819 317.85641 170.060059 140.70511 569.1965 6385
## 2016-02 2969.6115 182.00397 97.404648 80.63737 414.3425 3744
## 2016-01 1955.3414 17.10147 8.576941 9.39647 407.5838 2398
## 2015-12 464.1791 34.83368 19.340291 16.46109 301.1858 836
## 2015-11 1244.3684 130.11995 84.929849 75.40859 1508.1732 3043
## total 24445.5799 1454.56063 792.921833 662.63632 4181.3013 31537
pred_roll_rate_com_r
## M0 M1 M2 M3 M3+ total
## 2016-04 0.8343730 0.051063720 0.027269185 0.022472254 0.06482186 1
## 2016-03 0.8124012 0.049781740 0.026634308 0.022036823 0.08914589 1
## 2016-02 0.7931655 0.048612171 0.026016199 0.021537760 0.11066840 1
## 2016-01 0.8154051 0.007131556 0.003576706 0.003918461 0.16996820 1
## 2015-12 0.5552382 0.041667077 0.023134319 0.019690300 0.36027015 1
## 2015-11 0.4089282 0.042760418 0.027909908 0.024781001 0.49562051 1
## total 0.7751397 0.046122352 0.025142589 0.021011393 0.13258399 1