This document looks at the creation of decision trees (CART), which are used to show the likely outcomes for a particular question being asked. In particular, this document uses LoanData.csv and looks at how a loan's credit grade is determined by its borrower rate and amount.
# Preview the first six rows of the loan data to check column names and types.
head(LoanData, n = 6L)
## # A tibble: 6 x 6
## Status Credit.Grade Amount Age Borrower.Rate Debt.To.Income.Ratio
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Current C 5000 4 0.15 0.04
## 2 Current HR 1900 6 0.265 0.02
## 3 Current HR 1000 3 0.15 0.02
## 4 Current HR 1000 5 0.290 0.02
## 5 Current AA 2550 8 0.0795 0.033
## 6 Current NC 1500 2 0.26 0.03
# Convert the two character columns to factors in a single pass.
# Credit.Grade is the response of the classification tree fitted below.
LoanData <- LoanData %>%
  mutate(
    Status = factor(Status),
    Credit.Grade = factor(Credit.Grade)
  )
library(tree)
## Warning: package 'tree' was built under R version 4.0.3
## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
# Fit a classification tree for Credit.Grade using every other column of
# LoanData as a candidate predictor.
# A preliminary fit with mindev = 0.1 used to precede this call, but its result
# was overwritten immediately and never used, so only the retained fit is run.
# mincut = 1 lets a split produce child nodes with as few as one observation;
# the remaining tree.control() settings keep their defaults.
loantree <- tree(Credit.Grade ~ ., data = LoanData, mincut = 1)

# Print the fitted splits, node sizes, deviances and class probabilities.
loantree
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 )
## 12) Borrower.Rate < 0.23825 1097 3210 HR ( 0.0009116 0.0000000 0.0200547 0.0984503 0.2789426 0.2825889 0.2999088 0.0191431 ) *
## 13) Borrower.Rate > 0.23825 1270 2591 HR ( 0.0000000 0.0000000 0.0007874 0.0102362 0.0629921 0.3811024 0.5220472 0.0228346 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 )
## 14) Borrower.Rate < 0.226 577 1715 C ( 0.0294627 0.0121317 0.1455806 0.3622184 0.3466205 0.0606586 0.0415945 0.0017331 ) *
## 15) Borrower.Rate > 0.226 518 1459 E ( 0.0019305 0.0019305 0.0270270 0.0965251 0.3030888 0.4111969 0.1467181 0.0115830 ) *
# Draw the unpruned tree with branches in palette colour 8, then label the
# splits and leaves, rounding split values to two significant digits.
plot(x = loantree, col = 8)
text(x = loantree, digits = 2)
# Cost-complexity pruning with k = 1.7.
# NOTE: the printed result is identical to the unpruned tree; this k is far
# below the smallest threshold in the pruning sequence shown later (358.989),
# so no branch is removed.
loancut <- prune.tree(tree = loantree, k = 1.7)
plot(x = loancut)
loancut
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 )
## 12) Borrower.Rate < 0.23825 1097 3210 HR ( 0.0009116 0.0000000 0.0200547 0.0984503 0.2789426 0.2825889 0.2999088 0.0191431 ) *
## 13) Borrower.Rate > 0.23825 1270 2591 HR ( 0.0000000 0.0000000 0.0007874 0.0102362 0.0629921 0.3811024 0.5220472 0.0228346 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 )
## 14) Borrower.Rate < 0.226 577 1715 C ( 0.0294627 0.0121317 0.1455806 0.3622184 0.3466205 0.0606586 0.0415945 0.0017331 ) *
## 15) Borrower.Rate > 0.226 518 1459 E ( 0.0019305 0.0019305 0.0270270 0.0965251 0.3030888 0.4111969 0.1467181 0.0115830 ) *
# Cost-complexity pruning with k = 2.05.
# NOTE: still below the smallest threshold in the pruning sequence shown later
# (358.989), so the printed tree is again the full seven-leaf tree.
loancut <- prune.tree(tree = loantree, k = 2.05)
plot(x = loancut)
loancut
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 )
## 12) Borrower.Rate < 0.23825 1097 3210 HR ( 0.0009116 0.0000000 0.0200547 0.0984503 0.2789426 0.2825889 0.2999088 0.0191431 ) *
## 13) Borrower.Rate > 0.23825 1270 2591 HR ( 0.0000000 0.0000000 0.0007874 0.0102362 0.0629921 0.3811024 0.5220472 0.0228346 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 )
## 14) Borrower.Rate < 0.226 577 1715 C ( 0.0294627 0.0121317 0.1455806 0.3622184 0.3466205 0.0606586 0.0415945 0.0017331 ) *
## 15) Borrower.Rate > 0.226 518 1459 E ( 0.0019305 0.0019305 0.0270270 0.0965251 0.3030888 0.4111969 0.1467181 0.0115830 ) *
# Cost-complexity pruning with k = 3.
# NOTE: still below the smallest threshold in the pruning sequence shown later
# (358.989), so the printed tree is again the full seven-leaf tree.
loancut <- prune.tree(tree = loantree, k = 3)
plot(x = loancut)
loancut
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 )
## 12) Borrower.Rate < 0.23825 1097 3210 HR ( 0.0009116 0.0000000 0.0200547 0.0984503 0.2789426 0.2825889 0.2999088 0.0191431 ) *
## 13) Borrower.Rate > 0.23825 1270 2591 HR ( 0.0000000 0.0000000 0.0007874 0.0102362 0.0629921 0.3811024 0.5220472 0.0228346 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 )
## 14) Borrower.Rate < 0.226 577 1715 C ( 0.0294627 0.0121317 0.1455806 0.3622184 0.3466205 0.0606586 0.0415945 0.0017331 ) *
## 15) Borrower.Rate > 0.226 518 1459 E ( 0.0019305 0.0019305 0.0270270 0.0965251 0.3030888 0.4111969 0.1467181 0.0115830 ) *
# Cost-complexity pruning with k = 5.
# NOTE: still below the smallest threshold in the pruning sequence shown later
# (358.989), so the printed tree is again the full seven-leaf tree.
loancut <- prune.tree(tree = loantree, k = 5)
plot(x = loancut)
loancut
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 )
## 12) Borrower.Rate < 0.23825 1097 3210 HR ( 0.0009116 0.0000000 0.0200547 0.0984503 0.2789426 0.2825889 0.2999088 0.0191431 ) *
## 13) Borrower.Rate > 0.23825 1270 2591 HR ( 0.0000000 0.0000000 0.0007874 0.0102362 0.0629921 0.3811024 0.5220472 0.0228346 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 )
## 14) Borrower.Rate < 0.226 577 1715 C ( 0.0294627 0.0121317 0.1455806 0.3622184 0.3466205 0.0606586 0.0415945 0.0017331 ) *
## 15) Borrower.Rate > 0.226 518 1459 E ( 0.0019305 0.0019305 0.0270270 0.0965251 0.3030888 0.4111969 0.1467181 0.0115830 ) *
# With neither k nor best supplied, prune.tree() returns the whole
# cost-complexity sequence (sizes, deviances and k thresholds) instead of a
# single pruned tree.
loancut <- prune.tree(tree = loantree)
loancut
## $size
## [1] 7 6 5 4 3 2 1
##
## $dev
## [1] 15443.67 15802.66 16195.81 16609.17 17445.34 18402.43 21494.67
##
## $k
## [1] -Inf 358.9890 393.1502 413.3549 836.1773 957.0818 3092.2446
##
## $method
## [1] "deviance"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# Plot the pruning sequence held in loancut (deviance against tree size).
plot(x = loancut)

# Seed the RNG before cross-validating so the run can be repeated.
set.seed(2)

# 10-fold cross-validation of the tree over its pruning sequence.
cvpst <- cv.tree(object = loantree, K = 10)

# Candidate tree sizes (number of terminal nodes) that were evaluated.
cvpst[["size"]]
## [1] 7 6 5 4 3 2 1
# Cross-validated deviance for each candidate size, in the same order as the
# sizes printed above.
cvpst[["dev"]]
## [1] 15682.04 16360.97 16835.50 16854.92 17620.59 18468.48 21511.58
# Cross-validated deviance against tree size, drawn as filled points; the
# y-axis limits are set by hand to bracket the printed deviance values.
plot(
  cvpst,
  pch = 21,
  bg = 8,
  type = "p",
  cex = 1.5,
  ylim = c(15680, 21520)
)

# Keep the subtree with five terminal nodes and print it.
loancut <- prune.tree(tree = loantree, best = 5)
loancut
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 5611 21490 HR ( 0.0755659 0.0803778 0.0985564 0.1502406 0.1652112 0.2012119 0.2168954 0.0119408 )
## 2) Borrower.Rate < 0.16995 2149 7840 C ( 0.1884597 0.2061424 0.2010237 0.2154490 0.0856212 0.0404839 0.0581666 0.0046533 )
## 4) Borrower.Rate < 0.10945 766 2047 AA ( 0.2950392 0.4869452 0.1122715 0.0248042 0.0104439 0.0221932 0.0456919 0.0026110 ) *
## 5) Borrower.Rate > 0.10945 1383 4835 C ( 0.1294288 0.0506146 0.2501808 0.3210412 0.1272596 0.0506146 0.0650759 0.0057845 )
## 10) Amount < 4999.5 686 2335 C ( 0.0451895 0.0102041 0.1559767 0.3513120 0.2113703 0.0962099 0.1180758 0.0116618 ) *
## 11) Amount > 4999.5 697 2087 B ( 0.2123386 0.0903874 0.3428981 0.2912482 0.0444763 0.0057389 0.0129125 0.0000000 ) *
## 3) Borrower.Rate > 0.16995 3462 10560 HR ( 0.0054882 0.0023108 0.0349509 0.1097631 0.2146158 0.3009821 0.3154246 0.0164645 )
## 6) Amount < 4275 2367 6194 HR ( 0.0004225 0.0000000 0.0097169 0.0511196 0.1630756 0.3354457 0.4190959 0.0211238 ) *
## 7) Amount > 4275 1095 3533 D ( 0.0164384 0.0073059 0.0894977 0.2365297 0.3260274 0.2264840 0.0913242 0.0063927 ) *
# Draw the pruned five-leaf tree with branches in palette colour 8 and label
# its splits and leaves.
plot(x = loancut, col = 8)
text(x = loancut)
The pruned decision tree shows that a loan's credit grade can be classified from its borrower rate and amount. A loan with a borrower rate < 0.10945 (and therefore also < 0.16995) is classified as AA. A loan with a borrower rate between 0.10945 and 0.16995 and an amount < 4999.5 is classified as C. A loan with a borrower rate between 0.10945 and 0.16995 and an amount > 4999.5 is classified as B. A loan with a borrower rate > 0.16995 and an amount < 4275 is classified as HR. A loan with a borrower rate > 0.16995 and an amount > 4275 is classified as D.