## 基于Logit模型的**行业信用风险测度
## 作业说明
## 作业名为《基于logit模型**行业信用风险分析》,补充完整行业名称;
## 具体内容包括
## 1.该行业以及信用风险的基本情况
## 2.描述性统计
##(1)表格。分析相关变量的最大值最小值均值标准差等信息。
##(2)图。使用boxplot比较0-1公司的变量分布特征
## 3.logit模型估计结果
## 4.结果的解释
## 5.模型的检验
## (1)画TPR\FPR图;
## (2)计算AUC的面积
## 6.结论
## 这个 Y 高风险的为0,其他为1
## 加载包
library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(gridExtra)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:glmnet':
##
## auc
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(Deducer)
## Loading required package: JGR
## Loading required package: rJava
## Loading required package: JavaGD
## Loading required package: iplots
## Note: On Mac OS X we strongly recommend using iplots from within JGR.
## Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
## 'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
## also ievent.wait() is disabled.
## More recent OS X version do not allow signle-threaded GUIs and will fail.
##
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
## Loading required package: car
## Loading required package: MASS
##
##
## Note Non-JGR console detected:
## Deducer is best used from within JGR (http://jgr.markushelbig.org/).
## To Bring up GUI dialogs, type deducer().
theme_set(theme_bw(base_family = "STKaiti"))
## 读取数据
logidata <- read_excel("分析数据.xlsx")
logidata$YY <- factor(logidata$Y,levels = c(0,1),labels = c("高风险","其它"))
## 2.描述性统计
##(1)表格。分析相关变量的最大值最小值均值标准差等信息。
summary(logidata)
## 代码 简称 FlowRate
## Length:60 Length:60 Min. : 0.004713
## Class :character Class :character 1st Qu.: 1.143146
## Mode :character Mode :character Median : 1.445763
## Mean : 1.790291
## 3rd Qu.: 1.972126
## Max. :10.592783
## ProfitRate FinancialLeverage TurnoverRate AssetLiabilityRatio
## Min. :0.1936 Min. : 0.2654 Min. :0.000803 Min. : 0.1044
## 1st Qu.:0.7415 1st Qu.: 0.9924 1st Qu.:0.763564 1st Qu.: 0.3442
## Median :0.7758 Median : 1.0931 Median :1.343214 Median : 0.5263
## Mean :0.7864 Mean : 1.5339 Mean :1.333669 Mean : 1.5375
## 3rd Qu.:0.8429 3rd Qu.: 1.3708 3rd Qu.:1.726989 3rd Qu.: 0.7730
## Max. :1.0242 Max. :20.4578 Max. :3.129450 Max. :50.4595
## EquityRatio Y YY
## Min. :-0.9802 Min. :0.0000 高风险: 8
## 1st Qu.: 0.2936 1st Qu.:1.0000 其它 :52
## Median : 0.9000 Median :1.0000
## Mean : 1.4516 Mean :0.8667
## 3rd Qu.: 1.9066 3rd Qu.:1.0000
## Max. : 8.5815 Max. :1.0000
## 标准差 Standard Deviation
apply(logidata[,3:8],2,sd)
## FlowRate ProfitRate FinancialLeverage
## 1.6087051 0.1411268 2.5475723
## TurnoverRate AssetLiabilityRatio EquityRatio
## 0.7257283 6.5480775 1.7740255
##(2)图。使用boxplot比较0-1公司的变量分布特征
p1 <- ggplot(logidata,aes(x = YY,y = FlowRate))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "FlowRate") +
theme(plot.title = element_text(hjust = 0.5))
p1

p2 <- ggplot(logidata,aes(x = YY,y = ProfitRate))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "ProfitRate") +
theme(plot.title = element_text(hjust = 0.5))
p2

p3 <- ggplot(logidata,aes(x = YY,y = FinancialLeverage))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "FinancialLeverage") +
theme(plot.title = element_text(hjust = 0.5))
p3

p4 <- ggplot(logidata,aes(x = YY,y = TurnoverRate))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "TurnoverRate") +
theme(plot.title = element_text(hjust = 0.5))
p4

p5 <- ggplot(logidata,aes(x = YY,y = AssetLiabilityRatio))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "AssetLiabilityRatio") +
theme(plot.title = element_text(hjust = 0.5))
p5

p6 <- ggplot(logidata,aes(x = YY,y = EquityRatio))+
geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
labs(x = "风险状况",title = "EquityRatio") +
theme(plot.title = element_text(hjust = 0.5))
p6

grid.arrange(p1,p2,p3,p4,p5,p6,nrow = 2)

## 3.logit模型估计结果
names(logidata)
## [1] "代码" "简称" "FlowRate"
## [4] "ProfitRate" "FinancialLeverage" "TurnoverRate"
## [7] "AssetLiabilityRatio" "EquityRatio" "Y"
## [10] "YY"
logidata$Y <- as.integer(logidata$Y)
mod1 <- glm(Y~FlowRate+ProfitRate+FinancialLeverage+TurnoverRate+AssetLiabilityRatio+EquityRatio,
data = logidata,family=binomial(link=logit))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mod1)
##
## Call:
## glm(formula = Y ~ FlowRate + ProfitRate + FinancialLeverage +
## TurnoverRate + AssetLiabilityRatio + EquityRatio, family = binomial(link = logit),
## data = logidata)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.86547 0.00918 0.07330 0.17817 0.96132
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -15.76252 13.32055 -1.183 0.2367
## FlowRate -0.02781 0.52836 -0.053 0.9580
## ProfitRate 14.29886 6.80493 2.101 0.0356 *
## FinancialLeverage 8.66922 4.03359 2.149 0.0316 *
## TurnoverRate 2.84946 2.30625 1.236 0.2166
## AssetLiabilityRatio -6.49589 7.13215 -0.911 0.3624
## EquityRatio -0.50881 1.34578 -0.378 0.7054
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47.121 on 59 degrees of freedom
## Residual deviance: 13.111 on 53 degrees of freedom
## AIC: 27.111
##
## Number of Fisher Scoring iterations: 12
mod2 <- step(mod1)
## Start: AIC=27.11
## Y ~ FlowRate + ProfitRate + FinancialLeverage + TurnoverRate +
## AssetLiabilityRatio + EquityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - FlowRate 1 13.114 25.114
## - EquityRatio 1 13.236 25.236
## <none> 13.111 27.111
## - TurnoverRate 1 15.373 27.372
## - ProfitRate 1 20.545 32.545
## - AssetLiabilityRatio 1 20.616 32.616
## - FinancialLeverage 1 23.945 35.945
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=25.11
## Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio +
## EquityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - EquityRatio 1 13.238 23.238
## <none> 13.114 25.114
## - TurnoverRate 1 16.009 26.009
## - AssetLiabilityRatio 1 20.643 30.643
## - ProfitRate 1 21.489 31.489
## - FinancialLeverage 1 24.608 34.608
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=23.24
## Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 13.238 23.238
## - TurnoverRate 1 17.771 25.771
## - ProfitRate 1 21.576 29.576
## - FinancialLeverage 1 25.630 33.630
## - AssetLiabilityRatio 1 27.769 35.769
summary(mod2)
##
## Call:
## glm(formula = Y ~ ProfitRate + FinancialLeverage + TurnoverRate +
## AssetLiabilityRatio, family = binomial(link = logit), data = logidata)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.86488 0.00562 0.06883 0.19066 0.91030
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -19.562 9.912 -1.974 0.0484 *
## ProfitRate 14.948 6.845 2.184 0.0290 *
## FinancialLeverage 9.242 3.936 2.348 0.0189 *
## TurnoverRate 3.411 1.904 1.792 0.0732 .
## AssetLiabilityRatio -4.021 2.454 -1.639 0.1012
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47.121 on 59 degrees of freedom
## Residual deviance: 13.238 on 55 degrees of freedom
## AIC: 23.238
##
## Number of Fisher Scoring iterations: 10
## 4.结果的解释
## AIC: 23.238;formula = Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio
## 使用模型二进行预测
logidata$preY <- predict(mod2,logidata,type="response")
logidata$preYY <- ifelse(logidata$preY>0.5,1,0)
logidata$preY
## [1] 6.607868e-01 2.220446e-16 9.953960e-01 9.964907e-01 9.968174e-01
## [6] 9.551973e-01 4.027341e-02 9.999700e-01 9.922248e-01 9.834899e-01
## [11] 9.996953e-01 9.825380e-01 9.988783e-01 9.865285e-01 9.999840e-01
## [16] 2.220446e-16 1.000000e+00 3.604849e-04 1.000000e+00 9.999999e-01
## [21] 9.977208e-01 9.998582e-01 8.378363e-01 9.975452e-01 9.992855e-01
## [26] 9.998917e-01 9.587517e-01 2.423231e-01 9.999993e-01 9.469397e-01
## [31] 9.802757e-01 9.907047e-01 9.997321e-01 9.948195e-01 9.992785e-01
## [36] 9.951717e-01 7.714751e-01 1.000000e+00 8.021206e-01 9.998516e-01
## [41] 1.657270e-01 9.999849e-01 7.872184e-01 9.491468e-01 1.918033e-01
## [46] 9.998999e-01 9.998863e-01 9.873727e-01 9.638155e-01 9.792227e-01
## [51] 9.996703e-01 9.587209e-01 9.573729e-01 9.977963e-01 9.926356e-01
## [56] 9.943267e-01 9.965034e-01 9.999852e-01 9.802942e-01 9.964051e-01
## 混淆矩阵
table(logidata$Y,logidata$preYY)
##
## 0 1
## 0 7 1
## 1 0 52
## 5.模型的检验
## (1)画TPR\FPR图;
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
pred <-prediction(logidata$preY,logidata$Y)
tpr <- pred@tp[[1]]/max(pred@tp[[1]])
fpr <- pred@fp[[1]]/max(pred@fp[[1]])
plot(fpr, tpr, type = "l")

## 该图和下面的图一样
## (2)计算AUC的面积,方法1
g <- roc(Y ~ preY, data = logidata,direction="<")
plot(g,col="yellow", lwd=3, main="The turtle finds its way")

##
## Call:
## roc.formula(formula = Y ~ preY, data = logidata, direction = "<")
##
## Data: preY in 8 controls (Y 0) < 52 cases (Y 1).
## Area under the curve: 0.9615
## 方法2
rocplot(mod2)

rocplot(mod2) +
ggtitle("模型的ROC曲线")+
theme(plot.title = element_text(hjust = 0.5))
