## 基于Logit模型的**行业信用风险测度
## 作业说明
## 作业名为《基于logit模型**行业信用风险分析》,补充完整行业名称;
## 具体内容包括
## 1.该行业以及信用风险的基本情况
## 2.描述性统计
##(1)表格。分析相关变量的最大值最小值均值标准差等信息。
##(2)图。使用boxplot比较0-1公司的变量分布特征
## 3.logit模型估计结果
## 4.结果的解释
## 5.模型的检验
## (1)画TPR\FPR图;
## (2)计算AUC的面积
## 6.结论


## 这个 Y 高风险的为0,其他为1


## 加载包
library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(gridExtra)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:glmnet':
## 
##     auc
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(Deducer)
## Loading required package: JGR
## Loading required package: rJava
## Loading required package: JavaGD
## Loading required package: iplots
## Note: On Mac OS X we strongly recommend using iplots from within JGR.
## Proceed at your own risk as iplots cannot resolve potential ev.loop deadlocks.
## 'Yes' is assumed for all dialogs as they cannot be shown without a deadlock,
## also ievent.wait() is disabled.
## More recent OS X version do not allow signle-threaded GUIs and will fail.
## 
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
## Loading required package: car
## Loading required package: MASS
## 
## 
## Note Non-JGR console detected:
##  Deducer is best used from within JGR (http://jgr.markushelbig.org/).
##  To Bring up GUI dialogs, type deducer().
theme_set(theme_bw(base_family = "STKaiti"))

## 读取数据
logidata <- read_excel("分析数据.xlsx")
logidata$YY <- factor(logidata$Y,levels = c(0,1),labels = c("高风险","其它"))
## 2.描述性统计
##(1)表格。分析相关变量的最大值最小值均值标准差等信息。
summary(logidata)
##      代码               简称              FlowRate        
##  Length:60          Length:60          Min.   : 0.004713  
##  Class :character   Class :character   1st Qu.: 1.143146  
##  Mode  :character   Mode  :character   Median : 1.445763  
##                                        Mean   : 1.790291  
##                                        3rd Qu.: 1.972126  
##                                        Max.   :10.592783  
##    ProfitRate     FinancialLeverage  TurnoverRate      AssetLiabilityRatio
##  Min.   :0.1936   Min.   : 0.2654   Min.   :0.000803   Min.   : 0.1044    
##  1st Qu.:0.7415   1st Qu.: 0.9924   1st Qu.:0.763564   1st Qu.: 0.3442    
##  Median :0.7758   Median : 1.0931   Median :1.343214   Median : 0.5263    
##  Mean   :0.7864   Mean   : 1.5339   Mean   :1.333669   Mean   : 1.5375    
##  3rd Qu.:0.8429   3rd Qu.: 1.3708   3rd Qu.:1.726989   3rd Qu.: 0.7730    
##  Max.   :1.0242   Max.   :20.4578   Max.   :3.129450   Max.   :50.4595    
##   EquityRatio            Y               YY    
##  Min.   :-0.9802   Min.   :0.0000   高风险: 8  
##  1st Qu.: 0.2936   1st Qu.:1.0000   其它  :52  
##  Median : 0.9000   Median :1.0000              
##  Mean   : 1.4516   Mean   :0.8667              
##  3rd Qu.: 1.9066   3rd Qu.:1.0000              
##  Max.   : 8.5815   Max.   :1.0000
## 标准差 Standard Deviation
apply(logidata[,3:8],2,sd)
##            FlowRate          ProfitRate   FinancialLeverage 
##           1.6087051           0.1411268           2.5475723 
##        TurnoverRate AssetLiabilityRatio         EquityRatio 
##           0.7257283           6.5480775           1.7740255
##(2)图。使用boxplot比较0-1公司的变量分布特征
p1 <- ggplot(logidata,aes(x = YY,y = FlowRate))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "FlowRate") +
  theme(plot.title = element_text(hjust = 0.5))
p1

p2 <- ggplot(logidata,aes(x = YY,y = ProfitRate))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "ProfitRate") +
  theme(plot.title = element_text(hjust = 0.5))
p2

p3 <- ggplot(logidata,aes(x = YY,y = FinancialLeverage))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "FinancialLeverage") +
  theme(plot.title = element_text(hjust = 0.5))
p3

p4 <- ggplot(logidata,aes(x = YY,y = TurnoverRate))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "TurnoverRate") +
  theme(plot.title = element_text(hjust = 0.5))
p4

p5 <- ggplot(logidata,aes(x = YY,y = AssetLiabilityRatio))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "AssetLiabilityRatio") +
  theme(plot.title = element_text(hjust = 0.5))
p5

p6 <- ggplot(logidata,aes(x = YY,y = EquityRatio))+
  geom_boxplot(colour = "red") +#geom_point(colour = "blue") +
  labs(x = "风险状况",title = "EquityRatio") +
  theme(plot.title = element_text(hjust = 0.5))
p6

grid.arrange(p1,p2,p3,p4,p5,p6,nrow = 2)

## 3.logit模型估计结果
names(logidata)
##  [1] "代码"                "简称"                "FlowRate"           
##  [4] "ProfitRate"          "FinancialLeverage"   "TurnoverRate"       
##  [7] "AssetLiabilityRatio" "EquityRatio"         "Y"                  
## [10] "YY"
logidata$Y <- as.integer(logidata$Y)
mod1 <- glm(Y~FlowRate+ProfitRate+FinancialLeverage+TurnoverRate+AssetLiabilityRatio+EquityRatio,
            data = logidata,family=binomial(link=logit))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mod1)
## 
## Call:
## glm(formula = Y ~ FlowRate + ProfitRate + FinancialLeverage + 
##     TurnoverRate + AssetLiabilityRatio + EquityRatio, family = binomial(link = logit), 
##     data = logidata)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.86547   0.00918   0.07330   0.17817   0.96132  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -15.76252   13.32055  -1.183   0.2367  
## FlowRate             -0.02781    0.52836  -0.053   0.9580  
## ProfitRate           14.29886    6.80493   2.101   0.0356 *
## FinancialLeverage     8.66922    4.03359   2.149   0.0316 *
## TurnoverRate          2.84946    2.30625   1.236   0.2166  
## AssetLiabilityRatio  -6.49589    7.13215  -0.911   0.3624  
## EquityRatio          -0.50881    1.34578  -0.378   0.7054  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47.121  on 59  degrees of freedom
## Residual deviance: 13.111  on 53  degrees of freedom
## AIC: 27.111
## 
## Number of Fisher Scoring iterations: 12
mod2 <- step(mod1)
## Start:  AIC=27.11
## Y ~ FlowRate + ProfitRate + FinancialLeverage + TurnoverRate + 
##     AssetLiabilityRatio + EquityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                       Df Deviance    AIC
## - FlowRate             1   13.114 25.114
## - EquityRatio          1   13.236 25.236
## <none>                     13.111 27.111
## - TurnoverRate         1   15.373 27.372
## - ProfitRate           1   20.545 32.545
## - AssetLiabilityRatio  1   20.616 32.616
## - FinancialLeverage    1   23.945 35.945
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=25.11
## Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio + 
##     EquityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                       Df Deviance    AIC
## - EquityRatio          1   13.238 23.238
## <none>                     13.114 25.114
## - TurnoverRate         1   16.009 26.009
## - AssetLiabilityRatio  1   20.643 30.643
## - ProfitRate           1   21.489 31.489
## - FinancialLeverage    1   24.608 34.608
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=23.24
## Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                       Df Deviance    AIC
## <none>                     13.238 23.238
## - TurnoverRate         1   17.771 25.771
## - ProfitRate           1   21.576 29.576
## - FinancialLeverage    1   25.630 33.630
## - AssetLiabilityRatio  1   27.769 35.769
summary(mod2)
## 
## Call:
## glm(formula = Y ~ ProfitRate + FinancialLeverage + TurnoverRate + 
##     AssetLiabilityRatio, family = binomial(link = logit), data = logidata)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.86488   0.00562   0.06883   0.19066   0.91030  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -19.562      9.912  -1.974   0.0484 *
## ProfitRate            14.948      6.845   2.184   0.0290 *
## FinancialLeverage      9.242      3.936   2.348   0.0189 *
## TurnoverRate           3.411      1.904   1.792   0.0732 .
## AssetLiabilityRatio   -4.021      2.454  -1.639   0.1012  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47.121  on 59  degrees of freedom
## Residual deviance: 13.238  on 55  degrees of freedom
## AIC: 23.238
## 
## Number of Fisher Scoring iterations: 10
## 4.结果的解释
## AIC: 23.238;formula = Y ~ ProfitRate + FinancialLeverage + TurnoverRate + AssetLiabilityRatio

## 使用模型二进行预测
logidata$preY <- predict(mod2,logidata,type="response")
logidata$preYY <- ifelse(logidata$preY>0.5,1,0)
logidata$preY
##  [1] 6.607868e-01 2.220446e-16 9.953960e-01 9.964907e-01 9.968174e-01
##  [6] 9.551973e-01 4.027341e-02 9.999700e-01 9.922248e-01 9.834899e-01
## [11] 9.996953e-01 9.825380e-01 9.988783e-01 9.865285e-01 9.999840e-01
## [16] 2.220446e-16 1.000000e+00 3.604849e-04 1.000000e+00 9.999999e-01
## [21] 9.977208e-01 9.998582e-01 8.378363e-01 9.975452e-01 9.992855e-01
## [26] 9.998917e-01 9.587517e-01 2.423231e-01 9.999993e-01 9.469397e-01
## [31] 9.802757e-01 9.907047e-01 9.997321e-01 9.948195e-01 9.992785e-01
## [36] 9.951717e-01 7.714751e-01 1.000000e+00 8.021206e-01 9.998516e-01
## [41] 1.657270e-01 9.999849e-01 7.872184e-01 9.491468e-01 1.918033e-01
## [46] 9.998999e-01 9.998863e-01 9.873727e-01 9.638155e-01 9.792227e-01
## [51] 9.996703e-01 9.587209e-01 9.573729e-01 9.977963e-01 9.926356e-01
## [56] 9.943267e-01 9.965034e-01 9.999852e-01 9.802942e-01 9.964051e-01
## 混淆矩阵
table(logidata$Y,logidata$preYY)
##    
##      0  1
##   0  7  1
##   1  0 52
## 5.模型的检验
## (1)画TPR\FPR图;
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
pred <-prediction(logidata$preY,logidata$Y)
tpr <- pred@tp[[1]]/max(pred@tp[[1]])
fpr <- pred@fp[[1]]/max(pred@fp[[1]])
plot(fpr, tpr, type = "l")

## 该图和下面的图一样

## (2)计算AUC的面积,方法1
g <- roc(Y ~ preY, data = logidata,direction="<")
plot(g,col="yellow", lwd=3, main="The turtle finds its way")    

## 
## Call:
## roc.formula(formula = Y ~ preY, data = logidata, direction = "<")
## 
## Data: preY in 8 controls (Y 0) < 52 cases (Y 1).
## Area under the curve: 0.9615
## 方法2
rocplot(mod2)

rocplot(mod2) +
  ggtitle("模型的ROC曲线")+
  theme(plot.title = element_text(hjust = 0.5))