# Number printing: no scientific-notation penalty, 7 significant digits
options(digits = 7, scipen = 0)
# Libraries
library(data.table) # Data Table library
library (plyr)
library(psych)
library(coda)
library(base) # Date-time conversion
library(ggplot2) # Plotting Library
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(moments) # Measures of distribution
library(ggcorrplot) # Correlation plotting
library(mice) # Missing values
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(devtools)
## Loading required package: usethis
library(superml) # Label encoding
## Loading required package: R6
library(R6)
library(stats) # PCA
library(ggbiplot) # PCA plot
## Loading required package: scales
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
## Loading required package: grid
library(factoextra, quietly = TRUE) # PCA visualization
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot) # Correlation plot
## corrplot 0.92 loaded
library(clustvarsel)
## Loading required package: mclust
## Package 'mclust' version 5.4.10
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
## The following object is masked from 'package:psych':
##
## sim
## Package 'clustvarsel' version 2.3.4
## Type 'citation("clustvarsel")' for citing this R package in publications.
library(mclust, quiet = T)
library(FSelectorRcpp) # Feature ranking
library(caret)
## Loading required package: lattice
library(lattice)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
library(viridisLite)
library(hrbrthemes, quietly = T)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(FSelectorRcpp)
Computing PCA
# Read the data set; the first column is kept aside as y, the rest form X
data <- read.csv("data.csv")
y <- data[, 1]
X <- data[, -1]
# Preview the first rows of the predictors
head(X)
## ROA.C..before.interest.and.depreciation.before.interest
## 1 0.3705943
## 2 0.4642909
## 3 0.4260713
## 4 0.3998440
## 5 0.4650222
## 6 0.3886803
## ROA.A..before.interest.and...after.tax
## 1 0.4243894
## 2 0.5382141
## 3 0.4990188
## 4 0.4512647
## 5 0.5384322
## 6 0.4151766
## ROA.B..before.interest.and.depreciation.after.tax Operating.Gross.Margin
## 1 0.4057498 0.6014572
## 2 0.5167300 0.6102351
## 3 0.4722951 0.6014500
## 4 0.4577333 0.5835411
## 5 0.5222978 0.5987835
## 6 0.4191338 0.5901714
## Realized.Sales.Gross.Margin Operating.Profit.Rate Pre.tax.net.Interest.Rate
## 1 0.6014572 0.9989692 0.7968871
## 2 0.6102351 0.9989460 0.7973802
## 3 0.6013635 0.9988574 0.7964034
## 4 0.5835411 0.9986997 0.7969670
## 5 0.5987835 0.9989731 0.7973661
## 6 0.5902507 0.9987581 0.7969032
## After.tax.net.Interest.Rate Non.industry.income.and.expenditure.revenue
## 1 0.8088094 0.3026464
## 2 0.8093007 0.3035564
## 3 0.8083875 0.3020352
## 4 0.8089656 0.3033495
## 5 0.8093037 0.3034750
## 6 0.8087706 0.3031158
## Continuous.interest.rate..after.tax. Operating.Expense.Rate
## 1 0.7809849 1.256969e-04
## 2 0.7815060 2.897851e-04
## 3 0.7802839 2.361297e-04
## 4 0.7812410 1.078888e-04
## 5 0.7815500 7.890000e+09
## 6 0.7810691 1.571500e-04
## Research.and.development.expense.rate Cash.flow.rate
## 1 0 0.4581431
## 2 0 0.4618673
## 3 25500000 0.4585206
## 4 0 0.4657054
## 5 0 0.4627463
## 6 0 0.4658615
## Interest.bearing.debt.interest.rate Tax.rate..A. Net.Value.Per.Share..B.
## 1 0.0007250725 0 0.1479499
## 2 0.0006470647 0 0.1822511
## 3 0.0007900790 0 0.1779107
## 4 0.0004490449 0 0.1541865
## 5 0.0006860686 0 0.1675024
## 6 0.0007160716 0 0.1555771
## Net.Value.Per.Share..A. Net.Value.Per.Share..C.
## 1 0.1479499 0.1479499
## 2 0.1822511 0.1822511
## 3 0.1779107 0.1937129
## 4 0.1541865 0.1541865
## 5 0.1675024 0.1675024
## 6 0.1555771 0.1555771
## Persistent.EPS.in.the.Last.Four.Seasons Cash.Flow.Per.Share
## 1 0.1691406 0.3116644
## 2 0.2089439 0.3181368
## 3 0.1805805 0.3071019
## 4 0.1937222 0.3216736
## 5 0.2125366 0.3191625
## 6 0.1744351 0.3253873
## Revenue.Per.Share..Yuan... Operating.Profit.Per.Share..Yuan...
## 1 0.017559780 0.09592053
## 2 0.021144335 0.09372201
## 3 0.005944008 0.09233776
## 4 0.014368468 0.07776240
## 5 0.029689792 0.09689765
## 6 0.018104270 0.07808810
## Per.Share.Net.profit.before.tax..Yuan...
## 1 0.1387362
## 2 0.1699179
## 3 0.1428033
## 4 0.1486028
## 5 0.1684115
## 6 0.1388115
## Realized.Sales.Gross.Profit.Growth.Rate Operating.Profit.Growth.Rate
## 1 0.02210228 0.8481950
## 2 0.02208017 0.8480879
## 3 0.02276010 0.8480940
## 4 0.02204607 0.8480055
## 5 0.02209591 0.8482582
## 6 0.02156494 0.8479828
## After.tax.Net.Profit.Growth.Rate Regular.Net.Profit.Growth.Rate
## 1 0.6889795 0.6889795
## 2 0.6896929 0.6897017
## 3 0.6894627 0.6894697
## 4 0.6891095 0.6891095
## 5 0.6896969 0.6896969
## 6 0.6891051 0.6891775
## Continuous.Net.Profit.Growth.Rate Total.Asset.Growth.Rate
## 1 0.2175354 4.98e+09
## 2 0.2176196 6.11e+09
## 3 0.2176013 7.28e+09
## 4 0.2175682 4.88e+09
## 5 0.2176256 5.51e+09
## 6 0.2175664 6.08e+08
## Net.Value.Growth.Rate Total.Asset.Return.Growth.Rate.Ratio
## 1 0.0003269773 0.2631000
## 2 0.0004430401 0.2645158
## 3 0.0003964253 0.2641840
## 4 0.0003824259 0.2633712
## 5 0.0004389476 0.2652182
## 6 0.0003517819 0.2632500
## Cash.Reinvestment.. Current.Ratio Quick.Ratio Interest.Expense.Ratio
## 1 0.3637253 0.002258963 0.0012077551 0.6299513
## 2 0.3767091 0.006016206 0.0040393668 0.6351725
## 3 0.3689132 0.011542554 0.0053475602 0.6296314
## 4 0.3840766 0.004194059 0.0028964911 0.6302284
## 5 0.3796897 0.006022446 0.0037274466 0.6360550
## 6 0.3880258 0.002740085 0.0008546614 0.6301838
## Total.debt.Total.net.worth Debt.ratio.. Net.worth.Assets
## 1 0.021265924 0.2075763 0.7924237
## 2 0.012502394 0.1711763 0.8288237
## 3 0.021247686 0.2075158 0.7924842
## 4 0.009572402 0.1514648 0.8485352
## 5 0.005149600 0.1065091 0.8934909
## 6 0.014213152 0.1804275 0.8195725
## Long.term.fund.suitability.ratio..A. Borrowing.dependency
## 1 0.005024455 0.3902844
## 2 0.005058882 0.3767600
## 3 0.005099899 0.3790929
## 4 0.005046924 0.3797427
## 5 0.005303319 0.3750254
## 6 0.004913193 0.3814482
## Contingent.liabilities.Net.worth Operating.profit.Paid.in.capital
## 1 0.006478502 0.09588483
## 2 0.005835039 0.09374338
## 3 0.006561982 0.09231847
## 4 0.005365848 0.07772729
## 5 0.006623525 0.09692706
## 6 0.005749123 0.07810185
## Net.profit.before.tax.Paid.in.capital
## 1 0.1377573
## 2 0.1689616
## 3 0.1480356
## 4 0.1475605
## 5 0.1674610
## 6 0.1378252
## Inventory.and.accounts.receivable.Net.value Total.Asset.Turnover
## 1 0.3980357 0.08695652
## 2 0.3977249 0.06446777
## 3 0.4065805 0.01499250
## 4 0.3979245 0.08995502
## 5 0.4000788 0.17541229
## 6 0.4004191 0.09595202
## Accounts.Receivable.Turnover Average.Collection.Days
## 1 0.001813884 0.003487364
## 2 0.001286356 0.004916808
## 3 0.001495338 0.004226849
## 4 0.001966056 0.003214967
## 5 0.001448673 0.004366891
## 6 0.001527802 0.004137189
## Inventory.Turnover.Rate..times. Fixed.Assets.Turnover.Frequency
## 1 1.820926e-04 1.165007e-04
## 2 9.360000e+09 7.190000e+08
## 3 6.500000e+07 2.650000e+09
## 4 7.130000e+09 9.150000e+09
## 5 1.633674e-04 2.935211e-04
## 6 6.500000e+08 9.300000e+09
## Net.Worth.Turnover.Rate..times. Revenue.per.person
## 1 0.03290323 0.034164182
## 2 0.02548387 0.006888651
## 3 0.01338710 0.028996960
## 4 0.02806452 0.015463478
## 5 0.04016129 0.058111423
## 6 0.02967742 0.021300471
## Operating.profit.per.person Allocation.rate.per.person
## 1 0.3929129 0.03713530
## 2 0.3915900 0.01233497
## 3 0.3819678 0.14101631
## 4 0.3784966 0.02131999
## 5 0.3943715 0.02398821
## 6 0.3775695 0.03282877
## Working.Capital.to.Total.Assets Quick.Assets.Total.Assets
## 1 0.6727753 0.16667296
## 2 0.7511109 0.12723600
## 3 0.8295019 0.34020088
## 4 0.7257542 0.16157453
## 5 0.7518225 0.26032988
## 6 0.6867286 0.08026371
## Current.Assets.Total.Assets Cash.Total.Assets Quick.Assets.Current.Liability
## 1 0.1906430 0.0040944060 0.001996771
## 2 0.1824191 0.0149477270 0.004136030
## 3 0.6028057 0.0009909445 0.006302481
## 4 0.2258149 0.0188506248 0.002961238
## 5 0.3583802 0.0141609738 0.004274771
## 6 0.2145360 0.0026452256 0.000988425
## Cash.Current.Liability Current.Liability.to.Assets
## 1 1.473360e-04 0.14730845
## 2 1.383910e-03 0.05696283
## 3 5.340000e+09 0.09816206
## 4 1.010646e-03 0.09871463
## 5 6.804636e-04 0.11019485
## 6 1.008563e-04 0.13900211
## Operating.Funds.to.Liability Inventory.Working.Capital
## 1 0.3340152 0.2769202
## 2 0.3411060 0.2896416
## 3 0.3367315 0.2774555
## 4 0.3487164 0.2765803
## 5 0.3446388 0.2879127
## 6 0.3505631 0.2766785
## Inventory.Current.Liability Current.Liabilities.Liability
## 1 0.001035990 0.6762692
## 2 0.005209682 0.3085886
## 3 0.013878786 0.4460275
## 4 0.003540148 0.6158484
## 5 0.004868570 0.9750066
## 6 0.004879131 0.7333519
## Working.Capital.Equity Current.Liabilities.Equity
## 1 0.7212746 0.3390770
## 2 0.7319753 0.3297401
## 3 0.7427286 0.3347769
## 4 0.7298249 0.3315090
## 5 0.7319996 0.3307263
## 6 0.7252016 0.3355344
## Long.term.Liability.to.Current.Assets Retained.Earnings.to.Total.Assets
## 1 0.025592368 0.9032248
## 2 0.023946819 0.9310652
## 3 0.003715116 0.9099034
## 4 0.022165200 0.9069022
## 5 0.000000000 0.9138502
## 6 0.003772505 0.9030413
## Total.income.Total.expense Total.expense.Assets Current.Asset.Turnover.Rate
## 1 0.002021613 0.06485571 7.010000e+08
## 2 0.002225608 0.02551586 1.065198e-04
## 3 0.002060071 0.02138743 1.791094e-03
## 4 0.001831359 0.02416107 8.140000e+09
## 5 0.002223930 0.02638525 6.680000e+09
## 6 0.001865609 0.04009362 8.010000e+09
## Quick.Asset.Turnover.Rate Working.capitcal.Turnover.Rate Cash.Turnover.Rate
## 1 6.550000e+09 0.5938305 4.58e+08
## 2 7.700000e+09 0.5939155 2.49e+09
## 3 1.022676e-03 0.5945019 7.61e+08
## 4 6.050000e+09 0.5938888 2.03e+09
## 5 5.050000e+09 0.5939153 8.24e+08
## 6 2.810000e+09 0.5938458 2.95e+08
## Cash.Flow.to.Sales Fixed.Assets.to.Assets Current.Liability.to.Liability
## 1 0.6715677 0.4242058 0.6762692
## 2 0.6715699 0.4688281 0.3085886
## 3 0.6715713 0.2761792 0.4460275
## 4 0.6715192 0.5591440 0.6158484
## 5 0.6715631 0.3095549 0.9750066
## 6 0.6715676 0.6031935 0.7333519
## Current.Liability.to.Equity Equity.to.Long.term.Liability
## 1 0.3390770 0.1265495
## 2 0.3297401 0.1209161
## 3 0.3347769 0.1179223
## 4 0.3315090 0.1207605
## 5 0.3307263 0.1109332
## 6 0.3355344 0.1129172
## Cash.Flow.to.Total.Assets Cash.Flow.to.Liability CFO.to.Assets
## 1 0.6375554 0.4586091 0.5203819
## 2 0.6411000 0.4590011 0.5671013
## 3 0.6427646 0.4592540 0.5384905
## 4 0.5790393 0.4485179 0.6041051
## 5 0.6223741 0.4544109 0.5784689
## 6 0.6374698 0.4584993 0.6221901
## Cash.Flow.to.Equity Current.Liability.to.Current.Assets Liability.Assets.Flag
## 1 0.3129049 0.11825048 0
## 2 0.3141631 0.04777528 0
## 3 0.3145154 0.02534649 0
## 4 0.3023823 0.06724962 0
## 5 0.3115672 0.04772537 0
## 6 0.3132685 0.09952193 0
## Net.Income.to.Total.Assets Total.assets.to.GNP.price No.credit.Interval
## 1 0.7168453 0.009219440 0.6228790
## 2 0.7952971 0.008323302 0.6236517
## 3 0.7746697 0.040002853 0.6238410
## 4 0.7395545 0.003252475 0.6229287
## 5 0.7950159 0.003877563 0.6235207
## 6 0.7104205 0.005277875 0.6226046
## Gross.Profit.to.Sales Net.Income.to.Stockholder.s.Equity Liability.to.Equity
## 1 0.6014533 0.8278902 0.2902019
## 2 0.6102365 0.8399693 0.2838460
## 3 0.6014493 0.8367743 0.2901885
## 4 0.5835376 0.8346971 0.2817212
## 5 0.5987815 0.8399727 0.2785138
## 6 0.5901723 0.8299390 0.2850871
## Degree.of.Financial.Leverage..DFL.
## 1 0.02660063
## 2 0.26457682
## 3 0.02655472
## 4 0.02669663
## 5 0.02475185
## 6 0.02667537
## Interest.Coverage.Ratio..Interest.expense.to.EBIT. Net.Income.Flag
## 1 0.5640501 1
## 2 0.5701749 1
## 3 0.5637061 1
## 4 0.5646634 1
## 5 0.5756166 1
## 6 0.5645383 1
## Equity.to.Liability
## 1 0.01646874
## 2 0.02079431
## 3 0.01647411
## 4 0.02398233
## 5 0.03549020
## 6 0.01953448
# Number of distinct values in the ROA.C column
length(unique(X[["ROA.C..before.interest.and.depreciation.before.interest"]]))
## [1] 3333
# Report columns with very few distinct values. These are candidates for
# constant (zero-variance) or indicator columns; a constant column cannot be
# rescaled to unit variance, so it must be removed before a scaled PCA.
max.unique <- 30 # columns with fewer distinct values than this are printed
for (col in colnames(X)) {
  # `[[` extracts the column as a plain vector regardless of its name
  if (length(unique(X[[col]])) < max.unique) {
    print(col)
  }
}
## [1] "Liability.Assets.Flag"
## [1] "Net.Income.Flag"
# Distinct values of Liability.Assets.Flag
unique(X[["Liability.Assets.Flag"]])
## [1] 0 1
# Distinct values of Net.Income.Flag
unique(X[["Net.Income.Flag"]])
## [1] 1
# Net.Income.Flag is dropped before the PCA.
# Guard the negative index: if the column were absent, `which()` would
# return integer(0) and `X[, -integer(0)]` would select zero columns
# instead of leaving X unchanged.
col <- which(colnames(X) == "Net.Income.Flag")
X.pca <- if (length(col) > 0) X[, -col, drop = FALSE] else X
# PCA on standardized variables; `scale.` is the full argument name
# (the original `scale = T` only worked through partial matching)
pca <- prcomp(X.pca, scale. = TRUE)
Scree Plot
Visualizing the eigenvalues.
# Scree plot of the PCA eigenvalues, with value labels on the bars
fviz_eig(pca, ylim = c(0, 40), addlabels = TRUE)
# Per-component standard deviation and (cumulative) proportion of variance
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.5421 2.60402 2.17589 2.10737 1.99515 1.7398 1.69302
## Proportion of Variance 0.1335 0.07214 0.05037 0.04724 0.04235 0.0322 0.03049
## Cumulative Proportion 0.1335 0.20561 0.25597 0.30322 0.34557 0.3778 0.40826
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 1.66079 1.62288 1.42376 1.39126 1.36585 1.29676 1.22658
## Proportion of Variance 0.02934 0.02802 0.02156 0.02059 0.01985 0.01789 0.01601
## Cumulative Proportion 0.43760 0.46562 0.48719 0.50778 0.52763 0.54551 0.56152
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 1.21788 1.20052 1.19314 1.14600 1.11174 1.09444 1.08316
## Proportion of Variance 0.01578 0.01533 0.01514 0.01397 0.01315 0.01274 0.01248
## Cumulative Proportion 0.57730 0.59263 0.60778 0.62175 0.63490 0.64764 0.66012
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 1.06979 1.04464 1.0258 1.0216 1.01646 1.00550 1.00431
## Proportion of Variance 0.01218 0.01161 0.0112 0.0111 0.01099 0.01076 0.01073
## Cumulative Proportion 0.67229 0.68390 0.6951 0.7062 0.71719 0.72795 0.73868
## PC29 PC30 PC31 PC32 PC33 PC34 PC35
## Standard deviation 1.00349 0.99258 0.98787 0.98249 0.97868 0.97609 0.96895
## Proportion of Variance 0.01071 0.01048 0.01038 0.01027 0.01019 0.01014 0.00999
## Cumulative Proportion 0.74939 0.75987 0.77026 0.78052 0.79071 0.80085 0.81084
## PC36 PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 0.95926 0.95447 0.95246 0.93801 0.92408 0.9200 0.91108
## Proportion of Variance 0.00979 0.00969 0.00965 0.00936 0.00908 0.0090 0.00883
## Cumulative Proportion 0.82063 0.83032 0.83997 0.84933 0.85841 0.8674 0.87625
## PC43 PC44 PC45 PC46 PC47 PC48 PC49
## Standard deviation 0.89548 0.89160 0.86972 0.84674 0.8282 0.81158 0.77470
## Proportion of Variance 0.00853 0.00846 0.00805 0.00763 0.0073 0.00701 0.00638
## Cumulative Proportion 0.88478 0.89323 0.90128 0.90891 0.9162 0.92321 0.92960
## PC50 PC51 PC52 PC53 PC54 PC55 PC56
## Standard deviation 0.75457 0.73706 0.73394 0.70858 0.68317 0.6504 0.64172
## Proportion of Variance 0.00606 0.00578 0.00573 0.00534 0.00497 0.0045 0.00438
## Cumulative Proportion 0.93565 0.94143 0.94716 0.95251 0.95747 0.9620 0.96635
## PC57 PC58 PC59 PC60 PC61 PC62 PC63
## Standard deviation 0.62196 0.57071 0.56909 0.50048 0.49503 0.4647 0.46311
## Proportion of Variance 0.00412 0.00347 0.00345 0.00266 0.00261 0.0023 0.00228
## Cumulative Proportion 0.97047 0.97393 0.97738 0.98004 0.98265 0.9849 0.98723
## PC64 PC65 PC66 PC67 PC68 PC69 PC70
## Standard deviation 0.4111 0.40695 0.39339 0.3749 0.3353 0.2912 0.28669
## Proportion of Variance 0.0018 0.00176 0.00165 0.0015 0.0012 0.0009 0.00087
## Cumulative Proportion 0.9890 0.99079 0.99243 0.9939 0.9951 0.9960 0.99690
## PC71 PC72 PC73 PC74 PC75 PC76 PC77
## Standard deviation 0.25234 0.23169 0.1938 0.17058 0.15664 0.14825 0.13899
## Proportion of Variance 0.00068 0.00057 0.0004 0.00031 0.00026 0.00023 0.00021
## Cumulative Proportion 0.99758 0.99815 0.9986 0.99886 0.99912 0.99935 0.99956
## PC78 PC79 PC80 PC81 PC82 PC83 PC84
## Standard deviation 0.12361 0.09752 0.07504 0.06138 0.05153 0.04017 0.03030
## Proportion of Variance 0.00016 0.00010 0.00006 0.00004 0.00003 0.00002 0.00001
## Cumulative Proportion 0.99972 0.99982 0.99988 0.99992 0.99995 0.99997 0.99998
## PC85 PC86 PC87 PC88 PC89 PC90
## Standard deviation 0.02794 0.02541 0.01942 0.01219 8.611e-05 3.248e-06
## Proportion of Variance 0.00001 0.00001 0.00000 0.00000 0.000e+00 0.000e+00
## Cumulative Proportion 0.99999 0.99999 1.00000 1.00000 1.000e+00 1.000e+00
## PC91 PC92 PC93 PC94
## Standard deviation 1.413e-09 3.356e-15 5.056e-16 2.034e-16
## Proportion of Variance 0.000e+00 0.000e+00 0.000e+00 0.000e+00
## Cumulative Proportion 1.000e+00 1.000e+00 1.000e+00 1.000e+00
Quality of Representation
# Variable-level PCA results (coordinates, correlations, cos2, contributions)
var.result <- get_pca_var(pca)
print(var.result)
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Cos2 of every variable on the first two dimensions
var <- as.data.frame(var.result$cos2[, c("Dim.1", "Dim.2")])
# Rank the variables by their cos2 on each dimension, largest first
dim1 <- var[order(var[["Dim.1"]], decreasing = TRUE), ]
dim2 <- var[order(var[["Dim.2"]], decreasing = TRUE), ]
# Ten variables with the highest cos2 on Dim 1
head(dim1, n = 10)
## Dim.1 Dim.2
## ROA.A..before.interest.and...after.tax 0.7214872 0.04647017
## Persistent.EPS.in.the.Last.Four.Seasons 0.7195261 0.09527141
## ROA.C..before.interest.and.depreciation.before.interest 0.7154907 0.04024190
## ROA.B..before.interest.and.depreciation.after.tax 0.7043138 0.03665850
## Net.profit.before.tax.Paid.in.capital 0.6878911 0.10022179
## Per.Share.Net.profit.before.tax..Yuan... 0.6804454 0.09984436
## Net.Income.to.Total.Assets 0.6646532 0.02722216
## Operating.Profit.Per.Share..Yuan... 0.5971660 0.12132431
## Operating.profit.Paid.in.capital 0.5930495 0.12146067
## Net.Value.Per.Share..A. 0.4972046 0.03921236
# Ten variables with the highest cos2 on Dim 2
head(dim2, n = 10)
## Dim.1 Dim.2
## Current.Liabilities.Equity 0.121223856 0.6917356
## Current.Liability.to.Equity 0.121223856 0.6917356
## Liability.to.Equity 0.137364584 0.6831411
## Borrowing.dependency 0.150587158 0.5681420
## Inventory.and.accounts.receivable.Net.value 0.061407304 0.4691494
## Equity.to.Long.term.Liability 0.087700954 0.3257798
## Net.Income.to.Stockholder.s.Equity 0.165007217 0.2910922
## Current.Liability.to.Assets 0.115590316 0.2638680
## Net.Worth.Turnover.Rate..times. 0.002990991 0.2544098
## Contingent.liabilities.Net.worth 0.031976293 0.2347898
# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(pca, choice = "var", axes = 1:2)
# Contributions of the variables to Dim.1 and Dim.2
# (this table reads `$contrib`, not `$cos2`)
var <- as.data.frame(var.result$contrib[, c("Dim.1", "Dim.2")])
# Ten largest contributors to Dim.1
head(var[order(var$Dim.1, decreasing = TRUE), ], 10)
## Dim.1 Dim.2
## ROA.A..before.interest.and...after.tax 5.750662 0.6853069
## Persistent.EPS.in.the.Last.Four.Seasons 5.735032 1.4049907
## ROA.C..before.interest.and.depreciation.before.interest 5.702867 0.5934571
## ROA.B..before.interest.and.depreciation.after.tax 5.613781 0.5406118
## Net.profit.before.tax.Paid.in.capital 5.482883 1.4779951
## Per.Share.Net.profit.before.tax..Yuan... 5.423536 1.4724292
## Net.Income.to.Total.Assets 5.297663 0.4014518
## Operating.Profit.Per.Share..Yuan... 4.759752 1.7891992
## Operating.profit.Paid.in.capital 4.726941 1.7912101
## Net.Value.Per.Share..A. 3.963003 0.5782743
Contribution to the PCs
# Bar plot of each variable's contribution to PC1
fviz_contrib(pca, axes = 1, choice = "var")
# Bar plot of each variable's contribution to PC2
fviz_contrib(pca, axes = 2, choice = "var")
Quality of Representation
# Variable plot, variables coloured by their contribution
# (`TRUE` spelled out: `T` is an ordinary variable that can be reassigned)
fviz_pca_var(
  pca,
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
## Warning: ggrepel: 79 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Variable plot, variables coloured by their cos2
# (`TRUE` spelled out: `T` is an ordinary variable that can be reassigned)
fviz_pca_var(
  pca,
  repel = TRUE,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
## Warning: ggrepel: 79 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# FSelectorRcpp - information gain of every attribute with respect to Bankrupt.
# The entire data set is passed; the formula selects the target column.
imp <- information_gain(Bankrupt. ~ ., data = data)
# Sort attributes by importance, largest first
# (`TRUE` spelled out: `T` is an ordinary variable that can be reassigned)
imp <- imp[order(imp$importance, decreasing = TRUE), ]
# Ten most informative attributes
head(imp, 10)
## attributes importance
## 90 Net.Income.to.Stockholder.s.Equity 0.04126541
## 19 Persistent.EPS.in.the.Last.Four.Seasons 0.03974476
## 43 Net.profit.before.tax.Paid.in.capital 0.03914224
## 40 Borrowing.dependency 0.03761592
## 30 Net.Value.Growth.Rate 0.03674042
## 23 Per.Share.Net.profit.before.tax..Yuan... 0.03651945
## 86 Net.Income.to.Total.Assets 0.03631065
## 68 Retained.Earnings.to.Total.Assets 0.03421456
## 35 Interest.Expense.Ratio 0.03395595
## 10 Continuous.interest.rate..after.tax. 0.03326532