#Reading the data
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 too,
# which is what the recorded str(cr) output (Factor columns) relies on.
cr <- read.csv("german_credit_data (2).csv", stringsAsFactors = TRUE)
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(cr)
}
#Summarising the data
# NOTE(review): attach() is discouraged; it is retained because calls further
# down this script may refer to the columns of cr by bare name.
attach(cr)
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# psych::describe() prints per-column descriptive statistics (n, mean, sd, ...).
describe(cr)
## vars n mean sd median trimmed mad min
## X 1 1000 499.50 288.82 499.5 499.50 370.65 0
## Age 2 1000 35.55 11.38 33.0 34.17 10.38 19
## Sex* 3 1000 1.69 0.46 2.0 1.74 0.00 1
## Job 4 1000 1.90 0.65 2.0 1.91 0.00 0
## Housing* 5 1000 2.07 0.53 2.0 2.09 0.00 1
## Saving.accounts* 6 817 1.46 0.87 1.0 1.25 0.00 1
## Checking.account* 7 606 1.65 0.66 2.0 1.57 1.48 1
## Credit.amount 8 1000 3271.26 2822.74 2319.5 2754.57 1627.15 250
## Duration 9 1000 20.90 12.06 18.0 19.47 8.90 4
## Purpose* 10 1000 3.88 1.98 4.0 3.91 2.97 1
## Risk* 11 1000 1.70 0.46 2.0 1.75 0.00 1
## max range skew kurtosis se
## X 999 999 0.00 -1.20 9.13
## Age 75 56 1.02 0.58 0.36
## Sex* 2 1 -0.82 -1.33 0.01
## Job 3 3 -0.37 0.49 0.02
## Housing* 3 2 0.07 0.46 0.02
## Saving.accounts* 4 3 1.83 2.15 0.03
## Checking.account* 3 2 0.52 -0.73 0.03
## Credit.amount 18424 18174 1.94 4.25 89.26
## Duration 72 68 1.09 0.90 0.38
## Purpose* 8 7 -0.01 -1.54 0.06
## Risk* 2 1 -0.87 -1.24 0.01
#Data types
str(cr)
## 'data.frame': 1000 obs. of 11 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
## $ Job : int 2 2 1 2 2 1 2 3 1 3 ...
## $ Housing : Factor w/ 3 levels "free","own","rent": 2 2 2 1 1 1 2 3 2 2 ...
## $ Saving.accounts : Factor w/ 4 levels "little","moderate",..: NA 1 1 1 1 NA 3 1 4 1 ...
## $ Checking.account: Factor w/ 3 levels "little","moderate",..: 1 2 NA 1 1 NA NA 2 NA 2 ...
## $ Credit.amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ Duration : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Purpose : Factor w/ 8 levels "business","car",..: 6 6 4 5 2 4 5 2 6 2 ...
## $ Risk : Factor w/ 2 levels "bad","good": 2 1 2 2 1 2 2 2 2 1 ...
The categorical variables in my dataset are as follows: 1. Sex (male, female) 2. Job (0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled; stored as an integer code) 3. Housing (own, rent or free) 4. Saving accounts (little, moderate, quite rich, rich) 5. Purpose (car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others) 6. Risk (target variable: good or bad risk) 7. Checking account (little, moderate, rich)
#One way contingency tables
# Each table counts the rows of cr per level of one variable (missing values
# are not tabulated); prop.table(...) * 100 converts the counts to percentages.
mytable1 <- table(Sex = cr$Sex)
mytable1
## Sex
## female male
## 310 690
prop.table(mytable1) * 100
## Sex
## female male
## 31 69
mytable2 <- table(Job = cr$Job)
mytable2
## Job
## 0 1 2 3
## 22 200 630 148
prop.table(mytable2) * 100
## Job
## 0 1 2 3
## 2.2 20.0 63.0 14.8
mytable3 <- table(Housing = cr$Housing)
mytable3
## Housing
## free own rent
## 108 713 179
prop.table(mytable3) * 100
## Housing
## free own rent
## 10.8 71.3 17.9
mytable4 <- table(Saving.accounts = cr$Saving.accounts)
mytable4
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable4) * 100
## Saving.accounts
## little moderate quite rich rich
## 73.806610 12.607099 7.711138 5.875153
mytable5 <- table(Purpose = cr$Purpose)
mytable5
## Purpose
## business car domestic appliances
## 97 337 12
## education furniture/equipment radio/TV
## 59 181 280
## repairs vacation/others
## 22 12
prop.table(mytable5) * 100
## Purpose
## business car domestic appliances
## 9.7 33.7 1.2
## education furniture/equipment radio/TV
## 5.9 18.1 28.0
## repairs vacation/others
## 2.2 1.2
mytable6 <- table(Risk = cr$Risk)
mytable6
## Risk
## bad good
## 300 700
prop.table(mytable6) * 100
## Risk
## bad good
## 30 70
mytable7 <- table(Checking.account = cr$Checking.account)
mytable7
## Checking.account
## little moderate rich
## 274 269 63
prop.table(mytable7) * 100
## Checking.account
## little moderate rich
## 45.21452 44.38944 10.39604
#Two-way Contingency tables
mytable11<-xtabs(~Sex+Job,data=cr)
mytable11
## Job
## Sex 0 1 2 3
## female 12 64 197 37
## male 10 136 433 111
margin.table(mytable11,1)
## Sex
## female male
## 310 690
prop.table(mytable11,1)
## Job
## Sex 0 1 2 3
## female 0.03870968 0.20645161 0.63548387 0.11935484
## male 0.01449275 0.19710145 0.62753623 0.16086957
margin.table(mytable11,2)
## Job
## 0 1 2 3
## 22 200 630 148
prop.table(mytable11,2)
## Job
## Sex 0 1 2 3
## female 0.5454545 0.3200000 0.3126984 0.2500000
## male 0.4545455 0.6800000 0.6873016 0.7500000
addmargins(prop.table(mytable11))
## Job
## Sex 0 1 2 3 Sum
## female 0.012 0.064 0.197 0.037 0.310
## male 0.010 0.136 0.433 0.111 0.690
## Sum 0.022 0.200 0.630 0.148 1.000
mytable22<-xtabs(~Sex+Housing,data=cr)
mytable22
## Housing
## Sex free own rent
## female 19 196 95
## male 89 517 84
margin.table(mytable22,1)
## Sex
## female male
## 310 690
prop.table(mytable22,1)
## Housing
## Sex free own rent
## female 0.06129032 0.63225806 0.30645161
## male 0.12898551 0.74927536 0.12173913
margin.table(mytable22,2)
## Housing
## free own rent
## 108 713 179
prop.table(mytable22,2)
## Housing
## Sex free own rent
## female 0.1759259 0.2748948 0.5307263
## male 0.8240741 0.7251052 0.4692737
addmargins(prop.table(mytable22))
## Housing
## Sex free own rent Sum
## female 0.019 0.196 0.095 0.310
## male 0.089 0.517 0.084 0.690
## Sum 0.108 0.713 0.179 1.000
mytable33<-xtabs(~Sex+Saving.accounts,data=cr)
mytable33
## Saving.accounts
## Sex little moderate quite rich rich
## female 194 32 16 19
## male 409 71 47 29
margin.table(mytable33,1)
## Sex
## female male
## 261 556
prop.table(mytable33,1)
## Saving.accounts
## Sex little moderate quite rich rich
## female 0.74329502 0.12260536 0.06130268 0.07279693
## male 0.73561151 0.12769784 0.08453237 0.05215827
margin.table(mytable33,2)
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable33,2)
## Saving.accounts
## Sex little moderate quite rich rich
## female 0.3217247 0.3106796 0.2539683 0.3958333
## male 0.6782753 0.6893204 0.7460317 0.6041667
addmargins(prop.table(mytable33))
## Saving.accounts
## Sex little moderate quite rich rich Sum
## female 0.23745410 0.03916769 0.01958384 0.02325581 0.31946144
## male 0.50061200 0.08690330 0.05752754 0.03549572 0.68053856
## Sum 0.73806610 0.12607099 0.07711138 0.05875153 1.00000000
mytable44<-xtabs(~Sex+Purpose,data=cr)
mytable44
## Purpose
## Sex business car domestic appliances education furniture/equipment
## female 19 94 6 24 74
## male 78 243 6 35 107
## Purpose
## Sex radio/TV repairs vacation/others
## female 85 5 3
## male 195 17 9
margin.table(mytable44,1)
## Sex
## female male
## 310 690
prop.table(mytable44,1)
## Purpose
## Sex business car domestic appliances education
## female 0.061290323 0.303225806 0.019354839 0.077419355
## male 0.113043478 0.352173913 0.008695652 0.050724638
## Purpose
## Sex furniture/equipment radio/TV repairs vacation/others
## female 0.238709677 0.274193548 0.016129032 0.009677419
## male 0.155072464 0.282608696 0.024637681 0.013043478
margin.table(mytable44,2)
## Purpose
## business car domestic appliances
## 97 337 12
## education furniture/equipment radio/TV
## 59 181 280
## repairs vacation/others
## 22 12
prop.table(mytable44,2)
## Purpose
## Sex business car domestic appliances education
## female 0.1958763 0.2789318 0.5000000 0.4067797
## male 0.8041237 0.7210682 0.5000000 0.5932203
## Purpose
## Sex furniture/equipment radio/TV repairs vacation/others
## female 0.4088398 0.3035714 0.2272727 0.2500000
## male 0.5911602 0.6964286 0.7727273 0.7500000
addmargins(prop.table(mytable44))
## Purpose
## Sex business car domestic appliances education furniture/equipment
## female 0.019 0.094 0.006 0.024 0.074
## male 0.078 0.243 0.006 0.035 0.107
## Sum 0.097 0.337 0.012 0.059 0.181
## Purpose
## Sex radio/TV repairs vacation/others Sum
## female 0.085 0.005 0.003 0.310
## male 0.195 0.017 0.009 0.690
## Sum 0.280 0.022 0.012 1.000
mytable444<-xtabs(~Sex+Risk,data=cr)
mytable444
## Risk
## Sex bad good
## female 109 201
## male 191 499
margin.table(mytable444,1)
## Sex
## female male
## 310 690
prop.table(mytable444,1)
## Risk
## Sex bad good
## female 0.3516129 0.6483871
## male 0.2768116 0.7231884
margin.table(mytable444,2)
## Risk
## bad good
## 300 700
prop.table(mytable444,2)
## Risk
## Sex bad good
## female 0.3633333 0.2871429
## male 0.6366667 0.7128571
addmargins(prop.table(mytable444))
## Risk
## Sex bad good Sum
## female 0.109 0.201 0.310
## male 0.191 0.499 0.690
## Sum 0.300 0.700 1.000
mytable55<-xtabs(~Job+Housing,data=cr)
mytable55
## Housing
## Job free own rent
## 0 4 13 5
## 1 8 154 38
## 2 63 452 115
## 3 33 94 21
margin.table(mytable55,1)
## Job
## 0 1 2 3
## 22 200 630 148
prop.table(mytable55,1)
## Housing
## Job free own rent
## 0 0.1818182 0.5909091 0.2272727
## 1 0.0400000 0.7700000 0.1900000
## 2 0.1000000 0.7174603 0.1825397
## 3 0.2229730 0.6351351 0.1418919
margin.table(mytable55,2)
## Housing
## free own rent
## 108 713 179
prop.table(mytable55,2)
## Housing
## Job free own rent
## 0 0.03703704 0.01823282 0.02793296
## 1 0.07407407 0.21598878 0.21229050
## 2 0.58333333 0.63394109 0.64245810
## 3 0.30555556 0.13183731 0.11731844
addmargins(prop.table(mytable55))
## Housing
## Job free own rent Sum
## 0 0.004 0.013 0.005 0.022
## 1 0.008 0.154 0.038 0.200
## 2 0.063 0.452 0.115 0.630
## 3 0.033 0.094 0.021 0.148
## Sum 0.108 0.713 0.179 1.000
mytable66<-xtabs(~Job+Saving.accounts,data=cr)
mytable66
## Saving.accounts
## Job little moderate quite rich rich
## 0 13 0 3 1
## 1 128 23 12 8
## 2 366 66 42 36
## 3 96 14 6 3
margin.table(mytable66,1)
## Job
## 0 1 2 3
## 17 171 510 119
prop.table(mytable66,1)
## Saving.accounts
## Job little moderate quite rich rich
## 0 0.76470588 0.00000000 0.17647059 0.05882353
## 1 0.74853801 0.13450292 0.07017544 0.04678363
## 2 0.71764706 0.12941176 0.08235294 0.07058824
## 3 0.80672269 0.11764706 0.05042017 0.02521008
margin.table(mytable66,2)
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable66,2)
## Saving.accounts
## Job little moderate quite rich rich
## 0 0.02155887 0.00000000 0.04761905 0.02083333
## 1 0.21227197 0.22330097 0.19047619 0.16666667
## 2 0.60696517 0.64077670 0.66666667 0.75000000
## 3 0.15920398 0.13592233 0.09523810 0.06250000
addmargins(prop.table(mytable66))
## Saving.accounts
## Job little moderate quite rich rich Sum
## 0 0.015911873 0.000000000 0.003671971 0.001223990 0.020807834
## 1 0.156670747 0.028151775 0.014687882 0.009791922 0.209302326
## 2 0.447980416 0.080783354 0.051407589 0.044063647 0.624235006
## 3 0.117503060 0.017135863 0.007343941 0.003671971 0.145654835
## Sum 0.738066095 0.126070991 0.077111383 0.058751530 1.000000000
mytable77<-xtabs(~Job+Purpose,data=cr)
mytable77
## Purpose
## Job business car domestic appliances education furniture/equipment
## 0 2 12 1 1 1
## 1 20 66 1 15 33
## 2 60 190 10 35 126
## 3 15 69 0 8 21
## Purpose
## Job radio/TV repairs vacation/others
## 0 2 2 1
## 1 57 7 1
## 2 195 13 1
## 3 26 0 9
margin.table(mytable77,1)
## Job
## 0 1 2 3
## 22 200 630 148
prop.table(mytable77,1)
## Purpose
## Job business car domestic appliances education
## 0 0.090909091 0.545454545 0.045454545 0.045454545
## 1 0.100000000 0.330000000 0.005000000 0.075000000
## 2 0.095238095 0.301587302 0.015873016 0.055555556
## 3 0.101351351 0.466216216 0.000000000 0.054054054
## Purpose
## Job furniture/equipment radio/TV repairs vacation/others
## 0 0.045454545 0.090909091 0.090909091 0.045454545
## 1 0.165000000 0.285000000 0.035000000 0.005000000
## 2 0.200000000 0.309523810 0.020634921 0.001587302
## 3 0.141891892 0.175675676 0.000000000 0.060810811
margin.table(mytable77,2)
## Purpose
## business car domestic appliances
## 97 337 12
## education furniture/equipment radio/TV
## 59 181 280
## repairs vacation/others
## 22 12
prop.table(mytable77,2)
## Purpose
## Job business car domestic appliances education
## 0 0.020618557 0.035608309 0.083333333 0.016949153
## 1 0.206185567 0.195845697 0.083333333 0.254237288
## 2 0.618556701 0.563798220 0.833333333 0.593220339
## 3 0.154639175 0.204747774 0.000000000 0.135593220
## Purpose
## Job furniture/equipment radio/TV repairs vacation/others
## 0 0.005524862 0.007142857 0.090909091 0.083333333
## 1 0.182320442 0.203571429 0.318181818 0.083333333
## 2 0.696132597 0.696428571 0.590909091 0.083333333
## 3 0.116022099 0.092857143 0.000000000 0.750000000
addmargins(prop.table(mytable77))
## Purpose
## Job business car domestic appliances education furniture/equipment
## 0 0.002 0.012 0.001 0.001 0.001
## 1 0.020 0.066 0.001 0.015 0.033
## 2 0.060 0.190 0.010 0.035 0.126
## 3 0.015 0.069 0.000 0.008 0.021
## Sum 0.097 0.337 0.012 0.059 0.181
## Purpose
## Job radio/TV repairs vacation/others Sum
## 0 0.002 0.002 0.001 0.022
## 1 0.057 0.007 0.001 0.200
## 2 0.195 0.013 0.001 0.630
## 3 0.026 0.000 0.009 0.148
## Sum 0.280 0.022 0.012 1.000
mytable777<-xtabs(~Job+Risk,data=cr)
mytable777
## Risk
## Job bad good
## 0 7 15
## 1 56 144
## 2 186 444
## 3 51 97
margin.table(mytable777,1)
## Job
## 0 1 2 3
## 22 200 630 148
prop.table(mytable777,1)
## Risk
## Job bad good
## 0 0.3181818 0.6818182
## 1 0.2800000 0.7200000
## 2 0.2952381 0.7047619
## 3 0.3445946 0.6554054
margin.table(mytable777,2)
## Risk
## bad good
## 300 700
prop.table(mytable777,2)
## Risk
## Job bad good
## 0 0.02333333 0.02142857
## 1 0.18666667 0.20571429
## 2 0.62000000 0.63428571
## 3 0.17000000 0.13857143
addmargins(prop.table(mytable777))
## Risk
## Job bad good Sum
## 0 0.007 0.015 0.022
## 1 0.056 0.144 0.200
## 2 0.186 0.444 0.630
## 3 0.051 0.097 0.148
## Sum 0.300 0.700 1.000
mytable88<-xtabs(~Housing+Saving.accounts,data=cr)
mytable88
## Saving.accounts
## Housing little moderate quite rich rich
## free 67 10 6 2
## own 430 71 45 37
## rent 106 22 12 9
margin.table(mytable88,1)
## Housing
## free own rent
## 85 583 149
prop.table(mytable88,1)
## Saving.accounts
## Housing little moderate quite rich rich
## free 0.78823529 0.11764706 0.07058824 0.02352941
## own 0.73756432 0.12178388 0.07718696 0.06346484
## rent 0.71140940 0.14765101 0.08053691 0.06040268
margin.table(mytable88,2)
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable88,2)
## Saving.accounts
## Housing little moderate quite rich rich
## free 0.11111111 0.09708738 0.09523810 0.04166667
## own 0.71310116 0.68932039 0.71428571 0.77083333
## rent 0.17578773 0.21359223 0.19047619 0.18750000
addmargins(prop.table(mytable88))
## Saving.accounts
## Housing little moderate quite rich rich Sum
## free 0.082007344 0.012239902 0.007343941 0.002447980 0.104039168
## own 0.526315789 0.086903305 0.055079559 0.045287638 0.713586291
## rent 0.129742962 0.026927785 0.014687882 0.011015912 0.182374541
## Sum 0.738066095 0.126070991 0.077111383 0.058751530 1.000000000
mytable99<-xtabs(~Housing+Purpose,data=cr)
mytable99
## Purpose
## Housing business car domestic appliances education furniture/equipment
## free 5 55 0 15 11
## own 76 219 10 34 122
## rent 16 63 2 10 48
## Purpose
## Housing radio/TV repairs vacation/others
## free 15 3 4
## own 227 17 8
## rent 38 2 0
margin.table(mytable99,1)
## Housing
## free own rent
## 108 713 179
prop.table(mytable99,1)
## Purpose
## Housing business car domestic appliances education
## free 0.04629630 0.50925926 0.00000000 0.13888889
## own 0.10659187 0.30715288 0.01402525 0.04768583
## rent 0.08938547 0.35195531 0.01117318 0.05586592
## Purpose
## Housing furniture/equipment radio/TV repairs vacation/others
## free 0.10185185 0.13888889 0.02777778 0.03703704
## own 0.17110799 0.31837307 0.02384292 0.01122020
## rent 0.26815642 0.21229050 0.01117318 0.00000000
margin.table(mytable99,2)
## Purpose
## business car domestic appliances
## 97 337 12
## education furniture/equipment radio/TV
## 59 181 280
## repairs vacation/others
## 22 12
prop.table(mytable99,2)
## Purpose
## Housing business car domestic appliances education
## free 0.05154639 0.16320475 0.00000000 0.25423729
## own 0.78350515 0.64985163 0.83333333 0.57627119
## rent 0.16494845 0.18694362 0.16666667 0.16949153
## Purpose
## Housing furniture/equipment radio/TV repairs vacation/others
## free 0.06077348 0.05357143 0.13636364 0.33333333
## own 0.67403315 0.81071429 0.77272727 0.66666667
## rent 0.26519337 0.13571429 0.09090909 0.00000000
addmargins(prop.table(mytable99))
## Purpose
## Housing business car domestic appliances education furniture/equipment
## free 0.005 0.055 0.000 0.015 0.011
## own 0.076 0.219 0.010 0.034 0.122
## rent 0.016 0.063 0.002 0.010 0.048
## Sum 0.097 0.337 0.012 0.059 0.181
## Purpose
## Housing radio/TV repairs vacation/others Sum
## free 0.015 0.003 0.004 0.108
## own 0.227 0.017 0.008 0.713
## rent 0.038 0.002 0.000 0.179
## Sum 0.280 0.022 0.012 1.000
mytable999<-xtabs(~Housing+Risk,data=cr)
mytable999
## Risk
## Housing bad good
## free 44 64
## own 186 527
## rent 70 109
margin.table(mytable999,1)
## Housing
## free own rent
## 108 713 179
prop.table(mytable999,1)
## Risk
## Housing bad good
## free 0.4074074 0.5925926
## own 0.2608696 0.7391304
## rent 0.3910615 0.6089385
margin.table(mytable999,2)
## Risk
## bad good
## 300 700
prop.table(mytable999,2)
## Risk
## Housing bad good
## free 0.14666667 0.09142857
## own 0.62000000 0.75285714
## rent 0.23333333 0.15571429
addmargins(prop.table(mytable999))
## Risk
## Housing bad good Sum
## free 0.044 0.064 0.108
## own 0.186 0.527 0.713
## rent 0.070 0.109 0.179
## Sum 0.300 0.700 1.000
mytable100<-xtabs(~Saving.accounts+Purpose,data=cr)
mytable100
## Purpose
## Saving.accounts business car domestic appliances education
## little 56 188 6 34
## moderate 17 39 1 5
## quite rich 4 18 2 3
## rich 6 18 0 2
## Purpose
## Saving.accounts furniture/equipment radio/TV repairs vacation/others
## little 128 169 14 8
## moderate 9 27 3 2
## quite rich 12 23 1 0
## rich 11 9 2 0
margin.table(mytable100,1)
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable100,1)
## Purpose
## Saving.accounts business car domestic appliances education
## little 0.092868988 0.311774461 0.009950249 0.056384743
## moderate 0.165048544 0.378640777 0.009708738 0.048543689
## quite rich 0.063492063 0.285714286 0.031746032 0.047619048
## rich 0.125000000 0.375000000 0.000000000 0.041666667
## Purpose
## Saving.accounts furniture/equipment radio/TV repairs
## little 0.212271973 0.280265340 0.023217247
## moderate 0.087378641 0.262135922 0.029126214
## quite rich 0.190476190 0.365079365 0.015873016
## rich 0.229166667 0.187500000 0.041666667
## Purpose
## Saving.accounts vacation/others
## little 0.013266998
## moderate 0.019417476
## quite rich 0.000000000
## rich 0.000000000
margin.table(mytable100,2)
## Purpose
## business car domestic appliances
## 83 263 9
## education furniture/equipment radio/TV
## 44 160 228
## repairs vacation/others
## 20 10
prop.table(mytable100,2)
## Purpose
## Saving.accounts business car domestic appliances education
## little 0.67469880 0.71482890 0.66666667 0.77272727
## moderate 0.20481928 0.14828897 0.11111111 0.11363636
## quite rich 0.04819277 0.06844106 0.22222222 0.06818182
## rich 0.07228916 0.06844106 0.00000000 0.04545455
## Purpose
## Saving.accounts furniture/equipment radio/TV repairs vacation/others
## little 0.80000000 0.74122807 0.70000000 0.80000000
## moderate 0.05625000 0.11842105 0.15000000 0.20000000
## quite rich 0.07500000 0.10087719 0.05000000 0.00000000
## rich 0.06875000 0.03947368 0.10000000 0.00000000
addmargins(prop.table(mytable100))
## Purpose
## Saving.accounts business car domestic appliances education
## little 0.068543452 0.230110159 0.007343941 0.041615667
## moderate 0.020807834 0.047735618 0.001223990 0.006119951
## quite rich 0.004895961 0.022031824 0.002447980 0.003671971
## rich 0.007343941 0.022031824 0.000000000 0.002447980
## Sum 0.101591187 0.321909425 0.011015912 0.053855569
## Purpose
## Saving.accounts furniture/equipment radio/TV repairs
## little 0.156670747 0.206854345 0.017135863
## moderate 0.011015912 0.033047736 0.003671971
## quite rich 0.014687882 0.028151775 0.001223990
## rich 0.013463892 0.011015912 0.002447980
## Sum 0.195838433 0.279069767 0.024479804
## Purpose
## Saving.accounts vacation/others Sum
## little 0.009791922 0.738066095
## moderate 0.002447980 0.126070991
## quite rich 0.000000000 0.077111383
## rich 0.000000000 0.058751530
## Sum 0.012239902 1.000000000
mytable101<-xtabs(~Saving.accounts+Risk,data=cr)
mytable101
## Risk
## Saving.accounts bad good
## little 217 386
## moderate 34 69
## quite rich 11 52
## rich 6 42
margin.table(mytable101,1)
## Saving.accounts
## little moderate quite rich rich
## 603 103 63 48
prop.table(mytable101,1)
## Risk
## Saving.accounts bad good
## little 0.3598673 0.6401327
## moderate 0.3300971 0.6699029
## quite rich 0.1746032 0.8253968
## rich 0.1250000 0.8750000
margin.table(mytable101,2)
## Risk
## bad good
## 268 549
prop.table(mytable101,2)
## Risk
## Saving.accounts bad good
## little 0.80970149 0.70309654
## moderate 0.12686567 0.12568306
## quite rich 0.04104478 0.09471767
## rich 0.02238806 0.07650273
addmargins(prop.table(mytable101))
## Risk
## Saving.accounts bad good Sum
## little 0.265605875 0.472460220 0.738066095
## moderate 0.041615667 0.084455324 0.126070991
## quite rich 0.013463892 0.063647491 0.077111383
## rich 0.007343941 0.051407589 0.058751530
## Sum 0.328029376 0.671970624 1.000000000
mytable102<-xtabs(~Purpose+Risk,data=cr)
mytable102
## Risk
## Purpose bad good
## business 34 63
## car 106 231
## domestic appliances 4 8
## education 23 36
## furniture/equipment 58 123
## radio/TV 62 218
## repairs 8 14
## vacation/others 5 7
margin.table(mytable102,1)
## Purpose
## business car domestic appliances
## 97 337 12
## education furniture/equipment radio/TV
## 59 181 280
## repairs vacation/others
## 22 12
prop.table(mytable102,1)
## Risk
## Purpose bad good
## business 0.3505155 0.6494845
## car 0.3145401 0.6854599
## domestic appliances 0.3333333 0.6666667
## education 0.3898305 0.6101695
## furniture/equipment 0.3204420 0.6795580
## radio/TV 0.2214286 0.7785714
## repairs 0.3636364 0.6363636
## vacation/others 0.4166667 0.5833333
margin.table(mytable102,2)
## Risk
## bad good
## 300 700
prop.table(mytable102,2)
## Risk
## Purpose bad good
## business 0.11333333 0.09000000
## car 0.35333333 0.33000000
## domestic appliances 0.01333333 0.01142857
## education 0.07666667 0.05142857
## furniture/equipment 0.19333333 0.17571429
## radio/TV 0.20666667 0.31142857
## repairs 0.02666667 0.02000000
## vacation/others 0.01666667 0.01000000
addmargins(prop.table(mytable102))
## Risk
## Purpose bad good Sum
## business 0.034 0.063 0.097
## car 0.106 0.231 0.337
## domestic appliances 0.004 0.008 0.012
## education 0.023 0.036 0.059
## furniture/equipment 0.058 0.123 0.181
## radio/TV 0.062 0.218 0.280
## repairs 0.008 0.014 0.022
## vacation/others 0.005 0.007 0.012
## Sum 0.300 0.700 1.000
#Useful boxplots for the study
# Horizontal boxplots of a numeric column split by a grouping column. With
# horizontal = TRUE the numeric scale runs along the x axis, so xlab names the
# numeric variable and ylab names the grouping variable.
boxplot(
  Age ~ Checking.account,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Checking.account",
  main = "Age v/s Checking account",
  col = c("red", "blue", "green")
)
boxplot(
  Age ~ Saving.accounts,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Savings.account",
  main = "Age v/s Savings.account",
  col = c("red", "blue", "green", "yellow")
)
boxplot(
  Age ~ Housing,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Housing",
  main = "Age v/s Housing",
  col = c("red", "blue", "green")
)
boxplot(
  Age ~ Job,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Job",
  main = "Age v/s Job",
  col = c("red", "blue", "green", "yellow")
)
boxplot(
  Age ~ Purpose,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Purpose",
  main = "Age v/s Purpose",
  col = c(
    "red", "blue", "green", "yellow",
    "peachpuff", "darkred", "lightblue", "gray60"
  )
)
# Title typo fixed ("Duraction" -> "Duration").
# NOTE(review): 17 colours are supplied; they are recycled if Duration has more
# distinct values than that - confirm the intended palette.
boxplot(
  Age ~ Duration,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Duration",
  main = "Age v/s Duration",
  col = c(
    "red", "blue", "green", "yellow", "peachpuff", "darkred",
    "lightblue", "gray60", "dark blue", "blue4", "burlywood",
    "darkolivegreen", "gray50", "gray95", "darkgoldenrod4",
    "darkkhaki", "burlywood1"
  )
)
boxplot(
  Age ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Risk",
  main = "Age v/s Risk",
  col = c("peachpuff", "lightblue")
)
boxplot(
  Duration ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Duration", ylab = "Risk",
  main = "Duration v/s Risk",
  col = c("peachpuff", "lightblue")
)
# Same Age-by-Risk plot as two calls above, drawn with a different colour pair.
boxplot(
  Age ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Risk",
  main = "Age v/s Risk",
  col = c("pink", "purple")
)
#Suitable histograms
library(lattice)
## Warning: package 'lattice' was built under R version 3.4.3
# Use the formula interface (~column) so that lattice looks the column up in
# `data = cr`. Passing a bare vector made lattice ignore `data` (it warned
# "explicit 'data' specification ignored") and only worked because cr had been
# attached. type = "count" puts counts, not percentages, on the y axis.
histogram(~Sex, data = cr, type = "count", col = "darkolivegreen")
histogram(~Job, data = cr, type = "count", col = "burlywood")
histogram(~Housing, data = cr, type = "count", col = "red")
histogram(~Saving.accounts, data = cr, type = "count", col = "blue")
histogram(~Purpose, data = cr, type = "count", col = "gray50")
histogram(~Duration, data = cr, type = "count", col = "yellow")
histogram(~Credit.amount, data = cr, type = "count", col = "green")
histogram(~Checking.account, data = cr, type = "count", col = "purple")
histogram(~Age, data = cr, type = "count", col = "lightblue")
histogram(~Risk, data = cr, type = "count", col = "pink")
#Scatterplot matrix
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Pairwise scatterplots of ten columns of cr with histograms on the diagonal.
# NOTE(review): newer car releases may expect `diagonal` as a list, e.g.
# list(method = "histogram") - confirm against the installed version.
scatterplotMatrix(
  formula = ~ Age + Sex + Job + Housing + Saving.accounts + Checking.account +
    Credit.amount + Duration + Purpose + Risk,
  data = cr,
  cex = 0.6,
  diagonal = "histogram"
)
#Correlation matrix
# Columns 1, 2, 4, 8 and 9 of cr, selected by name (the integer columns shown
# by str(cr)). NOTE(review): X looks like a row index, so its correlations are
# probably not meaningful - confirm whether it should be included.
cor(cr[, c("X", "Age", "Job", "Credit.amount", "Duration")])
## X Age Job Credit.amount
## X 1.00000000 -0.01009576 -0.02734538 0.01348793
## Age -0.01009576 1.00000000 0.01567316 0.03271642
## Job -0.02734538 0.01567316 1.00000000 0.28538533
## Credit.amount 0.01348793 0.03271642 0.28538533 1.00000000
## Duration 0.03078762 -0.03613637 0.21090973 0.62498420
## Duration
## X 0.03078762
## Age -0.03613637
## Job 0.21090973
## Credit.amount 0.62498420
## Duration 1.00000000
#to visualize correlation matrix
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
# Draw the correlation matrix of the same five columns as ellipses;
# use = "complete.obs" restricts cor() to rows without missing values.
corrplot(
  corr = cor(
    cr[, c("X", "Age", "Job", "Credit.amount", "Duration")],
    use = "complete.obs"
  ),
  method = "ellipse"
)
#Model1
# Linear regression of Credit.amount on Age, Housing, Purpose and
# Saving.accounts. The recorded summary reports 183 observations deleted due
# to missingness, so this fit uses fewer rows than the models without
# Saving.accounts.
model1 <- Credit.amount ~ Age + Housing + Purpose + Saving.accounts
fit1 <- lm(formula = model1, data = cr)
summary(fit1)
##
## Call:
## lm(formula = model1, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8860.4 -1573.4 -665.8 729.1 13198.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5511.791 557.430 9.888 < 2e-16 ***
## Age -7.244 8.516 -0.851 0.395265
## Housingown -1340.335 316.890 -4.230 2.61e-05 ***
## Housingrent -1300.608 375.690 -3.462 0.000565 ***
## Purposecar -559.867 326.101 -1.717 0.086392 .
## Purposedomestic appliances -2511.729 905.713 -2.773 0.005679 **
## Purposeeducation -1349.699 486.338 -2.775 0.005645 **
## Purposefurniture/equipment -963.235 352.386 -2.733 0.006405 **
## Purposeradio/TV -1565.008 331.519 -4.721 2.77e-06 ***
## Purposerepairs -1336.529 642.178 -2.081 0.037728 *
## Purposevacation/others 4882.052 865.616 5.640 2.36e-08 ***
## Saving.accountsmoderate 53.588 277.511 0.193 0.846928
## Saving.accountsquite rich -387.223 342.283 -1.131 0.258269
## Saving.accountsrich -556.073 388.089 -1.433 0.152290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2573 on 803 degrees of freedom
## (183 observations deleted due to missingness)
## Multiple R-squared: 0.1273, Adjusted R-squared: 0.1132
## F-statistic: 9.009 on 13 and 803 DF, p-value: < 2.2e-16
#Model2
# Linear regression of Credit.amount on Housing, Purpose and Risk.
model2 <- Credit.amount ~ Housing + Purpose + Risk
fit2 <- lm(formula = model2, data = cr)
summary(fit2)
##
## Call:
## lm(formula = model2, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7728.4 -1673.0 -676.2 902.2 13410.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6097.5 388.7 15.689 < 2e-16 ***
## Housingown -1507.1 281.0 -5.364 1.01e-07 ***
## Housingrent -1558.9 328.1 -4.751 2.32e-06 ***
## Purposecar -529.2 306.6 -1.726 0.08469 .
## Purposedomestic appliances -2569.0 809.5 -3.173 0.00155 **
## Purposeeducation -1614.4 440.4 -3.666 0.00026 ***
## Purposefurniture/equipment -1076.5 333.7 -3.226 0.00130 **
## Purposeradio/TV -1575.3 312.6 -5.040 5.53e-07 ***
## Purposerepairs -1571.7 625.2 -2.514 0.01209 *
## Purposevacation/others 3567.0 813.5 4.385 1.29e-05 ***
## Riskgood -772.1 185.2 -4.169 3.33e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2645 on 989 degrees of freedom
## Multiple R-squared: 0.1308, Adjusted R-squared: 0.122
## F-statistic: 14.88 on 10 and 989 DF, p-value: < 2.2e-16
#Model3
# Model2 plus Duration as an additional predictor.
model3 <- Credit.amount ~ Housing + Purpose + Risk + Duration
fit3 <- lm(formula = model3, data = cr)
summary(fit3)
##
## Call:
## lm(formula = model3, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5524.2 -1175.4 -350.5 711.4 13278.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 975.029 381.534 2.556 0.01075 *
## Housingown -581.068 229.113 -2.536 0.01136 *
## Housingrent -430.786 267.909 -1.608 0.10816
## Purposecar 392.148 249.425 1.572 0.11622
## Purposedomestic appliances -1229.320 652.702 -1.883 0.05994 .
## Purposeeducation -392.581 357.553 -1.098 0.27249
## Purposefurniture/equipment -50.829 271.602 -0.187 0.85159
## Purposeradio/TV -707.313 253.771 -2.787 0.00542 **
## Purposerepairs -381.068 504.690 -0.755 0.45040
## Purposevacation/others 3162.203 653.622 4.838 1.52e-06 ***
## Riskgood -36.851 152.034 -0.242 0.80853
## Duration 138.587 5.935 23.349 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2124 on 988 degrees of freedom
## Multiple R-squared: 0.4399, Adjusted R-squared: 0.4336
## F-statistic: 70.53 on 11 and 988 DF, p-value: < 2.2e-16
#Model4
# Model3 plus Job; Job is an integer column, so it enters as a single numeric
# term rather than as a factor.
model4 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job
fit4 <- lm(formula = model4, data = cr)
summary(fit4)
##
## Call:
## lm(formula = model4, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5927.7 -1118.4 -335.3 726.0 13998.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -124.958 420.581 -0.297 0.76645
## Housingown -461.370 226.363 -2.038 0.04180 *
## Housingrent -296.531 264.606 -1.121 0.26271
## Purposecar 348.789 245.519 1.421 0.15574
## Purposedomestic appliances -1202.833 642.199 -1.873 0.06137 .
## Purposeeducation -377.915 351.799 -1.074 0.28298
## Purposefurniture/equipment -108.021 267.407 -0.404 0.68633
## Purposeradio/TV -728.548 249.708 -2.918 0.00361 **
## Purposerepairs -171.283 497.872 -0.344 0.73090
## Purposevacation/others 2870.544 645.052 4.450 9.56e-06 ***
## Riskgood -49.210 149.599 -0.329 0.74227
## Duration 132.333 5.939 22.284 < 2e-16 ***
## Job 608.512 104.926 5.799 8.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2090 on 987 degrees of freedom
## Multiple R-squared: 0.4583, Adjusted R-squared: 0.4517
## F-statistic: 69.59 on 12 and 987 DF, p-value: < 2.2e-16
#Model5
# Model4 plus Sex.
model5 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job + Sex
fit5 <- lm(formula = model5, data = cr)
summary(fit5)
##
## Call:
## lm(formula = model5, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5991.0 -1137.6 -327.3 729.7 13921.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -275.971 435.716 -0.633 0.52664
## Housingown -443.519 226.682 -1.957 0.05068 .
## Housingrent -232.489 268.921 -0.865 0.38751
## Purposecar 364.662 245.721 1.484 0.13812
## Purposedomestic appliances -1148.399 643.282 -1.785 0.07453 .
## Purposeeducation -336.575 353.060 -0.953 0.34067
## Purposefurniture/equipment -72.774 268.637 -0.271 0.78652
## Purposeradio/TV -706.748 250.160 -2.825 0.00482 **
## Purposerepairs -165.832 497.702 -0.333 0.73906
## Purposevacation/others 2898.697 645.162 4.493 7.86e-06 ***
## Riskgood -63.812 149.952 -0.426 0.67053
## Duration 131.980 5.942 22.210 < 2e-16 ***
## Job 601.886 105.006 5.732 1.32e-08 ***
## Sexmale 196.585 148.955 1.320 0.18722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2089 on 986 degrees of freedom
## Multiple R-squared: 0.4593, Adjusted R-squared: 0.4521
## F-statistic: 64.42 on 13 and 986 DF, p-value: < 2.2e-16
#Model6
# Model4 plus Checking.account. The recorded summary reports 394 observations
# deleted due to missingness, so this fit is based on a smaller sample than
# Model4 and Model5.
model6 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job +
  Checking.account
fit6 <- lm(formula = model6, data = cr)
summary(fit6)
##
## Call:
## lm(formula = model6, data = cr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5758.5 -1267.7 -382.6 810.5 13798.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -494.335 584.402 -0.846 0.397962
## Housingown -374.570 299.501 -1.251 0.211559
## Housingrent -246.743 343.070 -0.719 0.472289
## Purposecar 339.212 343.276 0.988 0.323477
## Purposedomestic appliances -1232.977 850.329 -1.450 0.147589
## Purposeeducation -383.515 486.818 -0.788 0.431130
## Purposefurniture/equipment -84.392 365.535 -0.231 0.817493
## Purposeradio/TV -876.706 348.215 -2.518 0.012075 *
## Purposerepairs 37.699 674.992 0.056 0.955479
## Purposevacation/others 2858.554 743.232 3.846 0.000133 ***
## Riskgood 37.753 194.341 0.194 0.846037
## Duration 126.778 8.068 15.714 < 2e-16 ***
## Job 694.721 137.486 5.053 5.8e-07 ***
## Checking.accountmoderate 526.032 199.057 2.643 0.008445 **
## Checking.accountrich -185.448 321.416 -0.577 0.564177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2239 on 591 degrees of freedom
## (394 observations deleted due to missingness)
## Multiple R-squared: 0.4495, Adjusted R-squared: 0.4365
## F-statistic: 34.47 on 14 and 591 DF, p-value: < 2.2e-16
#T-test
# Two-sample t-test comparing mean Credit.amount between the two Risk groups
# (the recorded output shows the Welch variant).
t.test(formula = Credit.amount ~ Risk, data = cr)
##
## Welch Two Sample t-test
##
## data: Credit.amount by Risk
## t = 4.2642, df = 421.86, p-value = 2.478e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 513.534 1391.805
## sample estimates:
## mean in group bad mean in group good
## 3938.127 2985.457
Inference: I. The important variables for the Credit Risk analysis are: 1. Job 2. Housing 3. Saving.accounts 4. Purpose 5. Duration 6. Risk
The choice of variables has been made by running regressions on the variables mentioned above; the ones with significant coefficients are listed. It has also been observed that the variable “Checking.account” is significant only for its “moderate” level (see Model 6).