# Reading the data
# Load the German credit data set from the current working directory.
# stringsAsFactors = TRUE keeps the text columns (Sex, Housing, Purpose, Risk, ...)
# as factors on R >= 4.0, where read.csv() would otherwise return character
# vectors; the factor-based summaries and lattice histograms in this script
# rely on factors.
cr <- read.csv("german_credit_data (2).csv", stringsAsFactors = TRUE)
# View() opens a spreadsheet-style viewer, so only call it when a user is present.
if (interactive()) {
  View(cr)
}
# Summarising the data
# NOTE(review): attach() is kept only because later calls in this script refer
# to columns by bare name (e.g. histogram(Sex, ...)). Prefer cr$col or a
# `data = cr` argument in new code.
attach(cr)
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Descriptive statistics for every column of cr (n, mean, sd, median, ...).
# In the printed result, columns flagged with "*" are factors summarised on
# their integer codes.
psych::describe(cr)
##                   vars    n    mean      sd median trimmed     mad min
## X                    1 1000  499.50  288.82  499.5  499.50  370.65   0
## Age                  2 1000   35.55   11.38   33.0   34.17   10.38  19
## Sex*                 3 1000    1.69    0.46    2.0    1.74    0.00   1
## Job                  4 1000    1.90    0.65    2.0    1.91    0.00   0
## Housing*             5 1000    2.07    0.53    2.0    2.09    0.00   1
## Saving.accounts*     6  817    1.46    0.87    1.0    1.25    0.00   1
## Checking.account*    7  606    1.65    0.66    2.0    1.57    1.48   1
## Credit.amount        8 1000 3271.26 2822.74 2319.5 2754.57 1627.15 250
## Duration             9 1000   20.90   12.06   18.0   19.47    8.90   4
## Purpose*            10 1000    3.88    1.98    4.0    3.91    2.97   1
## Risk*               11 1000    1.70    0.46    2.0    1.75    0.00   1
##                     max range  skew kurtosis    se
## X                   999   999  0.00    -1.20  9.13
## Age                  75    56  1.02     0.58  0.36
## Sex*                  2     1 -0.82    -1.33  0.01
## Job                   3     3 -0.37     0.49  0.02
## Housing*              3     2  0.07     0.46  0.02
## Saving.accounts*      4     3  1.83     2.15  0.03
## Checking.account*     3     2  0.52    -0.73  0.03
## Credit.amount     18424 18174  1.94     4.25 89.26
## Duration             72    68  1.09     0.90  0.38
## Purpose*              8     7 -0.01    -1.54  0.06
## Risk*                 2     1 -0.87    -1.24  0.01
# Data types
# Compact overview of the data frame: one line per column with its type and
# the first few values.
utils::str(cr)
## 'data.frame':    1000 obs. of  11 variables:
##  $ X               : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Age             : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ Sex             : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ Job             : int  2 2 1 2 2 1 2 3 1 3 ...
##  $ Housing         : Factor w/ 3 levels "free","own","rent": 2 2 2 1 1 1 2 3 2 2 ...
##  $ Saving.accounts : Factor w/ 4 levels "little","moderate",..: NA 1 1 1 1 NA 3 1 4 1 ...
##  $ Checking.account: Factor w/ 3 levels "little","moderate",..: 1 2 NA 1 1 NA NA 2 NA 2 ...
##  $ Credit.amount   : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ Duration        : int  6 48 12 42 24 36 24 36 12 30 ...
##  $ Purpose         : Factor w/ 8 levels "business","car",..: 6 6 4 5 2 4 5 2 6 2 ...
##  $ Risk            : Factor w/ 2 levels "bad","good": 2 1 2 2 1 2 2 2 2 1 ...

The categorical variables in my dataset are as follows: 1. Sex (male, female) 2. Job (0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled) 3. Housing (own, rent, or free) 4. Saving accounts (little, moderate, quite rich, rich) 5. Checking account (little, moderate, rich) 6. Purpose (car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others) 7. Risk (the target variable: good or bad risk)

# One-way contingency tables
# For each categorical column: the absolute counts first, then the same table
# expressed as percentages. table() leaves out missing values by default, so
# the percentages are relative to the non-missing observations.

# Sex
mytable1 <- table(Sex = cr$Sex)
print(mytable1)
## Sex
## female   male 
##    310    690
print(100 * prop.table(mytable1))
## Sex
## female   male 
##     31     69

# Job
mytable2 <- table(Job = cr$Job)
print(mytable2)
## Job
##   0   1   2   3 
##  22 200 630 148
print(100 * prop.table(mytable2))
## Job
##    0    1    2    3 
##  2.2 20.0 63.0 14.8

# Housing
mytable3 <- table(Housing = cr$Housing)
print(mytable3)
## Housing
## free  own rent 
##  108  713  179
print(100 * prop.table(mytable3))
## Housing
## free  own rent 
## 10.8 71.3 17.9

# Saving accounts
mytable4 <- table(Saving.accounts = cr$Saving.accounts)
print(mytable4)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
print(100 * prop.table(mytable4))
## Saving.accounts
##     little   moderate quite rich       rich 
##  73.806610  12.607099   7.711138   5.875153

# Purpose
mytable5 <- table(Purpose = cr$Purpose)
print(mytable5)
## Purpose
##            business                 car domestic appliances 
##                  97                 337                  12 
##           education furniture/equipment            radio/TV 
##                  59                 181                 280 
##             repairs     vacation/others 
##                  22                  12
print(100 * prop.table(mytable5))
## Purpose
##            business                 car domestic appliances 
##                 9.7                33.7                 1.2 
##           education furniture/equipment            radio/TV 
##                 5.9                18.1                28.0 
##             repairs     vacation/others 
##                 2.2                 1.2

# Risk
mytable6 <- table(Risk = cr$Risk)
print(mytable6)
## Risk
##  bad good 
##  300  700
print(100 * prop.table(mytable6))
## Risk
##  bad good 
##   30   70

# Checking account
mytable7 <- table(Checking.account = cr$Checking.account)
print(mytable7)
## Checking.account
##   little moderate     rich 
##      274      269       63
print(100 * prop.table(mytable7))
## Checking.account
##   little moderate     rich 
## 45.21452 44.38944 10.39604
#Two-way Contingency tables
# Cross-tabulated counts: Sex (rows) by Job (columns).
mytable11 <- xtabs(~ Sex + Job, data = cr)
print(mytable11)
##         Job
## Sex        0   1   2   3
##   female  12  64 197  37
##   male    10 136 433 111
margin.table(mytable11,1)
## Sex
## female   male 
##    310    690
prop.table(mytable11,1)
##         Job
## Sex               0          1          2          3
##   female 0.03870968 0.20645161 0.63548387 0.11935484
##   male   0.01449275 0.19710145 0.62753623 0.16086957
margin.table(mytable11,2)
## Job
##   0   1   2   3 
##  22 200 630 148
prop.table(mytable11,2)
##         Job
## Sex              0         1         2         3
##   female 0.5454545 0.3200000 0.3126984 0.2500000
##   male   0.4545455 0.6800000 0.6873016 0.7500000
addmargins(prop.table(mytable11))
##         Job
## Sex          0     1     2     3   Sum
##   female 0.012 0.064 0.197 0.037 0.310
##   male   0.010 0.136 0.433 0.111 0.690
##   Sum    0.022 0.200 0.630 0.148 1.000
# Cross-tabulated counts: Sex (rows) by Housing (columns).
mytable22 <- xtabs(~ Sex + Housing, data = cr)
print(mytable22)
##         Housing
## Sex      free own rent
##   female   19 196   95
##   male     89 517   84
margin.table(mytable22,1)
## Sex
## female   male 
##    310    690
prop.table(mytable22,1)
##         Housing
## Sex            free        own       rent
##   female 0.06129032 0.63225806 0.30645161
##   male   0.12898551 0.74927536 0.12173913
margin.table(mytable22,2)
## Housing
## free  own rent 
##  108  713  179
prop.table(mytable22,2)
##         Housing
## Sex           free       own      rent
##   female 0.1759259 0.2748948 0.5307263
##   male   0.8240741 0.7251052 0.4692737
addmargins(prop.table(mytable22))
##         Housing
## Sex       free   own  rent   Sum
##   female 0.019 0.196 0.095 0.310
##   male   0.089 0.517 0.084 0.690
##   Sum    0.108 0.713 0.179 1.000
# Cross-tabulated counts: Sex (rows) by Saving.accounts (columns).
# Rows with a missing Saving.accounts value are left out, so the printed
# margins of this table total 817 rather than 1000.
mytable33 <- xtabs(~ Sex + Saving.accounts, data = cr)
print(mytable33)
##         Saving.accounts
## Sex      little moderate quite rich rich
##   female    194       32         16   19
##   male      409       71         47   29
margin.table(mytable33,1)
## Sex
## female   male 
##    261    556
prop.table(mytable33,1)
##         Saving.accounts
## Sex          little   moderate quite rich       rich
##   female 0.74329502 0.12260536 0.06130268 0.07279693
##   male   0.73561151 0.12769784 0.08453237 0.05215827
margin.table(mytable33,2)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
prop.table(mytable33,2)
##         Saving.accounts
## Sex         little  moderate quite rich      rich
##   female 0.3217247 0.3106796  0.2539683 0.3958333
##   male   0.6782753 0.6893204  0.7460317 0.6041667
addmargins(prop.table(mytable33))
##         Saving.accounts
## Sex          little   moderate quite rich       rich        Sum
##   female 0.23745410 0.03916769 0.01958384 0.02325581 0.31946144
##   male   0.50061200 0.08690330 0.05752754 0.03549572 0.68053856
##   Sum    0.73806610 0.12607099 0.07711138 0.05875153 1.00000000
# Cross-tabulated counts: Sex (rows) by Purpose (columns).
mytable44 <- xtabs(~ Sex + Purpose, data = cr)
print(mytable44)
##         Purpose
## Sex      business car domestic appliances education furniture/equipment
##   female       19  94                   6        24                  74
##   male         78 243                   6        35                 107
##         Purpose
## Sex      radio/TV repairs vacation/others
##   female       85       5               3
##   male        195      17               9
margin.table(mytable44,1)
## Sex
## female   male 
##    310    690
prop.table(mytable44,1)
##         Purpose
## Sex         business         car domestic appliances   education
##   female 0.061290323 0.303225806         0.019354839 0.077419355
##   male   0.113043478 0.352173913         0.008695652 0.050724638
##         Purpose
## Sex      furniture/equipment    radio/TV     repairs vacation/others
##   female         0.238709677 0.274193548 0.016129032     0.009677419
##   male           0.155072464 0.282608696 0.024637681     0.013043478
margin.table(mytable44,2)
## Purpose
##            business                 car domestic appliances 
##                  97                 337                  12 
##           education furniture/equipment            radio/TV 
##                  59                 181                 280 
##             repairs     vacation/others 
##                  22                  12
prop.table(mytable44,2)
##         Purpose
## Sex       business       car domestic appliances education
##   female 0.1958763 0.2789318           0.5000000 0.4067797
##   male   0.8041237 0.7210682           0.5000000 0.5932203
##         Purpose
## Sex      furniture/equipment  radio/TV   repairs vacation/others
##   female           0.4088398 0.3035714 0.2272727       0.2500000
##   male             0.5911602 0.6964286 0.7727273       0.7500000
addmargins(prop.table(mytable44))
##         Purpose
## Sex      business   car domestic appliances education furniture/equipment
##   female    0.019 0.094               0.006     0.024               0.074
##   male      0.078 0.243               0.006     0.035               0.107
##   Sum       0.097 0.337               0.012     0.059               0.181
##         Purpose
## Sex      radio/TV repairs vacation/others   Sum
##   female    0.085   0.005           0.003 0.310
##   male      0.195   0.017           0.009 0.690
##   Sum       0.280   0.022           0.012 1.000
# Cross-tabulated counts: Sex (rows) by Risk (columns).
mytable444 <- xtabs(~ Sex + Risk, data = cr)
print(mytable444)
##         Risk
## Sex      bad good
##   female 109  201
##   male   191  499
margin.table(mytable444,1)
## Sex
## female   male 
##    310    690
prop.table(mytable444,1)
##         Risk
## Sex            bad      good
##   female 0.3516129 0.6483871
##   male   0.2768116 0.7231884
margin.table(mytable444,2)
## Risk
##  bad good 
##  300  700
prop.table(mytable444,2)
##         Risk
## Sex            bad      good
##   female 0.3633333 0.2871429
##   male   0.6366667 0.7128571
addmargins(prop.table(mytable444))
##         Risk
## Sex        bad  good   Sum
##   female 0.109 0.201 0.310
##   male   0.191 0.499 0.690
##   Sum    0.300 0.700 1.000
# Cross-tabulated counts: Job (rows) by Housing (columns).
mytable55 <- xtabs(~ Job + Housing, data = cr)
print(mytable55)
##    Housing
## Job free own rent
##   0    4  13    5
##   1    8 154   38
##   2   63 452  115
##   3   33  94   21
margin.table(mytable55,1)
## Job
##   0   1   2   3 
##  22 200 630 148
prop.table(mytable55,1)
##    Housing
## Job      free       own      rent
##   0 0.1818182 0.5909091 0.2272727
##   1 0.0400000 0.7700000 0.1900000
##   2 0.1000000 0.7174603 0.1825397
##   3 0.2229730 0.6351351 0.1418919
margin.table(mytable55,2)
## Housing
## free  own rent 
##  108  713  179
prop.table(mytable55,2)
##    Housing
## Job       free        own       rent
##   0 0.03703704 0.01823282 0.02793296
##   1 0.07407407 0.21598878 0.21229050
##   2 0.58333333 0.63394109 0.64245810
##   3 0.30555556 0.13183731 0.11731844
addmargins(prop.table(mytable55))
##      Housing
## Job    free   own  rent   Sum
##   0   0.004 0.013 0.005 0.022
##   1   0.008 0.154 0.038 0.200
##   2   0.063 0.452 0.115 0.630
##   3   0.033 0.094 0.021 0.148
##   Sum 0.108 0.713 0.179 1.000
# Cross-tabulated counts: Job (rows) by Saving.accounts (columns).
# Rows with a missing Saving.accounts value are left out, so the printed
# margins of this table total 817 rather than 1000.
mytable66 <- xtabs(~ Job + Saving.accounts, data = cr)
print(mytable66)
##    Saving.accounts
## Job little moderate quite rich rich
##   0     13        0          3    1
##   1    128       23         12    8
##   2    366       66         42   36
##   3     96       14          6    3
margin.table(mytable66,1)
## Job
##   0   1   2   3 
##  17 171 510 119
prop.table(mytable66,1)
##    Saving.accounts
## Job     little   moderate quite rich       rich
##   0 0.76470588 0.00000000 0.17647059 0.05882353
##   1 0.74853801 0.13450292 0.07017544 0.04678363
##   2 0.71764706 0.12941176 0.08235294 0.07058824
##   3 0.80672269 0.11764706 0.05042017 0.02521008
margin.table(mytable66,2)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
prop.table(mytable66,2)
##    Saving.accounts
## Job     little   moderate quite rich       rich
##   0 0.02155887 0.00000000 0.04761905 0.02083333
##   1 0.21227197 0.22330097 0.19047619 0.16666667
##   2 0.60696517 0.64077670 0.66666667 0.75000000
##   3 0.15920398 0.13592233 0.09523810 0.06250000
addmargins(prop.table(mytable66))
##      Saving.accounts
## Job        little    moderate  quite rich        rich         Sum
##   0   0.015911873 0.000000000 0.003671971 0.001223990 0.020807834
##   1   0.156670747 0.028151775 0.014687882 0.009791922 0.209302326
##   2   0.447980416 0.080783354 0.051407589 0.044063647 0.624235006
##   3   0.117503060 0.017135863 0.007343941 0.003671971 0.145654835
##   Sum 0.738066095 0.126070991 0.077111383 0.058751530 1.000000000
# Cross-tabulated counts: Job (rows) by Purpose (columns).
mytable77 <- xtabs(~ Job + Purpose, data = cr)
print(mytable77)
##    Purpose
## Job business car domestic appliances education furniture/equipment
##   0        2  12                   1         1                   1
##   1       20  66                   1        15                  33
##   2       60 190                  10        35                 126
##   3       15  69                   0         8                  21
##    Purpose
## Job radio/TV repairs vacation/others
##   0        2       2               1
##   1       57       7               1
##   2      195      13               1
##   3       26       0               9
margin.table(mytable77,1)
## Job
##   0   1   2   3 
##  22 200 630 148
prop.table(mytable77,1)
##    Purpose
## Job    business         car domestic appliances   education
##   0 0.090909091 0.545454545         0.045454545 0.045454545
##   1 0.100000000 0.330000000         0.005000000 0.075000000
##   2 0.095238095 0.301587302         0.015873016 0.055555556
##   3 0.101351351 0.466216216         0.000000000 0.054054054
##    Purpose
## Job furniture/equipment    radio/TV     repairs vacation/others
##   0         0.045454545 0.090909091 0.090909091     0.045454545
##   1         0.165000000 0.285000000 0.035000000     0.005000000
##   2         0.200000000 0.309523810 0.020634921     0.001587302
##   3         0.141891892 0.175675676 0.000000000     0.060810811
margin.table(mytable77,2)
## Purpose
##            business                 car domestic appliances 
##                  97                 337                  12 
##           education furniture/equipment            radio/TV 
##                  59                 181                 280 
##             repairs     vacation/others 
##                  22                  12
prop.table(mytable77,2)
##    Purpose
## Job    business         car domestic appliances   education
##   0 0.020618557 0.035608309         0.083333333 0.016949153
##   1 0.206185567 0.195845697         0.083333333 0.254237288
##   2 0.618556701 0.563798220         0.833333333 0.593220339
##   3 0.154639175 0.204747774         0.000000000 0.135593220
##    Purpose
## Job furniture/equipment    radio/TV     repairs vacation/others
##   0         0.005524862 0.007142857 0.090909091     0.083333333
##   1         0.182320442 0.203571429 0.318181818     0.083333333
##   2         0.696132597 0.696428571 0.590909091     0.083333333
##   3         0.116022099 0.092857143 0.000000000     0.750000000
addmargins(prop.table(mytable77))
##      Purpose
## Job   business   car domestic appliances education furniture/equipment
##   0      0.002 0.012               0.001     0.001               0.001
##   1      0.020 0.066               0.001     0.015               0.033
##   2      0.060 0.190               0.010     0.035               0.126
##   3      0.015 0.069               0.000     0.008               0.021
##   Sum    0.097 0.337               0.012     0.059               0.181
##      Purpose
## Job   radio/TV repairs vacation/others   Sum
##   0      0.002   0.002           0.001 0.022
##   1      0.057   0.007           0.001 0.200
##   2      0.195   0.013           0.001 0.630
##   3      0.026   0.000           0.009 0.148
##   Sum    0.280   0.022           0.012 1.000
# Cross-tabulated counts: Job (rows) by Risk (columns).
mytable777 <- xtabs(~ Job + Risk, data = cr)
print(mytable777)
##    Risk
## Job bad good
##   0   7   15
##   1  56  144
##   2 186  444
##   3  51   97
margin.table(mytable777,1)
## Job
##   0   1   2   3 
##  22 200 630 148
prop.table(mytable777,1)
##    Risk
## Job       bad      good
##   0 0.3181818 0.6818182
##   1 0.2800000 0.7200000
##   2 0.2952381 0.7047619
##   3 0.3445946 0.6554054
margin.table(mytable777,2)
## Risk
##  bad good 
##  300  700
prop.table(mytable777,2)
##    Risk
## Job        bad       good
##   0 0.02333333 0.02142857
##   1 0.18666667 0.20571429
##   2 0.62000000 0.63428571
##   3 0.17000000 0.13857143
addmargins(prop.table(mytable777))
##      Risk
## Job     bad  good   Sum
##   0   0.007 0.015 0.022
##   1   0.056 0.144 0.200
##   2   0.186 0.444 0.630
##   3   0.051 0.097 0.148
##   Sum 0.300 0.700 1.000
# Cross-tabulated counts: Housing (rows) by Saving.accounts (columns).
# Rows with a missing Saving.accounts value are left out, so the printed
# margins of this table total 817 rather than 1000.
mytable88 <- xtabs(~ Housing + Saving.accounts, data = cr)
print(mytable88)
##        Saving.accounts
## Housing little moderate quite rich rich
##    free     67       10          6    2
##    own     430       71         45   37
##    rent    106       22         12    9
margin.table(mytable88,1)
## Housing
## free  own rent 
##   85  583  149
prop.table(mytable88,1)
##        Saving.accounts
## Housing     little   moderate quite rich       rich
##    free 0.78823529 0.11764706 0.07058824 0.02352941
##    own  0.73756432 0.12178388 0.07718696 0.06346484
##    rent 0.71140940 0.14765101 0.08053691 0.06040268
margin.table(mytable88,2)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
prop.table(mytable88,2)
##        Saving.accounts
## Housing     little   moderate quite rich       rich
##    free 0.11111111 0.09708738 0.09523810 0.04166667
##    own  0.71310116 0.68932039 0.71428571 0.77083333
##    rent 0.17578773 0.21359223 0.19047619 0.18750000
addmargins(prop.table(mytable88))
##        Saving.accounts
## Housing      little    moderate  quite rich        rich         Sum
##    free 0.082007344 0.012239902 0.007343941 0.002447980 0.104039168
##    own  0.526315789 0.086903305 0.055079559 0.045287638 0.713586291
##    rent 0.129742962 0.026927785 0.014687882 0.011015912 0.182374541
##    Sum  0.738066095 0.126070991 0.077111383 0.058751530 1.000000000
# Cross-tabulated counts: Housing (rows) by Purpose (columns).
mytable99 <- xtabs(~ Housing + Purpose, data = cr)
print(mytable99)
##        Purpose
## Housing business car domestic appliances education furniture/equipment
##    free        5  55                   0        15                  11
##    own        76 219                  10        34                 122
##    rent       16  63                   2        10                  48
##        Purpose
## Housing radio/TV repairs vacation/others
##    free       15       3               4
##    own       227      17               8
##    rent       38       2               0
margin.table(mytable99,1)
## Housing
## free  own rent 
##  108  713  179
prop.table(mytable99,1)
##        Purpose
## Housing   business        car domestic appliances  education
##    free 0.04629630 0.50925926          0.00000000 0.13888889
##    own  0.10659187 0.30715288          0.01402525 0.04768583
##    rent 0.08938547 0.35195531          0.01117318 0.05586592
##        Purpose
## Housing furniture/equipment   radio/TV    repairs vacation/others
##    free          0.10185185 0.13888889 0.02777778      0.03703704
##    own           0.17110799 0.31837307 0.02384292      0.01122020
##    rent          0.26815642 0.21229050 0.01117318      0.00000000
margin.table(mytable99,2)
## Purpose
##            business                 car domestic appliances 
##                  97                 337                  12 
##           education furniture/equipment            radio/TV 
##                  59                 181                 280 
##             repairs     vacation/others 
##                  22                  12
prop.table(mytable99,2)
##        Purpose
## Housing   business        car domestic appliances  education
##    free 0.05154639 0.16320475          0.00000000 0.25423729
##    own  0.78350515 0.64985163          0.83333333 0.57627119
##    rent 0.16494845 0.18694362          0.16666667 0.16949153
##        Purpose
## Housing furniture/equipment   radio/TV    repairs vacation/others
##    free          0.06077348 0.05357143 0.13636364      0.33333333
##    own           0.67403315 0.81071429 0.77272727      0.66666667
##    rent          0.26519337 0.13571429 0.09090909      0.00000000
addmargins(prop.table(mytable99))
##        Purpose
## Housing business   car domestic appliances education furniture/equipment
##    free    0.005 0.055               0.000     0.015               0.011
##    own     0.076 0.219               0.010     0.034               0.122
##    rent    0.016 0.063               0.002     0.010               0.048
##    Sum     0.097 0.337               0.012     0.059               0.181
##        Purpose
## Housing radio/TV repairs vacation/others   Sum
##    free    0.015   0.003           0.004 0.108
##    own     0.227   0.017           0.008 0.713
##    rent    0.038   0.002           0.000 0.179
##    Sum     0.280   0.022           0.012 1.000
# Cross-tabulated counts: Housing (rows) by Risk (columns).
mytable999 <- xtabs(~ Housing + Risk, data = cr)
print(mytable999)
##        Risk
## Housing bad good
##    free  44   64
##    own  186  527
##    rent  70  109
margin.table(mytable999,1)
## Housing
## free  own rent 
##  108  713  179
prop.table(mytable999,1)
##        Risk
## Housing       bad      good
##    free 0.4074074 0.5925926
##    own  0.2608696 0.7391304
##    rent 0.3910615 0.6089385
margin.table(mytable999,2)
## Risk
##  bad good 
##  300  700
prop.table(mytable999,2)
##        Risk
## Housing        bad       good
##    free 0.14666667 0.09142857
##    own  0.62000000 0.75285714
##    rent 0.23333333 0.15571429
addmargins(prop.table(mytable999))
##        Risk
## Housing   bad  good   Sum
##    free 0.044 0.064 0.108
##    own  0.186 0.527 0.713
##    rent 0.070 0.109 0.179
##    Sum  0.300 0.700 1.000
# Cross-tabulated counts: Saving.accounts (rows) by Purpose (columns).
# Rows with a missing Saving.accounts value are left out, so the printed
# margins of this table total 817 rather than 1000.
mytable100 <- xtabs(~ Saving.accounts + Purpose, data = cr)
print(mytable100)
##                Purpose
## Saving.accounts business car domestic appliances education
##      little           56 188                   6        34
##      moderate         17  39                   1         5
##      quite rich        4  18                   2         3
##      rich              6  18                   0         2
##                Purpose
## Saving.accounts furniture/equipment radio/TV repairs vacation/others
##      little                     128      169      14               8
##      moderate                     9       27       3               2
##      quite rich                  12       23       1               0
##      rich                        11        9       2               0
margin.table(mytable100,1)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
prop.table(mytable100,1)
##                Purpose
## Saving.accounts    business         car domestic appliances   education
##      little     0.092868988 0.311774461         0.009950249 0.056384743
##      moderate   0.165048544 0.378640777         0.009708738 0.048543689
##      quite rich 0.063492063 0.285714286         0.031746032 0.047619048
##      rich       0.125000000 0.375000000         0.000000000 0.041666667
##                Purpose
## Saving.accounts furniture/equipment    radio/TV     repairs
##      little             0.212271973 0.280265340 0.023217247
##      moderate           0.087378641 0.262135922 0.029126214
##      quite rich         0.190476190 0.365079365 0.015873016
##      rich               0.229166667 0.187500000 0.041666667
##                Purpose
## Saving.accounts vacation/others
##      little         0.013266998
##      moderate       0.019417476
##      quite rich     0.000000000
##      rich           0.000000000
margin.table(mytable100,2)
## Purpose
##            business                 car domestic appliances 
##                  83                 263                   9 
##           education furniture/equipment            radio/TV 
##                  44                 160                 228 
##             repairs     vacation/others 
##                  20                  10
prop.table(mytable100,2)
##                Purpose
## Saving.accounts   business        car domestic appliances  education
##      little     0.67469880 0.71482890          0.66666667 0.77272727
##      moderate   0.20481928 0.14828897          0.11111111 0.11363636
##      quite rich 0.04819277 0.06844106          0.22222222 0.06818182
##      rich       0.07228916 0.06844106          0.00000000 0.04545455
##                Purpose
## Saving.accounts furniture/equipment   radio/TV    repairs vacation/others
##      little              0.80000000 0.74122807 0.70000000      0.80000000
##      moderate            0.05625000 0.11842105 0.15000000      0.20000000
##      quite rich          0.07500000 0.10087719 0.05000000      0.00000000
##      rich                0.06875000 0.03947368 0.10000000      0.00000000
addmargins(prop.table(mytable100))
##                Purpose
## Saving.accounts    business         car domestic appliances   education
##      little     0.068543452 0.230110159         0.007343941 0.041615667
##      moderate   0.020807834 0.047735618         0.001223990 0.006119951
##      quite rich 0.004895961 0.022031824         0.002447980 0.003671971
##      rich       0.007343941 0.022031824         0.000000000 0.002447980
##      Sum        0.101591187 0.321909425         0.011015912 0.053855569
##                Purpose
## Saving.accounts furniture/equipment    radio/TV     repairs
##      little             0.156670747 0.206854345 0.017135863
##      moderate           0.011015912 0.033047736 0.003671971
##      quite rich         0.014687882 0.028151775 0.001223990
##      rich               0.013463892 0.011015912 0.002447980
##      Sum                0.195838433 0.279069767 0.024479804
##                Purpose
## Saving.accounts vacation/others         Sum
##      little         0.009791922 0.738066095
##      moderate       0.002447980 0.126070991
##      quite rich     0.000000000 0.077111383
##      rich           0.000000000 0.058751530
##      Sum            0.012239902 1.000000000
# Cross-tabulated counts: Saving.accounts (rows) by Risk (columns).
# Rows with a missing Saving.accounts value are left out, so the printed
# margins of this table total 817 rather than 1000.
mytable101 <- xtabs(~ Saving.accounts + Risk, data = cr)
print(mytable101)
##                Risk
## Saving.accounts bad good
##      little     217  386
##      moderate    34   69
##      quite rich  11   52
##      rich         6   42
margin.table(mytable101,1)
## Saving.accounts
##     little   moderate quite rich       rich 
##        603        103         63         48
prop.table(mytable101,1)
##                Risk
## Saving.accounts       bad      good
##      little     0.3598673 0.6401327
##      moderate   0.3300971 0.6699029
##      quite rich 0.1746032 0.8253968
##      rich       0.1250000 0.8750000
margin.table(mytable101,2)
## Risk
##  bad good 
##  268  549
prop.table(mytable101,2)
##                Risk
## Saving.accounts        bad       good
##      little     0.80970149 0.70309654
##      moderate   0.12686567 0.12568306
##      quite rich 0.04104478 0.09471767
##      rich       0.02238806 0.07650273
addmargins(prop.table(mytable101))
##                Risk
## Saving.accounts         bad        good         Sum
##      little     0.265605875 0.472460220 0.738066095
##      moderate   0.041615667 0.084455324 0.126070991
##      quite rich 0.013463892 0.063647491 0.077111383
##      rich       0.007343941 0.051407589 0.058751530
##      Sum        0.328029376 0.671970624 1.000000000
# Cross-tabulated counts: Purpose (rows) by Risk (columns).
mytable102 <- xtabs(~ Purpose + Risk, data = cr)
print(mytable102)
##                      Risk
## Purpose               bad good
##   business             34   63
##   car                 106  231
##   domestic appliances   4    8
##   education            23   36
##   furniture/equipment  58  123
##   radio/TV             62  218
##   repairs               8   14
##   vacation/others       5    7
margin.table(mytable102,1)
## Purpose
##            business                 car domestic appliances 
##                  97                 337                  12 
##           education furniture/equipment            radio/TV 
##                  59                 181                 280 
##             repairs     vacation/others 
##                  22                  12
prop.table(mytable102,1)
##                      Risk
## Purpose                     bad      good
##   business            0.3505155 0.6494845
##   car                 0.3145401 0.6854599
##   domestic appliances 0.3333333 0.6666667
##   education           0.3898305 0.6101695
##   furniture/equipment 0.3204420 0.6795580
##   radio/TV            0.2214286 0.7785714
##   repairs             0.3636364 0.6363636
##   vacation/others     0.4166667 0.5833333
margin.table(mytable102,2)
## Risk
##  bad good 
##  300  700
prop.table(mytable102,2)
##                      Risk
## Purpose                      bad       good
##   business            0.11333333 0.09000000
##   car                 0.35333333 0.33000000
##   domestic appliances 0.01333333 0.01142857
##   education           0.07666667 0.05142857
##   furniture/equipment 0.19333333 0.17571429
##   radio/TV            0.20666667 0.31142857
##   repairs             0.02666667 0.02000000
##   vacation/others     0.01666667 0.01000000
addmargins(prop.table(mytable102))
##                      Risk
## Purpose                 bad  good   Sum
##   business            0.034 0.063 0.097
##   car                 0.106 0.231 0.337
##   domestic appliances 0.004 0.008 0.012
##   education           0.023 0.036 0.059
##   furniture/equipment 0.058 0.123 0.181
##   radio/TV            0.062 0.218 0.280
##   repairs             0.008 0.014 0.022
##   vacation/others     0.005 0.007 0.012
##   Sum                 0.300 0.700 1.000
# Useful boxplots for the study
# All plots are horizontal: the numeric variable runs along the x axis and one
# box is drawn per level of the grouping variable, so `xlab` names the numeric
# variable and `ylab` names the grouping variable. las = 1 keeps the axis
# labels horizontal.

# Age by checking-account status
boxplot(
  Age ~ Checking.account,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Checking.account",
  main = "Age v/s Checking account",
  col = c("red", "blue", "green")
)

# Age by savings-account status
boxplot(
  Age ~ Saving.accounts,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Savings.account",
  main = "Age v/s Savings.account",
  col = c("red", "blue", "green", "yellow")
)

# Age by housing type
boxplot(
  Age ~ Housing,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Housing",
  main = "Age v/s Housing",
  col = c("red", "blue", "green")
)

# Age by job category
boxplot(
  Age ~ Job,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Job",
  main = "Age v/s Job",
  col = c("red", "blue", "green", "yellow")
)

# Age by loan purpose
boxplot(
  Age ~ Purpose,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Purpose",
  main = "Age v/s Purpose",
  col = c(
    "red", "blue", "green", "yellow",
    "peachpuff", "darkred", "lightblue", "gray60"
  )
)

# Age by loan duration: one box per distinct Duration value. The title
# previously read "Duraction" (typo).
# NOTE(review): 17 colours are supplied; they are recycled if Duration has
# more distinct values than that.
boxplot(
  Age ~ Duration,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Duration",
  main = "Age v/s Duration",
  col = c(
    "red", "blue", "green", "yellow", "peachpuff", "darkred",
    "lightblue", "gray60", "dark blue", "blue4", "burlywood",
    "darkolivegreen", "gray50", "gray95", "darkgoldenrod4",
    "darkkhaki", "burlywood1"
  )
)

# Age by risk class
boxplot(
  Age ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Risk",
  main = "Age v/s Risk",
  col = c("peachpuff", "lightblue")
)

# Loan duration by risk class
boxplot(
  Duration ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Duration", ylab = "Risk",
  main = "Duration v/s Risk",
  col = c("peachpuff", "lightblue")
)

# Age by risk class again, drawn with an alternative palette
boxplot(
  Age ~ Risk,
  data = cr, horizontal = TRUE, las = 1,
  xlab = "Age", ylab = "Risk",
  main = "Age v/s Risk",
  col = c("pink", "purple")
)


#Suitable histograms
library(lattice)
## Warning: package 'lattice' was built under R version 3.4.3
# Use lattice's formula interface (~ variable) so each column is looked up in
# `cr` through `data =`. Passing the bare vector only resolved because `cr` is
# attached, and lattice then warned "explicit 'data' specification ignored"
# on every call.
histogram(~ Sex, data = cr, type = "count", col = "darkolivegreen")

histogram(~ Job, data = cr, type = "count", col = "burlywood")

histogram(~ Housing, data = cr, type = "count", col = "red")

histogram(~ Saving.accounts, data = cr, type = "count", col = "blue")

histogram(~ Purpose, data = cr, type = "count", col = "gray50")

histogram(~ Duration, data = cr, type = "count", col = "yellow")

histogram(~ Credit.amount, data = cr, type = "count", col = "green")

histogram(~ Checking.account, data = cr, type = "count", col = "purple")

histogram(~ Age, data = cr, type = "count", col = "lightblue")

histogram(~ Risk, data = cr, type = "count", col = "pink")


#Scatterplot matrix
library(car)
## Warning: package 'car' was built under R version 3.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pairwise scatterplots of every column except the row index X, drawn with
# smaller plotting symbols (cex = 0.6) and histograms requested on the diagonal.
# NOTE(review): the string form of `diagonal` looks like the car 2.x API; newer
# car releases appear to expect a list such as list(method = "histogram") --
# confirm against the installed car version before upgrading.
scatterplotMatrix(
  ~ Age + Sex + Job + Housing + Saving.accounts + Checking.account +
    Credit.amount + Duration + Purpose + Risk,
  data = cr,
  diagonal = "histogram",
  cex = 0.6
)

#Correlation matrix
# Pick the numeric variables by name instead of by column position, and leave
# out X: it is only the row index of the CSV (0..999), so correlating it with
# the other variables carries no meaning.
num_vars <- c("Age", "Job", "Credit.amount", "Duration")
# Computed once and reused for both the printed matrix and the plot, so the two
# always agree on how missing values are handled.
cor_mat <- cor(cr[, num_vars], use = "complete.obs")
cor_mat
##                       Age        Job Credit.amount    Duration
## Age            1.00000000 0.01567316    0.03271642 -0.03613637
## Job            0.01567316 1.00000000    0.28538533  0.21090973
## Credit.amount  0.03271642 0.28538533    1.00000000  0.62498420
## Duration      -0.03613637 0.21090973    0.62498420  1.00000000
#to visualize correlation matrix
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
corrplot(corr = cor_mat, method = "ellipse")

# Model 1: credit amount regressed on age, housing, purpose and savings.
# Rows with a missing Saving.accounts value are dropped by lm().
model1 <- Credit.amount ~ Age + Housing + Purpose + Saving.accounts
fit1 <- lm(formula = model1, data = cr)
summary(object = fit1)
## 
## Call:
## lm(formula = model1, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8860.4 -1573.4  -665.8   729.1 13198.7 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 5511.791    557.430   9.888  < 2e-16 ***
## Age                           -7.244      8.516  -0.851 0.395265    
## Housingown                 -1340.335    316.890  -4.230 2.61e-05 ***
## Housingrent                -1300.608    375.690  -3.462 0.000565 ***
## Purposecar                  -559.867    326.101  -1.717 0.086392 .  
## Purposedomestic appliances -2511.729    905.713  -2.773 0.005679 ** 
## Purposeeducation           -1349.699    486.338  -2.775 0.005645 ** 
## Purposefurniture/equipment  -963.235    352.386  -2.733 0.006405 ** 
## Purposeradio/TV            -1565.008    331.519  -4.721 2.77e-06 ***
## Purposerepairs             -1336.529    642.178  -2.081 0.037728 *  
## Purposevacation/others      4882.052    865.616   5.640 2.36e-08 ***
## Saving.accountsmoderate       53.588    277.511   0.193 0.846928    
## Saving.accountsquite rich   -387.223    342.283  -1.131 0.258269    
## Saving.accountsrich         -556.073    388.089  -1.433 0.152290    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2573 on 803 degrees of freedom
##   (183 observations deleted due to missingness)
## Multiple R-squared:  0.1273, Adjusted R-squared:  0.1132 
## F-statistic: 9.009 on 13 and 803 DF,  p-value: < 2.2e-16
# Model 2: credit amount regressed on housing, purpose and risk class.
model2 <- Credit.amount ~ Housing + Purpose + Risk
fit2 <- lm(formula = model2, data = cr)
summary(object = fit2)
## 
## Call:
## lm(formula = model2, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7728.4 -1673.0  -676.2   902.2 13410.0 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  6097.5      388.7  15.689  < 2e-16 ***
## Housingown                  -1507.1      281.0  -5.364 1.01e-07 ***
## Housingrent                 -1558.9      328.1  -4.751 2.32e-06 ***
## Purposecar                   -529.2      306.6  -1.726  0.08469 .  
## Purposedomestic appliances  -2569.0      809.5  -3.173  0.00155 ** 
## Purposeeducation            -1614.4      440.4  -3.666  0.00026 ***
## Purposefurniture/equipment  -1076.5      333.7  -3.226  0.00130 ** 
## Purposeradio/TV             -1575.3      312.6  -5.040 5.53e-07 ***
## Purposerepairs              -1571.7      625.2  -2.514  0.01209 *  
## Purposevacation/others       3567.0      813.5   4.385 1.29e-05 ***
## Riskgood                     -772.1      185.2  -4.169 3.33e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2645 on 989 degrees of freedom
## Multiple R-squared:  0.1308, Adjusted R-squared:  0.122 
## F-statistic: 14.88 on 10 and 989 DF,  p-value: < 2.2e-16
# Model 3: model 2 plus loan duration as an additional predictor.
model3 <- Credit.amount ~ Housing + Purpose + Risk + Duration
fit3 <- lm(formula = model3, data = cr)
summary(object = fit3)
## 
## Call:
## lm(formula = model3, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5524.2 -1175.4  -350.5   711.4 13278.4 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  975.029    381.534   2.556  0.01075 *  
## Housingown                  -581.068    229.113  -2.536  0.01136 *  
## Housingrent                 -430.786    267.909  -1.608  0.10816    
## Purposecar                   392.148    249.425   1.572  0.11622    
## Purposedomestic appliances -1229.320    652.702  -1.883  0.05994 .  
## Purposeeducation            -392.581    357.553  -1.098  0.27249    
## Purposefurniture/equipment   -50.829    271.602  -0.187  0.85159    
## Purposeradio/TV             -707.313    253.771  -2.787  0.00542 ** 
## Purposerepairs              -381.068    504.690  -0.755  0.45040    
## Purposevacation/others      3162.203    653.622   4.838 1.52e-06 ***
## Riskgood                     -36.851    152.034  -0.242  0.80853    
## Duration                     138.587      5.935  23.349  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2124 on 988 degrees of freedom
## Multiple R-squared:  0.4399, Adjusted R-squared:  0.4336 
## F-statistic: 70.53 on 11 and 988 DF,  p-value: < 2.2e-16
# Model 4: model 3 plus Job, entered as a numeric code rather than a factor.
model4 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job
fit4 <- lm(formula = model4, data = cr)
summary(object = fit4)
## 
## Call:
## lm(formula = model4, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5927.7 -1118.4  -335.3   726.0 13998.5 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -124.958    420.581  -0.297  0.76645    
## Housingown                  -461.370    226.363  -2.038  0.04180 *  
## Housingrent                 -296.531    264.606  -1.121  0.26271    
## Purposecar                   348.789    245.519   1.421  0.15574    
## Purposedomestic appliances -1202.833    642.199  -1.873  0.06137 .  
## Purposeeducation            -377.915    351.799  -1.074  0.28298    
## Purposefurniture/equipment  -108.021    267.407  -0.404  0.68633    
## Purposeradio/TV             -728.548    249.708  -2.918  0.00361 ** 
## Purposerepairs              -171.283    497.872  -0.344  0.73090    
## Purposevacation/others      2870.544    645.052   4.450 9.56e-06 ***
## Riskgood                     -49.210    149.599  -0.329  0.74227    
## Duration                     132.333      5.939  22.284  < 2e-16 ***
## Job                          608.512    104.926   5.799 8.95e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2090 on 987 degrees of freedom
## Multiple R-squared:  0.4583, Adjusted R-squared:  0.4517 
## F-statistic: 69.59 on 12 and 987 DF,  p-value: < 2.2e-16
# Model 5: model 4 plus Sex.
model5 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job + Sex
fit5 <- lm(formula = model5, data = cr)
summary(object = fit5)
## 
## Call:
## lm(formula = model5, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5991.0 -1137.6  -327.3   729.7 13921.4 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -275.971    435.716  -0.633  0.52664    
## Housingown                  -443.519    226.682  -1.957  0.05068 .  
## Housingrent                 -232.489    268.921  -0.865  0.38751    
## Purposecar                   364.662    245.721   1.484  0.13812    
## Purposedomestic appliances -1148.399    643.282  -1.785  0.07453 .  
## Purposeeducation            -336.575    353.060  -0.953  0.34067    
## Purposefurniture/equipment   -72.774    268.637  -0.271  0.78652    
## Purposeradio/TV             -706.748    250.160  -2.825  0.00482 ** 
## Purposerepairs              -165.832    497.702  -0.333  0.73906    
## Purposevacation/others      2898.697    645.162   4.493 7.86e-06 ***
## Riskgood                     -63.812    149.952  -0.426  0.67053    
## Duration                     131.980      5.942  22.210  < 2e-16 ***
## Job                          601.886    105.006   5.732 1.32e-08 ***
## Sexmale                      196.585    148.955   1.320  0.18722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2089 on 986 degrees of freedom
## Multiple R-squared:  0.4593, Adjusted R-squared:  0.4521 
## F-statistic: 64.42 on 13 and 986 DF,  p-value: < 2.2e-16
# Model 6: model 4 plus Checking.account.
# Rows with a missing Checking.account value are dropped by lm(), so this fit
# uses fewer observations than models 2-5.
model6 <- Credit.amount ~ Housing + Purpose + Risk + Duration + Job +
  Checking.account
fit6 <- lm(formula = model6, data = cr)
summary(object = fit6)
## 
## Call:
## lm(formula = model6, data = cr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5758.5 -1267.7  -382.6   810.5 13798.0 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -494.335    584.402  -0.846 0.397962    
## Housingown                  -374.570    299.501  -1.251 0.211559    
## Housingrent                 -246.743    343.070  -0.719 0.472289    
## Purposecar                   339.212    343.276   0.988 0.323477    
## Purposedomestic appliances -1232.977    850.329  -1.450 0.147589    
## Purposeeducation            -383.515    486.818  -0.788 0.431130    
## Purposefurniture/equipment   -84.392    365.535  -0.231 0.817493    
## Purposeradio/TV             -876.706    348.215  -2.518 0.012075 *  
## Purposerepairs                37.699    674.992   0.056 0.955479    
## Purposevacation/others      2858.554    743.232   3.846 0.000133 ***
## Riskgood                      37.753    194.341   0.194 0.846037    
## Duration                     126.778      8.068  15.714  < 2e-16 ***
## Job                          694.721    137.486   5.053  5.8e-07 ***
## Checking.accountmoderate     526.032    199.057   2.643 0.008445 ** 
## Checking.accountrich        -185.448    321.416  -0.577 0.564177    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2239 on 591 degrees of freedom
##   (394 observations deleted due to missingness)
## Multiple R-squared:  0.4495, Adjusted R-squared:  0.4365 
## F-statistic: 34.47 on 14 and 591 DF,  p-value: < 2.2e-16
# Welch two-sample t-test comparing mean credit amount between the two
# Risk groups (bad vs good).
t.test(formula = Credit.amount ~ Risk, data = cr)
## 
##  Welch Two Sample t-test
## 
## data:  Credit.amount by Risk
## t = 4.2642, df = 421.86, p-value = 2.478e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   513.534 1391.805
## sample estimates:
##  mean in group bad mean in group good 
##           3938.127           2985.457

Inference: I. The important variables for the Credit Risk analysis are: 1. Job 2. Housing 3. Saving.accounts 4. Purpose 5. Duration 6. Risk

The choice of variables has been made by running regressions on the variables mentioned above. The ones with significant values are listed. It has also been observed that the variable “Checking.account” is significant only when it comes to the moderate class of people.