Untitled

# Load the German credit data set that ships with the datasetsICR package.
library(datasetsICR)
data(german)
# Columns of interest: `Credit amount` and Duration, versus `Class Risk`.

# Factor copies of the categorical (character) columns.
genderfac <- factor(german[["Gender"]])
housingfac <- factor(german[["Housing"]])
savaccfac <- factor(german[["Saving accounts"]])
checkaccfac <- factor(german[["Checking account"]])
purposefac <- factor(german[["Purpose"]])

# Preview the first ten rows of the data set.
head(german, n = 10L)
   Age Gender Housing Saving accounts Checking account Credit amount Duration
1   67   male     own            <NA>           little          1169        6
2   22 female     own          little         moderate          5951       48
3   49   male     own          little             <NA>          2096       12
4   45   male    free          little           little          7882       42
5   53   male    free          little           little          4870       24
6   35   male    free            <NA>             <NA>          9055       36
7   53   male     own      quite rich             <NA>          2835       24
8   35   male    rent          little         moderate          6948       36
9   61   male     own            rich             <NA>          3059       12
10  28   male     own          little         moderate          5234       30
               Purpose Class Risk
1             radio/TV          1
2             radio/TV          2
3            education          1
4  furniture/equipment          1
5                  car          2
6            education          1
7  furniture/equipment          1
8                  car          1
9             radio/TV          1
10                 car          2
# Preview the last ten rows of the data set.
tail(german, n = 10L)
     Age Gender Housing Saving accounts Checking account Credit amount Duration
991   37   male     own            <NA>             <NA>          3565       12
992   34   male     own        moderate             <NA>          1569       15
993   23   male    rent            <NA>           little          1936       18
994   30   male     own          little           little          3959       36
995   50   male     own            <NA>             <NA>          2390       12
996   31 female     own          little             <NA>          1736       12
997   40   male     own          little           little          3857       30
998   38   male     own          little             <NA>           804       12
999   23   male    free          little           little          1845       45
1000  27   male     own        moderate         moderate          4576       45
                 Purpose Class Risk
991            education          1
992             radio/TV          1
993             radio/TV          1
994  furniture/equipment          1
995                  car          1
996  furniture/equipment          1
997                  car          1
998             radio/TV          1
999             radio/TV          2
1000                 car          1
# Per-column summary: quantiles for numeric columns, length/class for character ones.
summary(object = german)
      Age           Gender            Housing          Saving accounts   
 Min.   :19.00   Length:1000        Length:1000        Length:1000       
 1st Qu.:27.00   Class :character   Class :character   Class :character  
 Median :33.00   Mode  :character   Mode  :character   Mode  :character  
 Mean   :35.55                                                           
 3rd Qu.:42.00                                                           
 Max.   :75.00                                                           
 Checking account   Credit amount      Duration      Purpose         
 Length:1000        Min.   :  250   Min.   : 4.0   Length:1000       
 Class :character   1st Qu.: 1366   1st Qu.:12.0   Class :character  
 Mode  :character   Median : 2320   Median :18.0   Mode  :character  
                    Mean   : 3271   Mean   :20.9                     
                    3rd Qu.: 3972   3rd Qu.:24.0                     
                    Max.   :18424   Max.   :72.0                     
   Class Risk 
 Min.   :1.0  
 1st Qu.:1.0  
 Median :1.0  
 Mean   :1.3  
 3rd Qu.:2.0  
 Max.   :2.0  
# Cross-tabulate gender (rows) against housing status (columns).
table(german[["Gender"]], german[["Housing"]])
        
         free own rent
  female   19 196   95
  male     89 517   84
# Cross-tabulate gender (rows) against saving-account level (columns).
# table() excludes NA values by default, so rows with a missing level are not counted.
table(german[["Gender"]], german[["Saving accounts"]])
        
         little moderate quite rich rich
  female    194       32         16   19
  male      409       71         47   29
# Cross-tabulate gender (rows) against checking-account level (columns).
# table() excludes NA values by default, so rows with a missing level are not counted.
table(german[["Gender"]], german[["Checking account"]])
        
         little moderate rich
  female     88       86   20
  male      186      183   43
# Cross-tabulate gender (rows) against the stated purpose of the credit (columns).
table(german[["Gender"]], german[["Purpose"]])
        
         business car domestic appliances education furniture/equipment
  female       19  94                   6        24                  74
  male         78 243                   6        35                 107
        
         radio/TV repairs vacation/others
  female       85       5               3
  male        195      17               9
# Scatter plot of credit duration against credit amount, coloured by risk
# class, with the y-axis (Duration) on a log10 scale.
library(ggplot2)
library(datasetsICR)
data(german)
# Risk class as a factor so it is mapped to a discrete colour scale.
class <- factor(german[["Class Risk"]])

ggplot(german, aes(x = `Credit amount`, y = Duration, color = class)) +
  geom_point() +
  scale_y_log10()

# Credit amount (log10 x-axis) against credit duration, drawn as points plus
# one smoothed trend line per risk class, with descriptive labels.
library(ggplot2)
library(datasetsICR)
data(german)
# Risk class as a factor so it is mapped to a discrete colour scale.
class <- factor(german$`Class Risk`)

ggplot(
  data = german,
  mapping = aes(x = `Credit amount`, y = Duration, color = class)
) +
  geom_smooth() +
  geom_point() +
  # Only the x-axis is log-transformed; Duration stays on a linear scale,
  # so the y-axis label must not claim a log10 scale.
  scale_x_log10() +
  labs(
    title = "Comparison of Credit Amount, Duration, and Class Risk",
    subtitle = "Credit Risk with the duration of the credit",
    caption = "Source: [german] dataset",
    x = "Credit Amount Borrowed(in log base 10)",
    y = "Duration in months",
    color = "Class(1=good,2=bad)"
  )
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

This graph shows the relationship between the credit amount borrowed, the duration of the credit in months, and the risk class associated with each loan. Looking at the graph, we see that the two classes are very similar to each other: there is a lot of overlap in their distributions of amount borrowed and duration. The credit amount and the duration do have a positive relationship, but because of this overlap it is not strong enough to predict the Class Risk.