Solution to assignment @ http://mbaskills.in/wp-content/uploads/2019/06/hiddencode_Assignment_on_Credit_Card_Default_Data_.html

Q.1a) Read data into dataframe

# Load the credit-card default data. stringsAsFactors = TRUE keeps the text
# columns as factors on R >= 4.0.0 (where the default became FALSE), which is
# what the str() output further down shows.
df <- read.csv("DefaultData.csv", header = TRUE, stringsAsFactors = TRUE)

Q.1b) Convert into a data.table

# Load data.table with library() so a missing package stops the script here;
# require() would only return FALSE and let the next line fail instead.
library(data.table)
# data.table copy of df, used for the grouped summaries below.
dt <- data.table(df)

Q.2) Dimensions of the dataframe

# Row and column counts of df.
dim(df)
## [1] 10000     4

Q.3) List the column names of dataframe

# Column names of df.
colnames(df)
## [1] "default" "student" "balance" "income"

Q.4) Attach the dataframe

# Put df's columns on the search path; later calls such as table(default)
# and hist(balance) refer to these columns by bare name.
attach(df)

Q.5a) Data structures of columns in dataframe

# Compact summary of each column's type and first few values.
str(df)
## 'data.frame':    10000 obs. of  4 variables:
##  $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
##  $ balance: num  730 817 1074 529 786 ...
##  $ income : num  44362 12106 31767 35704 38463 ...

Q.5b) Data structures of columns in data.table

# Same structure summary for the data.table copy.
str(dt)
## Classes 'data.table' and 'data.frame':   10000 obs. of  4 variables:
##  $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
##  $ balance: num  730 817 1074 529 786 ...
##  $ income : num  44362 12106 31767 35704 38463 ...
##  - attr(*, ".internal.selfref")=<externalptr>

Q.6) Count how many consumers default on their loan

# Frequency count of each level of default (a column of the attached df).
table(default)
## default
##   No  Yes 
## 9667  333
# Alternative: the same counts with data.table (a bare .N is named N)
dt[, .N, by = default]
##    default    N
## 1:      No 9667
## 2:     Yes  333

Q.7) Count how many consumers default on their loan broken down by whether or not they are students

# Cross-tabulate default (rows) against student (columns).
table(default,student)
##        student
## default   No  Yes
##     No  6850 2817
##     Yes  206  127
# Alternative: counts per (student, default) pair with data.table
dt[, .N, by = .(student, default)]
##    student default    N
## 1:      No      No 6850
## 2:     Yes      No 2817
## 3:     Yes     Yes  127
## 4:      No     Yes  206

Q.8) Contingency table of defaulters broken down by students

# Store the default-by-student cross-tabulation, then print it with row and
# column totals appended.
con_table <- table(default,student)
addmargins(con_table)
##        student
## default    No   Yes   Sum
##     No   6850  2817  9667
##     Yes   206   127   333
##     Sum  7056  2944 10000

Q.9) Calculate the % of defaulters

# Percentage of consumers at each default level, to one decimal place.
round(100 * prop.table(table(default)), digits = 1)
## default
##   No  Yes 
## 96.7  3.3

Q.10) Calculate mean, standard deviation & variance of income

# One-row summary of income: mean, standard deviation and variance.
dt[, .(
  mean = mean(income),
  standard_deviation = sd(income),
  variance = var(income)
)]
##        mean standard_deviation  variance
## 1: 33516.98           13336.64 177865955

Q.11) Calculate min & max of income rounded to 2 decimal places

# Smallest and largest income, each rounded to 2 decimal places. The digits
# argument must go to round(); placed inside max() the 2 is only another
# candidate value and the result is rounded to 0 decimals.
dt[, .(min = round(min(income), 2), max = round(max(income), 2))]
##       min      max
## 1: 771.97 73554.23

Q.12a) Print descriptive statistics

# library() stops with an error if psych is not installed, whereas require()
# would only return FALSE.
library(psych)
# Keep the basic location/spread columns, selected by name so the result does
# not depend on the column order of describe()'s output.
describe(df)[, c("vars", "n", "mean", "sd", "median", "min", "max")]
##          vars     n     mean       sd   median    min      max
## default*    1 10000     1.03     0.18     1.00   1.00     2.00
## student*    2 10000     1.29     0.46     1.00   1.00     2.00
## balance     3 10000   835.37   483.71   823.64   0.00  2654.32
## income      4 10000 33516.98 13336.64 34552.64 771.97 73554.23

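Q.12b) Interpretation

default* and student* are factors coded No = 1 and Yes = 2, so their means of 1.03 and 1.29 correspond to roughly 3% of consumers defaulting and roughly 29% being students. Balance has a mean of 835.37 and a median of 823.64, ranging from 0 to 2654.32. Income has a mean of 33516.98 and a median of 34552.64, ranging from 771.97 to 73554.23. For both variables the mean and median are close to each other.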

Q.13a) Average of balance broken down by whether consumers default on their loan

# Mean balance within each level of default.
dt[, .(balance = mean(balance)), by = default]
##    default   balance
## 1:      No  803.9438
## 2:     Yes 1747.8217

Q.13b) Histogram of balance

# Histogram of the balance column (from the attached df).
hist(balance, col = 'lightblue')

Q.14) Breakdown of mean & standard deviation of balance

# Group size plus mean and standard deviation of balance (2 decimal places)
# for every default/student combination.
dt[, .(
  N = .N,
  MeanBalance = round(mean(balance), 2),
  SDBalance = round(sd(balance), 2)
), by = .(default, student)]
##    default student    N MeanBalance SDBalance
## 1:      No      No 6850      744.50    445.52
## 2:      No     Yes 2817      948.48    450.55
## 3:     Yes     Yes  127     1860.38    328.74
## 4:     Yes      No  206     1678.43    330.91

Q.15) Create box-plot for balance

# Horizontal box-plot of balance across all consumers. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
boxplot(
  balance,
  horizontal = TRUE,
  col = "lightblue",
  main = "Boxplot for balance",
  xlab = "balance"
)

Q.16) Create box-plot for balance, broken down by whether a consumer is a student or not

# Box-plots of balance split by student status. The title names the grouping
# variable actually used in the formula (student).
boxplot(
  balance ~ student,
  col = c("gray", "lightblue"),
  main = "Boxplot for balance, grouped by student (Yes/No)",
  xlab = "",
  ylab = ""
)