# Solution to the assignment at http://mbaskills.in/wp-content/uploads/2019/06/hiddencode_Assignment_on_Credit_Card_Default_Data_.html
# Load data.table before it is used. library() stops with an error when the
# package is missing, whereas require() only returns FALSE and lets the
# script continue until the first data.table call fails.
library(data.table)

# Read the credit-card default data set.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0
# (where the default changed to FALSE), matching the str() output recorded
# further down in this script.
df <- read.csv("DefaultData.csv", header = TRUE, stringsAsFactors = TRUE)

# data.table version of the same data, used by the dt[...] summaries.
dt <- data.table(df)
# Size of the data set: number of rows, then number of columns.
dim(df)
## [1] 10000 4

# Column names of the data frame.
names(df)
## [1] "default" "student" "balance" "income"

# attach() puts the columns of df on the search path so they can be referred
# to by bare name (e.g. `default`, `balance`).
# NOTE(review): attach() is generally discouraged; with(df, ...) or df$col
# avoids name clashes on the search path.
attach(df)

# Column types of the plain data.frame.
str(df)
## 'data.frame': 10000 obs. of 4 variables:
## $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...

# Column types of the data.table copy (same columns, extra class/attribute).
str(dt)
## Classes 'data.table' and 'data.frame': 10000 obs. of 4 variables:
## $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Number of customers in each default class (base R).
# with() looks `default` up in df and keeps the "default" header on the table.
with(df, table(default))
## default
## No Yes
## 9667 333

# Same counts with data.table: .N is the row count of each group and is
# automatically named N in the result.
dt[, .N, by = default]
## default N
## 1: No 9667
## 2: Yes 333
# Cross-tabulation of default status (rows) against student status (columns).
with(df, table(default, student))
## student
## default No Yes
## No 6850 2817
## Yes 206 127

# data.table equivalent: one row per (student, default) combination.
dt[, .N, by = .(student, default)]
## student default N
## 1: No No 6850
## 2: Yes No 2817
## 3: Yes Yes 127
## 4: No Yes 206

# Store the contingency table, then print it with row and column totals.
con_table <- with(df, table(default, student))
addmargins(con_table)
## student
## default No Yes Sum
## No 6850 2817 9667
## Yes 206 127 333
## Sum 7056 2944 10000
# Share of customers in each default class, as a percentage rounded to one
# decimal place.
round(100 * prop.table(with(df, table(default))), 1)
## default
## No Yes
## 96.7 3.3
# Mean, standard deviation and variance of income over all customers.
dt[, .(
  mean = mean(income),
  standard_deviation = sd(income),
  variance = var(income)
)]
## mean standard_deviation variance
## 1: 33516.98 13336.64 177865955

# Smallest and largest income, each rounded to two decimals.
# The digits argument belongs to round(), not max(): max(income, 2) would
# treat 2 as one more candidate value and round() would then use 0 decimals.
dt[, .(min = round(min(income), 2), max = round(max(income), 2))]
## min max
## 1: 771.97 73554.23
# psych provides describe(); library() fails immediately if the package is
# not installed instead of returning FALSE like require().
library(psych)

# Descriptive statistics for every column, keeping only the basic measures.
# Columns are selected by name so the selection does not depend on the
# position of the statistics in describe()'s output. Rows marked with `*` in
# the recorded output are the factor columns.
describe(df)[, c("vars", "n", "mean", "sd", "median", "min", "max")]
## vars n mean sd median min max
## default* 1 10000 1.03 0.18 1.00 1.00 2.00
## student* 2 10000 1.29 0.46 1.00 1.00 2.00
## balance 3 10000 835.37 483.71 823.64 0.00 2654.32
## income 4 10000 33516.98 13336.64 34552.64 771.97 73554.23
# Average balance within each default class. .SDcols restricts .SD to the
# balance column, so the result has columns `default` and `balance`.
dt[, lapply(.SD, mean), by = default, .SDcols = "balance"]
## default balance
## 1: No 803.9438
## 2: Yes 1747.8217
# Histogram of balance. with() resolves `balance` from df while keeping the
# default title and axis label derived from the variable name.
with(df, hist(balance, col = "lightblue"))
# Group size, mean balance and standard deviation of balance (both rounded
# to two decimals) for every combination of default and student status.
dt[
  ,
  .(
    N = .N,
    MeanBalance = round(mean(balance), 2),
    SDBalance = round(sd(balance), 2)
  ),
  by = c("default", "student")
]
## default student N MeanBalance SDBalance
## 1: No No 6850 744.50 445.52
## 2: No Yes 2817 948.48 450.55
## 3: Yes Yes 127 1860.38 328.74
## 4: Yes No 206 1678.43 330.91
# Horizontal boxplot of balance over all customers.
with(
  df,
  boxplot(
    balance,
    horizontal = TRUE,
    col = "lightblue",
    main = "Boxplot for balance",
    xlab = "balance"
  )
)
# Boxplots of balance, one per default class (No / Yes).
# The grouping variable is `default` so the plot matches its title; the
# data argument makes the formula independent of attach().
boxplot(
  balance ~ default,
  data = df,
  col = c("gray", "lightblue"),
  main = "Boxplot for balance, grouped by default (Yes/No)",
  xlab = "",
  ylab = ""
)