Sameer Mathur
library(psych)
# Per-column overview of default.dt via base summary(): the output below
# shows level counts for the categorical columns (default, student) and
# min / quartiles / median / mean / max for the numeric ones
# (balance, income).
summary(default.dt)
default student balance income
No :9667 No :7056 Min. : 0.0 Min. : 772
Yes: 333 Yes:2944 1st Qu.: 481.7 1st Qu.:21340
Median : 823.6 Median :34553
Mean : 835.4 Mean :33517
3rd Qu.:1166.3 3rd Qu.:43808
Max. :2654.3 Max. :73554
library(psych)
# psych::describe() statistics, restricted to the count, centre, spread and
# range columns; columns are picked by name rather than by position so the
# selection documents itself
describe(default.dt)[, c("vars", "n", "mean", "sd", "median", "min", "max")]
vars n mean sd median min max
default* 1 10000 1.03 0.18 1.00 1.00 2.00
student* 2 10000 1.29 0.46 1.00 1.00 2.00
balance 3 10000 835.37 483.71 823.64 0.00 2654.32
income 4 10000 33516.98 13336.64 34552.64 771.97 73554.23
# Cross-tabulate default status (rows) against student status (columns) and
# append row/column totals with addmargins().
# The columns are looked up inside default.dt through with(), so the call no
# longer depends on the table having been attach()ed or on stray global
# copies of `default` / `student`; the dimension names stay the same.
with(default.dt, addmargins(table(default, student)))
student
default No Yes Sum
No 6850 2817 9667
Yes 206 127 333
Sum 7056 2944 10000
library(data.table)
# group size plus mean income and mean balance (rounded to 2 decimals)
# for each level of default
default.dt[
  ,
  list(
    N = .N,
    MeanIncome = round(mean(income), digits = 2),
    MeanBalance = round(mean(balance), digits = 2)
  ),
  by = "default"
]
default N MeanIncome MeanBalance
1: No 9667 33566.17 803.94
2: Yes 333 32089.15 1747.82
# group size plus mean income and mean balance (rounded to 2 decimals)
# for every student x default combination, sorted by student then default
default.dt[
  ,
  list(
    N = .N,
    MeanIncome = round(mean(income), digits = 2),
    MeanBalance = round(mean(balance), digits = 2)
  ),
  by = c("student", "default")
][order(student, default)]
student default N MeanIncome MeanBalance
1: No No 6850 39993.52 744.50
2: No Yes 206 40625.05 1678.43
3: Yes No 2817 17937.01 948.48
4: Yes Yes 127 18243.51 1860.38