Summarizing Data

Sameer Mathur

Summary Statistics

library(psych)
# Descriptive statistics with summary(), a base R generic (the psych package
# loaded above is not needed for this call). In the output below, the factor
# columns (default, student) are reported as level counts, while the numeric
# columns (balance, income) get min, quartiles, median, mean and max.
summary(default.dt)
 default    student       balance           income     
 No :9667   No :7056   Min.   :   0.0   Min.   :  772  
 Yes: 333   Yes:2944   1st Qu.: 481.7   1st Qu.:21340  
                       Median : 823.6   Median :34553  
                       Mean   : 835.4   Mean   :33517  
                       3rd Qu.:1166.3   3rd Qu.:43808  
                       Max.   :2654.3   Max.   :73554  

Descriptive Statistics

library(psych)
# psych::describe() returns one row per variable; keep only the headline
# statistics, selected by column name so the choice is self-explanatory.
describe(default.dt)[
  ,
  c("vars", "n", "mean", "sd", "median", "min", "max")
]
         vars     n     mean       sd   median    min      max
default*    1 10000     1.03     0.18     1.00   1.00     2.00
student*    2 10000     1.29     0.46     1.00   1.00     2.00
balance     3 10000   835.37   483.71   823.64   0.00  2654.32
income      4 10000 33516.98 13336.64 34552.64 771.97 73554.23

Number of Defaulters by Student

# Cross-tabulate default status (rows) against student status (columns) and
# append row/column totals ("Sum") with addmargins().
# The columns are looked up inside default.dt via with(), so the snippet does
# not depend on attach(default.dt) or on same-named vectors in the global
# environment; the table's dimension names stay "default" and "student".
addmargins(with(default.dt, table(default, student)))
       student
default    No   Yes   Sum
    No   6850  2817  9667
    Yes   206   127   333
    Sum  7056  2944 10000

Average Income and Balance by Defaulters

library(data.table)
# Group the rows by default status and report, per group, the number of rows
# (N) plus mean income and mean balance, each rounded to two decimals.
default.dt[,
  list(
    N = .N,
    MeanIncome = round(mean(income), 2),
    MeanBalance = round(mean(balance), 2)
  ),
  by = "default"
]
   default    N MeanIncome MeanBalance
1:      No 9667   33566.17      803.94
2:     Yes  333   32089.15     1747.82

Average Income and Balance by Defaulters and Student

# Group the rows by student status and default status and report, per group,
# the number of rows (N) plus mean income and mean balance, each rounded to
# two decimals. The grouped result is then sorted by student, then default.
default.dt[,
  list(
    N = .N,
    MeanIncome = round(mean(income), 2),
    MeanBalance = round(mean(balance), 2)
  ),
  by = c("student", "default")
][order(student, default)]
   student default    N MeanIncome MeanBalance
1:      No      No 6850   39993.52      744.50
2:      No     Yes  206   40625.05     1678.43
3:     Yes      No 2817   17937.01      948.48
4:     Yes     Yes  127   18243.51     1860.38