Load datasets
library(datasets)
library(class)
library(ISLR)
library(ggplot2)
library(reshape2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(base)
library(gmodels)
Set working directory and read data
setwd("/Users/carolyn.khalil/Desktop/R-Tutorial")
bankcred1 <- read.csv("R-tutorial/Data/BankCred.csv")
bankcred<-bankcred1
summary(bankcred)
## default account_check_status
## Min. :0.0 < 0 DM :274
## 1st Qu.:0.0 >= 200 DM / salary assignments for at least 1 year: 63
## Median :0.0 0 <= ... < 200 DM :269
## Mean :0.3 no checking account :394
## 3rd Qu.:1.0
## Max. :1.0
##
## duration_in_month
## Min. : 4.0
## 1st Qu.:12.0
## Median :18.0
## Mean :20.9
## 3rd Qu.:24.0
## Max. :72.0
##
## credit_history
## all credits at this bank paid back duly : 49
## critical account/ other credits existing (not at this bank):293
## delay in paying off in the past : 88
## existing credits paid back duly till now :530
## no credits taken/ all credits paid back duly : 40
##
##
## purpose credit_amount
## domestic appliances :280 Min. : 250
## car (new) :234 1st Qu.: 1366
## radio/television :181 Median : 2320
## car (used) :103 Mean : 3271
## business : 97 3rd Qu.: 3972
## (vacation - does not exist?): 50 Max. :18424
## (Other) : 55
## savings present_emp_since
## .. >= 1000 DM : 48 .. >= 7 years :253
## ... < 100 DM :603 ... < 1 year :172
## 100 <= ... < 500 DM :103 1 <= ... < 4 years:339
## 500 <= ... < 1000 DM : 63 4 <= ... < 7 years:174
## unknown/ no savings account:183 unemployed : 62
##
##
## installment_as_income_perc personal_status_sex
## Min. :1.000 female : divorced/separated/married:310
## 1st Qu.:2.000 male : divorced/separated : 50
## Median :3.000 male : married/widowed : 92
## Mean :2.973 male : single :548
## 3rd Qu.:4.000
## Max. :4.000
##
## other_debtors present_res_since
## co-applicant: 41 Min. :1.000
## guarantor : 52 1st Qu.:2.000
## none :907 Median :3.000
## Mean :2.845
## 3rd Qu.:4.000
## Max. :4.000
##
## property
## if not A121 : building society savings agreement/ life insurance:232
## if not A121/A122 : car or other, not in attribute 6 :332
## real estate :282
## unknown / no property :154
##
##
##
## age other_installment_plans housing credits_this_bank
## Min. :19.00 bank :139 for free:108 Min. :1.000
## 1st Qu.:27.00 none :814 own :713 1st Qu.:1.000
## Median :33.00 stores: 47 rent :179 Median :1.000
## Mean :35.55 Mean :1.407
## 3rd Qu.:42.00 3rd Qu.:2.000
## Max. :75.00 Max. :4.000
##
## job
## management/ self-employed/ highly qualified employee/ officer:148
## skilled employee / official :630
## unemployed/ unskilled - non-resident : 22
## unskilled - resident :200
##
##
##
## people_under_maintenance telephone
## Min. :1.000 none :596
## 1st Qu.:1.000 yes, registered under the customers name :404
## Median :1.000
## Mean :1.155
## 3rd Qu.:1.000
## Max. :2.000
##
## foreign_worker
## no : 37
## yes:963
##
##
##
##
##
Recode qualitative variables
Recode account check status
bankcred$account_check_status= recode(bankcred$account_check_status,"< 0 DM"=1, "0 <= ... < 200 DM"=2, "no checking account"=3, ">= 200 DM / salary assignments for at least 1 year"=4)
Recode credit history
bankcred$credit_history= recode(bankcred$credit_history, "critical account/ other credits existing (not at this bank)"=1,
"existing credits paid back duly till now"=2, "delay in paying off in the past"=3,
"no credits taken/ all credits paid back duly"=4, "all credits at this bank paid back duly" = 5)
Recode purpose #other
bankcred$purpose= recode(bankcred$purpose, "domestic appliances"=1, "(vacation - does not exist?)"=2,
"radio/television"=3, "car (new)"=4, "business" = 5, "car (used)"=6,"repairs"=7,
"retraining"=8,"education"=9, "furniture/equipment"=10, )
Recode savings
summary(bankcred$savings)
## .. >= 1000 DM ... < 100 DM
## 48 603
## 100 <= ... < 500 DM 500 <= ... < 1000 DM
## 103 63
## unknown/ no savings account
## 183
bankcred$savings= recode(bankcred$savings, ".. >= 1000 DM"=1, "... < 100 DM"=2, "100 <= ... < 500 DM"=3,
"500 <= ... < 1000 DM"=4, "unknown/ no savings account"=5)
Recode present emp since
bankcred$present_emp_since= recode(bankcred$present_emp_since, ".. >= 7 years"=1, "... < 1 year "=2,
"1 <= ... < 4 years"=3, "4 <= ... < 7 years"=4, "unemployed"=5)
Recode personal status and sex
bankcred$personal_status_sex= recode(bankcred$personal_status_sex, "female : divorced/separated/married"=1,
"male : single"=2, "male : divorced/separated"=3, "male : married/widowed"=4)
Recode other debtors
bankcred$other_debtors= recode(bankcred$other_debtors, "guarantor"=1, "co-applicant"=2, "none"=3)
Recode property
bankcred$property= recode(bankcred$property, "real estate"=1, "if not A121 : building society savings agreement/ life insurance"=2,
"if not A121/A122 : car or other, not in attribute 6"=3, "unknown / no property"=4)
Recode other installments
bankcred$other_installment_plans= recode(bankcred$other_installment_plans, "bank"=1, "stores"=2, "none"=3)
Recode housing
bankcred$housing= recode(bankcred$housing, "for free"=1, "own"=2, "rent"=3)
Recode job
bankcred$job= recode(bankcred$job, "management/ self-employed/ highly qualified employee/ officer"=1, "skilled employee / official"=2,
"unemployed/ unskilled - non-resident"=3, "unskilled - resident"=4)
Recode telephone
bankcred$telephone= recode(bankcred$telephone, "none"=1, "yes, registered under the customers name "=2)
Recode foreign worker
bankcred$foreign_worker= recode(bankcred$foreign_worker, "no"=1, "yes"=2)
View data
head(bankcred)
## default account_check_status duration_in_month credit_history purpose
## 1 0 1 6 1 1
## 2 1 2 48 2 1
## 3 0 3 12 1 2
## 4 0 1 42 2 3
## 5 1 1 24 3 4
## 6 0 3 36 2 2
## credit_amount savings present_emp_since installment_as_income_perc
## 1 1169 5 1 4
## 2 5951 2 3 2
## 3 2096 2 4 2
## 4 7882 2 4 2
## 5 4870 2 3 3
## 6 9055 5 3 2
## personal_status_sex other_debtors present_res_since property age
## 1 2 3 4 1 67
## 2 1 3 2 1 22
## 3 2 3 3 1 49
## 4 2 1 4 2 45
## 5 2 3 4 4 53
## 6 2 3 4 4 35
## other_installment_plans housing credits_this_bank job
## 1 3 2 2 2
## 2 3 2 1 2
## 3 3 2 1 4
## 4 3 1 1 2
## 5 3 1 2 2
## 6 3 1 1 4
## people_under_maintenance telephone foreign_worker
## 1 1 2 2
## 2 1 1 2
## 3 2 1 2
## 4 2 1 2
## 5 2 1 2
## 6 2 2 2
summary(bankcred)
## default account_check_status duration_in_month credit_history
## Min. :0.0 Min. :1.000 Min. : 4.0 Min. :1.000
## 1st Qu.:0.0 1st Qu.:1.000 1st Qu.:12.0 1st Qu.:1.000
## Median :0.0 Median :2.000 Median :18.0 Median :2.000
## Mean :0.3 Mean :2.246 Mean :20.9 Mean :2.022
## 3rd Qu.:1.0 3rd Qu.:3.000 3rd Qu.:24.0 3rd Qu.:2.000
## Max. :1.0 Max. :4.000 Max. :72.0 Max. :5.000
##
## purpose credit_amount savings present_emp_since
## Min. : 1.000 Min. : 250 Min. :2.000 Min. :1.00
## 1st Qu.: 1.000 1st Qu.: 1366 1st Qu.:2.000 1st Qu.:1.00
## Median : 3.000 Median : 2320 Median :2.000 Median :3.00
## Mean : 3.436 Mean : 3271 Mean :2.733 Mean :2.62
## 3rd Qu.: 5.000 3rd Qu.: 3972 3rd Qu.:3.000 3rd Qu.:3.00
## Max. :10.000 Max. :18424 Max. :5.000 Max. :5.00
## NA's :111
## installment_as_income_perc personal_status_sex other_debtors
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:3.000
## Median :3.000 Median :2.000 Median :3.000
## Mean :2.973 Mean :1.924 Mean :2.855
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :3.000
##
## present_res_since property age other_installment_plans
## Min. :1.000 Min. :1.000 Min. :19.00 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:27.00 1st Qu.:3.000
## Median :3.000 Median :2.000 Median :33.00 Median :3.000
## Mean :2.845 Mean :2.358 Mean :35.55 Mean :2.675
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:42.00 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :75.00 Max. :3.000
##
## housing credits_this_bank job people_under_maintenance
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :1.000 Median :2.000 Median :1.000
## Mean :2.071 Mean :1.407 Mean :2.274 Mean :1.155
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :3.000 Max. :4.000 Max. :4.000 Max. :2.000
##
## telephone foreign_worker
## Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000
## Median :1.000 Median :2.000
## Mean :1.404 Mean :1.963
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000
##
names(bankcred)
## [1] "default" "account_check_status"
## [3] "duration_in_month" "credit_history"
## [5] "purpose" "credit_amount"
## [7] "savings" "present_emp_since"
## [9] "installment_as_income_perc" "personal_status_sex"
## [11] "other_debtors" "present_res_since"
## [13] "property" "age"
## [15] "other_installment_plans" "housing"
## [17] "credits_this_bank" "job"
## [19] "people_under_maintenance" "telephone"
## [21] "foreign_worker"
Delete savings (missing values - cant fix it:( , something is wrong with the labels for 1 and 4 in the recoding ) i tried with an ifelse statement below these two options wont be read (bankcred\(savings<-ifelse(bankcred\)saving==“.. >= 1000 DM”,1,
newbc<-select(bankcred,-savings)
Normalize
normalize <- function(x) {
num <- x - min(x)
denom <- max(x) - min(x)
return (num/denom)
}
Norm_dataset <-as.data.frame(lapply(bankcred[2:20],normalize))
head(bankcred)
## default account_check_status duration_in_month credit_history purpose
## 1 0 1 6 1 1
## 2 1 2 48 2 1
## 3 0 3 12 1 2
## 4 0 1 42 2 3
## 5 1 1 24 3 4
## 6 0 3 36 2 2
## credit_amount savings present_emp_since installment_as_income_perc
## 1 1169 5 1 4
## 2 5951 2 3 2
## 3 2096 2 4 2
## 4 7882 2 4 2
## 5 4870 2 3 3
## 6 9055 5 3 2
## personal_status_sex other_debtors present_res_since property age
## 1 2 3 4 1 67
## 2 1 3 2 1 22
## 3 2 3 3 1 49
## 4 2 1 4 2 45
## 5 2 3 4 4 53
## 6 2 3 4 4 35
## other_installment_plans housing credits_this_bank job
## 1 3 2 2 2
## 2 3 2 1 2
## 3 3 2 1 4
## 4 3 1 1 2
## 5 3 1 2 2
## 6 3 1 1 4
## people_under_maintenance telephone foreign_worker
## 1 1 2 2
## 2 1 1 2
## 3 2 1 2
## 4 2 1 2
## 5 2 1 2
## 6 2 2 2
divide data into two sections (1 & 2) with 67% in on and 33% in the other and name data set
set.seed(8)
ind <-sample(2,nrow(newbc), replace=TRUE,prob=c(0.67,0.33))
Call the first set of data with 67% bank.training,
bank.training <-newbc[ind==1,2:20 ]
head(bank.training)
## account_check_status duration_in_month credit_history purpose credit_amount
## 1 1 6 1 1 1169
## 2 2 48 2 1 5951
## 4 1 42 2 3 7882
## 5 1 24 3 4 4870
## 7 3 24 2 3 2835
## 10 2 30 1 4 5234
## present_emp_since installment_as_income_perc personal_status_sex
## 1 1 4 2
## 2 3 2 1
## 4 4 2 2
## 5 3 3 2
## 7 1 3 2
## 10 5 4 4
## other_debtors present_res_since property age other_installment_plans housing
## 1 3 4 1 67 3 2
## 2 3 2 1 22 3 2
## 4 1 4 2 45 3 1
## 5 3 4 4 53 3 1
## 7 3 4 2 53 3 2
## 10 3 2 3 28 3 2
## credits_this_bank job people_under_maintenance telephone foreign_worker
## 1 2 2 1 2 2
## 2 1 2 1 1 2
## 4 1 2 2 1 2
## 5 2 2 2 1 2
## 7 1 2 1 1 2
## 10 2 1 1 1 2
Second set of data (33%)
bank.test <- newbc[ind==2,2:20]
Label training and test set
bank.trainlabels <-newbc[ind==1,1]
print(bank.trainlabels)
## [1] 0 1 0 1 0 1 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0
## [38] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 1
## [75] 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [112] 0 0 1 0 1 1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
## [149] 0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1
## [186] 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1
## [223] 1 1 0 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
## [260] 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0
## [297] 1 0 0 0 0 1 0 1 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0
## [334] 0 1 1 1 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0
## [371] 0 1 0 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 1 1 1 0 1
## [408] 1 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1
## [445] 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0
## [482] 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 0
## [519] 1 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 1
## [556] 1 1 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0
## [593] 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1
## [630] 0 1 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0
## [667] 0 0 0 0 0 0 0 1 0
bank.testlabels <- newbc[ind==2,1]
print(bank.testlabels)
## [1] 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [38] 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0
## [75] 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0
## [112] 1 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1
## [149] 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1 0
## [186] 0 0 1 1 0 0 1 1 1 0 1 0 1 0 0 1 1 0 1 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1
## [260] 0 0 1 0 0 0 0 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
## [297] 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0
Run knn with k =10
bank_pred <-knn(train = bank.training, test = bank.test, cl=bank.trainlabels, k=8)
print(bank_pred)
## [1] 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0
## [38] 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1
## [75] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0
## [112] 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0
## [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1
## [297] 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 1 0 0
## Levels: 0 1
Join predicted values and original values
merge<- data.frame(bank_pred, bank.testlabels)
names(merge) <-c("Predicted Default", "Observed Default")
merge
## Predicted Default Observed Default
## 1 0 0
## 2 1 0
## 3 1 0
## 4 0 0
## 5 0 1
## 6 1 0
## 7 1 1
## 8 0 0
## 9 0 0
## 10 0 1
## 11 0 0
## 12 0 0
## 13 1 0
## 14 0 0
## 15 0 1
## 16 0 1
## 17 0 1
## 18 0 0
## 19 0 0
## 20 0 1
## 21 1 1
## 22 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 27 1 0
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 1 1
## 34 0 0
## 35 0 0
## 36 0 0
## 37 0 0
## 38 0 0
## 39 1 1
## 40 0 0
## 41 1 1
## 42 0 1
## 43 0 0
## 44 0 0
## 45 0 0
## 46 0 0
## 47 0 1
## 48 1 0
## 49 1 0
## 50 0 0
## 51 0 0
## 52 0 0
## 53 1 0
## 54 0 0
## 55 1 0
## 56 0 1
## 57 0 1
## 58 0 0
## 59 0 1
## 60 0 0
## 61 0 0
## 62 0 0
## 63 0 1
## 64 0 1
## 65 1 0
## 66 0 1
## 67 0 1
## 68 0 1
## 69 1 1
## 70 0 1
## 71 0 0
## 72 0 0
## 73 0 1
## 74 1 0
## 75 1 1
## 76 0 0
## 77 0 1
## 78 0 0
## 79 0 0
## 80 0 0
## 81 0 0
## 82 0 1
## 83 0 0
## 84 0 0
## 85 0 0
## 86 0 0
## 87 0 0
## 88 0 0
## 89 0 0
## 90 0 0
## 91 1 0
## 92 0 1
## 93 1 1
## 94 0 0
## 95 1 0
## 96 0 0
## 97 0 0
## 98 1 0
## 99 0 0
## 100 1 1
## 101 0 0
## 102 0 0
## 103 0 1
## 104 0 0
## 105 0 0
## 106 0 0
## 107 1 0
## 108 1 0
## 109 0 1
## 110 0 0
## 111 0 0
## 112 0 1
## 113 1 0
## 114 0 0
## 115 0 0
## 116 0 0
## 117 0 0
## 118 0 1
## 119 0 0
## 120 0 0
## 121 0 1
## 122 0 1
## 123 1 0
## 124 0 1
## 125 1 0
## 126 0 0
## 127 1 1
## 128 0 1
## 129 0 0
## 130 0 0
## 131 0 0
## 132 0 0
## 133 0 0
## 134 0 0
## 135 0 0
## 136 0 0
## 137 0 1
## 138 0 0
## 139 0 0
## 140 0 1
## 141 1 0
## 142 0 0
## 143 0 0
## 144 0 0
## 145 0 0
## 146 0 0
## 147 0 0
## 148 0 1
## 149 0 1
## 150 0 1
## 151 0 0
## 152 0 1
## 153 0 0
## 154 0 0
## 155 0 0
## 156 0 0
## 157 0 0
## 158 0 0
## 159 0 0
## 160 0 0
## 161 0 1
## 162 1 0
## 163 0 0
## 164 0 0
## 165 0 0
## 166 0 0
## 167 1 0
## 168 0 0
## 169 0 1
## 170 1 0
## 171 0 1
## 172 0 0
## 173 0 0
## 174 0 1
## 175 0 1
## 176 0 0
## 177 1 1
## 178 0 0
## 179 0 1
## 180 0 0
## 181 0 1
## 182 0 1
## 183 0 1
## 184 0 1
## 185 0 0
## 186 0 0
## 187 0 0
## 188 0 1
## 189 0 1
## 190 0 0
## 191 0 0
## 192 0 1
## 193 0 1
## 194 1 1
## 195 0 0
## 196 0 1
## 197 0 0
## 198 0 1
## 199 0 0
## 200 0 0
## 201 0 1
## 202 0 1
## 203 0 0
## 204 0 1
## 205 0 1
## 206 1 0
## 207 1 0
## 208 0 0
## 209 1 1
## 210 0 1
## 211 0 0
## 212 0 0
## 213 0 0
## 214 1 0
## 215 1 1
## 216 0 0
## 217 0 1
## 218 0 0
## 219 1 0
## 220 1 0
## 221 0 0
## 222 0 0
## 223 0 0
## 224 0 0
## 225 0 0
## 226 0 0
## 227 0 0
## 228 0 0
## 229 0 0
## 230 0 0
## 231 0 1
## 232 0 0
## 233 0 0
## 234 0 1
## 235 0 0
## 236 0 0
## 237 0 0
## 238 0 0
## 239 0 1
## 240 0 1
## 241 0 0
## 242 1 1
## 243 0 0
## 244 0 0
## 245 1 1
## 246 0 0
## 247 0 0
## 248 0 0
## 249 0 0
## 250 0 1
## 251 0 1
## 252 0 0
## 253 0 0
## 254 0 0
## 255 0 0
## 256 0 0
## 257 0 0
## 258 0 0
## 259 0 1
## 260 1 0
## 261 0 0
## 262 0 1
## 263 0 0
## 264 1 0
## 265 0 0
## 266 0 0
## 267 0 1
## 268 0 0
## 269 0 1
## 270 0 0
## 271 1 0
## 272 0 1
## 273 0 0
## 274 0 1
## 275 0 1
## 276 0 1
## 277 0 1
## 278 0 1
## 279 1 0
## 280 0 1
## 281 0 0
## 282 0 0
## 283 1 0
## 284 0 0
## 285 0 0
## 286 0 0
## 287 0 0
## 288 0 0
## 289 0 0
## 290 0 0
## 291 0 1
## 292 1 1
## 293 0 0
## 294 0 0
## 295 0 0
## 296 1 0
## 297 0 0
## 298 0 0
## 299 0 1
## 300 0 0
## 301 1 0
## 302 0 1
## 303 0 0
## 304 0 0
## 305 0 0
## 306 0 1
## 307 0 1
## 308 0 0
## 309 0 0
## 310 1 0
## 311 1 0
## 312 0 0
## 313 0 0
## 314 0 0
## 315 0 1
## 316 0 0
## 317 0 1
## 318 1 1
## 319 0 0
## 320 0 0
## 321 1 1
## 322 0 0
## 323 1 0
## 324 0 0
## 325 0 0
print(merge)
## Predicted Default Observed Default
## 1 0 0
## 2 1 0
## 3 1 0
## 4 0 0
## 5 0 1
## 6 1 0
## 7 1 1
## 8 0 0
## 9 0 0
## 10 0 1
## 11 0 0
## 12 0 0
## 13 1 0
## 14 0 0
## 15 0 1
## 16 0 1
## 17 0 1
## 18 0 0
## 19 0 0
## 20 0 1
## 21 1 1
## 22 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 27 1 0
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 1 1
## 34 0 0
## 35 0 0
## 36 0 0
## 37 0 0
## 38 0 0
## 39 1 1
## 40 0 0
## 41 1 1
## 42 0 1
## 43 0 0
## 44 0 0
## 45 0 0
## 46 0 0
## 47 0 1
## 48 1 0
## 49 1 0
## 50 0 0
## 51 0 0
## 52 0 0
## 53 1 0
## 54 0 0
## 55 1 0
## 56 0 1
## 57 0 1
## 58 0 0
## 59 0 1
## 60 0 0
## 61 0 0
## 62 0 0
## 63 0 1
## 64 0 1
## 65 1 0
## 66 0 1
## 67 0 1
## 68 0 1
## 69 1 1
## 70 0 1
## 71 0 0
## 72 0 0
## 73 0 1
## 74 1 0
## 75 1 1
## 76 0 0
## 77 0 1
## 78 0 0
## 79 0 0
## 80 0 0
## 81 0 0
## 82 0 1
## 83 0 0
## 84 0 0
## 85 0 0
## 86 0 0
## 87 0 0
## 88 0 0
## 89 0 0
## 90 0 0
## 91 1 0
## 92 0 1
## 93 1 1
## 94 0 0
## 95 1 0
## 96 0 0
## 97 0 0
## 98 1 0
## 99 0 0
## 100 1 1
## 101 0 0
## 102 0 0
## 103 0 1
## 104 0 0
## 105 0 0
## 106 0 0
## 107 1 0
## 108 1 0
## 109 0 1
## 110 0 0
## 111 0 0
## 112 0 1
## 113 1 0
## 114 0 0
## 115 0 0
## 116 0 0
## 117 0 0
## 118 0 1
## 119 0 0
## 120 0 0
## 121 0 1
## 122 0 1
## 123 1 0
## 124 0 1
## 125 1 0
## 126 0 0
## 127 1 1
## 128 0 1
## 129 0 0
## 130 0 0
## 131 0 0
## 132 0 0
## 133 0 0
## 134 0 0
## 135 0 0
## 136 0 0
## 137 0 1
## 138 0 0
## 139 0 0
## 140 0 1
## 141 1 0
## 142 0 0
## 143 0 0
## 144 0 0
## 145 0 0
## 146 0 0
## 147 0 0
## 148 0 1
## 149 0 1
## 150 0 1
## 151 0 0
## 152 0 1
## 153 0 0
## 154 0 0
## 155 0 0
## 156 0 0
## 157 0 0
## 158 0 0
## 159 0 0
## 160 0 0
## 161 0 1
## 162 1 0
## 163 0 0
## 164 0 0
## 165 0 0
## 166 0 0
## 167 1 0
## 168 0 0
## 169 0 1
## 170 1 0
## 171 0 1
## 172 0 0
## 173 0 0
## 174 0 1
## 175 0 1
## 176 0 0
## 177 1 1
## 178 0 0
## 179 0 1
## 180 0 0
## 181 0 1
## 182 0 1
## 183 0 1
## 184 0 1
## 185 0 0
## 186 0 0
## 187 0 0
## 188 0 1
## 189 0 1
## 190 0 0
## 191 0 0
## 192 0 1
## 193 0 1
## 194 1 1
## 195 0 0
## 196 0 1
## 197 0 0
## 198 0 1
## 199 0 0
## 200 0 0
## 201 0 1
## 202 0 1
## 203 0 0
## 204 0 1
## 205 0 1
## 206 1 0
## 207 1 0
## 208 0 0
## 209 1 1
## 210 0 1
## 211 0 0
## 212 0 0
## 213 0 0
## 214 1 0
## 215 1 1
## 216 0 0
## 217 0 1
## 218 0 0
## 219 1 0
## 220 1 0
## 221 0 0
## 222 0 0
## 223 0 0
## 224 0 0
## 225 0 0
## 226 0 0
## 227 0 0
## 228 0 0
## 229 0 0
## 230 0 0
## 231 0 1
## 232 0 0
## 233 0 0
## 234 0 1
## 235 0 0
## 236 0 0
## 237 0 0
## 238 0 0
## 239 0 1
## 240 0 1
## 241 0 0
## 242 1 1
## 243 0 0
## 244 0 0
## 245 1 1
## 246 0 0
## 247 0 0
## 248 0 0
## 249 0 0
## 250 0 1
## 251 0 1
## 252 0 0
## 253 0 0
## 254 0 0
## 255 0 0
## 256 0 0
## 257 0 0
## 258 0 0
## 259 0 1
## 260 1 0
## 261 0 0
## 262 0 1
## 263 0 0
## 264 1 0
## 265 0 0
## 266 0 0
## 267 0 1
## 268 0 0
## 269 0 1
## 270 0 0
## 271 1 0
## 272 0 1
## 273 0 0
## 274 0 1
## 275 0 1
## 276 0 1
## 277 0 1
## 278 0 1
## 279 1 0
## 280 0 1
## 281 0 0
## 282 0 0
## 283 1 0
## 284 0 0
## 285 0 0
## 286 0 0
## 287 0 0
## 288 0 0
## 289 0 0
## 290 0 0
## 291 0 1
## 292 1 1
## 293 0 0
## 294 0 0
## 295 0 0
## 296 1 0
## 297 0 0
## 298 0 0
## 299 0 1
## 300 0 0
## 301 1 0
## 302 0 1
## 303 0 0
## 304 0 0
## 305 0 0
## 306 0 1
## 307 0 1
## 308 0 0
## 309 0 0
## 310 1 0
## 311 1 0
## 312 0 0
## 313 0 0
## 314 0 0
## 315 0 1
## 316 0 0
## 317 0 1
## 318 1 1
## 319 0 0
## 320 0 0
## 321 1 1
## 322 0 0
## 323 1 0
## 324 0 0
## 325 0 0
Check lengths to make sure they match
length(bank.testlabels)
## [1] 325
length(bank_pred)
## [1] 325
Compute cross table
CrossTable(x= bank_pred, y= bank.testlabels, prop.chisq=TRUE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 325
##
##
## | bank.testlabels
## bank_pred | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 187 | 81 | 268 |
## | 0.012 | 0.026 | |
## | 0.698 | 0.302 | 0.825 |
## | 0.831 | 0.810 | |
## | 0.575 | 0.249 | |
## -------------|-----------|-----------|-----------|
## 1 | 38 | 19 | 57 |
## | 0.054 | 0.122 | |
## | 0.667 | 0.333 | 0.175 |
## | 0.169 | 0.190 | |
## | 0.117 | 0.058 | |
## -------------|-----------|-----------|-----------|
## Column Total | 225 | 100 | 325 |
## | 0.692 | 0.308 | |
## -------------|-----------|-----------|-----------|
##
##
print(CrossTable(x= bank_pred, y= bank.testlabels, prop.chisq=TRUE))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 325
##
##
## | bank.testlabels
## bank_pred | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 187 | 81 | 268 |
## | 0.012 | 0.026 | |
## | 0.698 | 0.302 | 0.825 |
## | 0.831 | 0.810 | |
## | 0.575 | 0.249 | |
## -------------|-----------|-----------|-----------|
## 1 | 38 | 19 | 57 |
## | 0.054 | 0.122 | |
## | 0.667 | 0.333 | 0.175 |
## | 0.169 | 0.190 | |
## | 0.117 | 0.058 | |
## -------------|-----------|-----------|-----------|
## Column Total | 225 | 100 | 325 |
## | 0.692 | 0.308 | |
## -------------|-----------|-----------|-----------|
##
##
## $t
## y
## x 0 1
## 0 187 81
## 1 38 19
##
## $prop.row
## y
## x 0 1
## 0 0.6977612 0.3022388
## 1 0.6666667 0.3333333
##
## $prop.col
## y
## x 0 1
## 0 0.8311111 0.8100000
## 1 0.1688889 0.1900000
##
## $prop.tbl
## y
## x 0 1
## 0 0.57538462 0.24923077
## 1 0.11692308 0.05846154