Data source: https://www.aeaweb.org/articles?id=10.1257/0002828042002561

 

Summary of model accuracies:

Model                    Training   Test
Decision Tree (C5.0)      91.41%   90.22%
Random Forest             92.63%   91.41%
Decision Tree (rpart)     90.19%   90.73%
Naïve Bayes Classifier    79.45%   77.64%
Linear SVM                79.88%   77.64%
Polynomial SVM            79.93%   77.47%
Radial Basis SVM          88.29%   86.48%
k-Nearest Neighbors       78.37%   76.79%

 

1. Exploratory Data Analysis

 

Step 1: Collecting the data

rawdata <- read.csv("~/Documents/HU/ANLY 525-50-B/525 paper/lakisha_aer.csv")
table(rawdata$firstname, rawdata$race) %>% kable() %>% kable_styling(font_size=11, full_width=F)
firstname b w
Aisha 180 0
Allison 0 232
Anne 0 242
Brad 0 63
Brendan 0 65
Brett 0 59
Carrie 0 168
Darnell 42 0
Ebony 208 0
Emily 0 227
Geoffrey 0 59
Greg 0 51
Hakim 55 0
Jamal 61 0
Jay 0 67
Jermaine 52 0
Jill 0 203
Kareem 64 0
Keisha 183 0
Kenya 196 0
Kristen 0 213
Lakisha 200 0
Latonya 230 0
Latoya 226 0
Laurie 0 195
Leroy 64 0
Matthew 0 67
Meredith 0 187
Neil 0 76
Rasheed 67 0
Sarah 0 193
Tamika 256 0
Tanisha 207 0
Todd 0 68
Tremayne 69 0
Tyrone 75 0
#select the outcome (call) and the explanatory variables
vars <- c("call","h","sex","race","city","education","ofjobs","yearsexp","honors","volunteer","military","empholes","workinschool","email","computerskills","specialskills")
data <- rawdata[vars]
#convert the binary indicator columns to factors; positions 3-8 (sex through yearsexp) keep their original types
data[vars[-c(3:8)]] <- lapply(data[vars[-c(3:8)]], factor)

 

Step 2: Bar charts, a histogram, and descriptive analysis

ggplot(data,aes(education))+geom_bar()+labs(x="Education Level",y="Count")+ggtitle("Fig.1. Distribution of education on the resume")+theme_classic()

ggplot(data,aes(ofjobs))+geom_bar()+labs(x="Number of Jobs",y="Count")+ggtitle("Fig.2. Distribution of jobs on the resume")+theme_classic()

ggplot(data,aes(yearsexp))+geom_histogram(binwidth=1)+labs(x="Number of Years",y="Count")+ggtitle("Fig.3. Distribution of work experience on the resume")+theme_classic()

length(unique(rawdata$adid)) #the number of employment ads
## [1] 1323
#table(rawdata$id,rawdata$race,rawdata$h)
length(unique(rawdata$id))
## [1] 289
#table(rawdata$ad,rawdata$race,rawdata$h)
length(unique(rawdata$ad))
## [1] 303
rawdata[rawdata$id==1,vars] %>% kable() %>% kable_styling(font_size=10, full_width=F)
call h sex race city education ofjobs yearsexp honors volunteer military empholes workinschool email computerskills specialskills
179 0 1 m w b 4 6 18 0 1 0 0 0 0 1 0
189 0 0 f b b 4 5 26 0 0 0 0 0 0 0 0
193 0 0 m w b 4 4 8 0 0 0 0 0 0 1 0
198 0 1 f b b 4 4 14 0 1 0 0 1 0 1 1
prop.table(table(data$race, data$call)) %>% kable(digits=4) %>% kable_styling(full_width=F)
race 0 1
b 0.4678 0.0322
w 0.4517 0.0483
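
Conditioning these joint proportions on race (margin=1) makes the callback gap explicit; a quick check against the same table:

prop.table(table(data$race, data$call), margin=1) %>% kable(digits=4) %>% kable_styling(full_width=F)

Since each race group is exactly half the sample, the row-wise callback rates are 0.0322/0.50 ≈ 6.4% for black-sounding names versus 0.0483/0.50 ≈ 9.7% for white-sounding names.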
Hmisc::describe(data)
## data 
## 
##  16  Variables      4870  Observations
## --------------------------------------------------------------------------------
## call 
##        n  missing distinct 
##     4870        0        2 
##                     
## Value         0    1
## Frequency  4478  392
## Proportion 0.92 0.08
## --------------------------------------------------------------------------------
## h 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   2424  2446
## Proportion 0.498 0.502
## --------------------------------------------------------------------------------
## sex 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          f     m
## Frequency   3746  1124
## Proportion 0.769 0.231
## --------------------------------------------------------------------------------
## race 
##        n  missing distinct 
##     4870        0        2 
##                     
## Value         b    w
## Frequency  2435 2435
## Proportion  0.5  0.5
## --------------------------------------------------------------------------------
## city 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          b     c
## Frequency   2166  2704
## Proportion 0.445 0.555
## --------------------------------------------------------------------------------
## education 
##        n  missing distinct     Info     Mean      Gmd 
##     4870        0        5    0.619    3.618   0.5941 
## 
## lowest : 0 1 2 3 4, highest: 0 1 2 3 4
##                                         
## Value          0     1     2     3     4
## Frequency     46    40   274  1006  3504
## Proportion 0.009 0.008 0.056 0.207 0.720
## --------------------------------------------------------------------------------
## ofjobs 
##        n  missing distinct     Info     Mean      Gmd 
##     4870        0        7    0.933    3.661    1.336 
## 
## lowest : 1 2 3 4 5, highest: 3 4 5 6 7
##                                                     
## Value          1     2     3     4     5     6     7
## Frequency    110   704  1429  1611   533   464    19
## Proportion 0.023 0.145 0.293 0.331 0.109 0.095 0.004
## --------------------------------------------------------------------------------
## yearsexp 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     4870        0       26    0.989    7.843     5.09        2        3 
##      .25      .50      .75      .90      .95 
##        5        6        9       14       19 
## 
## lowest :  1  2  3  4  5, highest: 22 23 25 26 44
## --------------------------------------------------------------------------------
## honors 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   4613   257
## Proportion 0.947 0.053
## --------------------------------------------------------------------------------
## volunteer 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   2866  2004
## Proportion 0.589 0.411
## --------------------------------------------------------------------------------
## military 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   4397   473
## Proportion 0.903 0.097
## --------------------------------------------------------------------------------
## empholes 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   2688  2182
## Proportion 0.552 0.448
## --------------------------------------------------------------------------------
## workinschool 
##        n  missing distinct 
##     4870        0        2 
##                     
## Value         0    1
## Frequency  2145 2725
## Proportion 0.44 0.56
## --------------------------------------------------------------------------------
## email 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   2536  2334
## Proportion 0.521 0.479
## --------------------------------------------------------------------------------
## computerskills 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency    874  3996
## Proportion 0.179 0.821
## --------------------------------------------------------------------------------
## specialskills 
##        n  missing distinct 
##     4870        0        2 
##                       
## Value          0     1
## Frequency   3269  1601
## Proportion 0.671 0.329
## --------------------------------------------------------------------------------

 

Step 3: Chi-squared Test of Independence

N <- ncol(data)-1 #number of explanatory variables (column 1, call, is excluded)
chiTable <- data.frame(matrix(nrow=N,ncol=N))
#pairwise tests over columns 2..(N+1); the diagonal (a variable tested against itself) is trivially significant
for (i in 1:N) {
  for (j in 1:N) {
    if (chisq.test(table(data[,i+1],data[,j+1]))$p.value<0.05) {
      chiTable[i,j] <- "⬤"
    } else {
      chiTable[i,j] <- "◯"
    }
  }
}
colnames(chiTable) <- c(1:N)
chiTable$Variable <- c(1:N)
chiTable <- chiTable[,c(N+1,1:N)]
#full results of Chi-squared Test of Independence: ⬤ reject null hypothesis; ◯ fail to reject null hypothesis
chiTable %>% kable() %>% kable_styling(full_width=F)
[15 × 15 table of pairwise test results for variables 1-15, numbered in the order of vars[-1]; the ⬤/◯ cell symbols did not survive rendering.]

 

 

2. Data Wrangling

 

Randomization in sampling
Unbalanced classification problems pose difficulties for many machine learning algorithms. They are characterized by an uneven proportion of classes in the outcome (dependent/target/response) variable; here only about 8% of resumes received a callback.

The SMOTE (Synthetic Minority Oversampling Technique) function oversamples the rare event by using bootstrapping and k-nearest neighbors to synthetically create additional observations of that event.

The arguments perc.over and perc.under control, respectively, the amount of over-sampling of the minority class and of under-sampling of the majority class.
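
As a back-of-the-envelope check of what the call below should return (assuming DMwR's SMOTE, where perc.under is applied to the count of newly generated minority cases):

nMinority <- 392                 #callbacks in the raw data (see describe() above)
nSynthetic <- nMinority*500/100  #perc.over=500: five synthetic cases per original
nMinority+nSynthetic             #2352 minority rows
nSynthetic*120/100               #perc.under=120: 2352 majority rows sampled
#total: 4704 rows, roughly balanced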

rawdata$call <- as.factor(rawdata$call) #SMOTE requires a factor outcome
set.seed(525)
data <- SMOTE(call~.,rawdata,perc.over=500,perc.under=120)
data <- data[vars[-2]] #keep the model variables, excluding h
rownames(data) <- c(1:nrow(data))

#75/25 train/test split
training <- sample(nrow(data), as.integer(nrow(data)*0.75))
dataTraining <- data[training,]
dataTest <- data[-training,]

prop.table(table(dataTraining$call)) %>% kable(digits=4) %>% kable_styling(full_width=F)
call Freq
0 0.5014
1 0.4986
prop.table(table(dataTest$call)) %>% kable(digits=4) %>% kable_styling(full_width=F)
call Freq
0 0.4957
1 0.5043

 

Inquiry about bootstrapping

nrow(na.omit(rawdata))
## [1] 447
nrow(distinct(rawdata))
## [1] 4870
nrow(distinct(data))
## [1] 3008
#minority x2
set.seed(525)
data1 <- SMOTE(call~.,rawdata,perc.over=100)

#minority x2'
set.seed(525)
data2 <- SMOTE(call~.,data,perc.over=100)

#minority x6
set.seed(525)
data3 <- SMOTE(call~.,rawdata,perc.over=500,perc.under=120)

#minority x6'
set.seed(525)
data4 <- SMOTE(call~.,data,perc.over=500,perc.under=120)

nrow(data1)
## [1] 1568
nrow(distinct(data1))
## [1] 1496
nrow(distinct(data2))
## [1] 4713
table(distinct(data1)$call)
## 
##   0   1 
## 712 784
table(distinct(data2)$call)
## 
##    0    1 
## 2767 1946
nrow(data3)
## [1] 4704
nrow(distinct(data3))
## [1] 4176
nrow(distinct(data4))
## [1] 12926
table(distinct(data3)$call)
## 
##    0    1 
## 1827 2349
table(distinct(data4)$call)
## 
##     0     1 
## 10708  2218

 

 

3. Decision Tree (C5.0)

 

Step 1: Training a model on the data

modelDT <- C5.0(x=dataTraining[-1], y=dataTraining$call)
trainPredDT <- predict(modelDT, dataTraining)
CrossTable(dataTraining$call, trainPredDT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |      1741 |        28 |      1769 | 
##              |     0.493 |     0.008 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       275 |      1484 |      1759 | 
##              |     0.078 |     0.421 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      2016 |      1512 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredDT <- predict(modelDT, dataTest)
CrossTable(dataTest$call, testPredDT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       562 |        21 |       583 | 
##              |     0.478 |     0.018 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        94 |       499 |       593 | 
##              |     0.080 |     0.424 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       656 |       520 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
## 
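
The accuracies in the summary table at the top can be recovered from these confusion matrices; for example, for the C5.0 test set:

mean(testPredDT == dataTest$call) #(562+499)/1176 ≈ 0.9022, i.e. 90.22%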
C5imp(modelDT)
##                Overall
## computerskills  100.00
## honors           61.11
## volunteer        44.44
## workinschool     37.05
## empholes         36.14
## specialskills     3.34
## sex               1.70
## race              0.00
## city              0.00
## education         0.00
## ofjobs            0.00
## yearsexp          0.00
## military          0.00
## email             0.00
C5imp(modelDT, metric="splits")
##                Overall
## honors              30
## specialskills       20
## computerskills      10
## empholes            10
## sex                 10
## volunteer           10
## workinschool        10
## race                 0
## city                 0
## education            0
## ofjobs               0
## yearsexp             0
## military             0
## email                0

 

 

4. Random Forest

 

Step 1: Training a model on the data

set.seed(525)
modelRF <- randomForest(call~., data=dataTraining, importance=T)
trainPredRF <- predict(modelRF, dataTraining)
CrossTable(dataTraining$call, trainPredRF, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |      1765 |         4 |      1769 | 
##              |     0.500 |     0.001 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       256 |      1503 |      1759 | 
##              |     0.073 |     0.426 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      2021 |      1507 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredRF <- predict(modelRF, dataTest)
CrossTable(dataTest$call, testPredRF, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       577 |         6 |       583 | 
##              |     0.491 |     0.005 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        95 |       498 |       593 | 
##              |     0.081 |     0.423 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       672 |       504 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
## 
importance(modelRF, type=1)
##                MeanDecreaseAccuracy
## sex                        22.13874
## race                       15.69424
## city                       25.79569
## education                  21.95828
## ofjobs                     26.04194
## yearsexp                   32.90873
## honors                     35.43818
## volunteer                  29.83858
## military                   18.32547
## empholes                   30.67743
## workinschool               32.40884
## email                      25.09637
## computerskills             42.97400
## specialskills              29.85340
#getTree(modelRF,labelVar=T)
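
The same importance scores can also be plotted (a convenience sketch, not used for the results above):

#varImpPlot(modelRF, type=1, main="Mean decrease in accuracy")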

 

 

5. Decision Tree (rpart)

 

Step 1: Training a model on the data

modelRT <- rpart(call~., data=dataTraining)
trainPredRT <- predict(modelRT, dataTraining, type="class")
CrossTable(dataTraining$call, trainPredRT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |      1697 |        72 |      1769 | 
##              |     0.481 |     0.020 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       274 |      1485 |      1759 | 
##              |     0.078 |     0.421 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      1971 |      1557 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 
rpart.plot(modelRT, digits=4, type=1)

 

Step 2: Evaluating model performance

testPredRT <- predict(modelRT, dataTest, type="class")
CrossTable(dataTest$call, testPredRT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       565 |        18 |       583 | 
##              |     0.480 |     0.015 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        91 |       502 |       593 | 
##              |     0.077 |     0.427 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       656 |       520 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
## 
modelRT$variable.importance
## computerskills      volunteer         honors          email         ofjobs 
##      614.20139      404.29854      260.87263      238.43622      230.38539 
##   workinschool       empholes  specialskills      education       yearsexp 
##      214.74672      178.67514      177.61838      124.56380       49.23174

 

 

6. Naïve Bayes Classifier

 

Step 1: Training a model on the data

modelNB <- naive_bayes(call~., data=dataTraining)
trainPredNB <- predict(modelNB, dataTraining, type="class")
CrossTable(dataTraining$call, trainPredNB, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |      1494 |       275 |      1769 | 
##              |     0.423 |     0.078 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       450 |      1309 |      1759 | 
##              |     0.128 |     0.371 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      1944 |      1584 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredNB <- predict(modelNB, dataTest, type="class")
CrossTable(dataTest$call, testPredNB, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       481 |       102 |       583 | 
##              |     0.409 |     0.087 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       161 |       432 |       593 | 
##              |     0.137 |     0.367 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       642 |       534 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

7. Support Vector Machine

 

Step 1: Training a model on the data

#fit a ksvm with the given kernel and return training and test accuracy (%)
tryKernel <- function(train, test, kern) {
  set.seed(525)
  model <- ksvm(call~., data=train, kernel=kern)
  trainAccuracy <- round((1-model@error)*100,2) #training error stored in the fitted object
  pred <- predict(model, test)
  testAccuracy <- round(mean(test$call==pred)*100,2) #proportion of correct test predictions
  return(c(trainAccuracy, testAccuracy))
}

modelSVM <- ksvm(call~., data=dataTraining) #default kernel: radial basis (rbfdot)
trainPredSVM <- predict(modelSVM, dataTraining)
CrossTable(dataTraining$call, trainPredSVM, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |      1737 |        32 |      1769 | 
##              |     0.492 |     0.009 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       380 |      1379 |      1759 | 
##              |     0.108 |     0.391 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      2117 |      1411 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

result <- data.frame("Accuracy"=c("Training (%)", "Test (%)"),
                     "Linear"=tryKernel(dataTraining,dataTest,kern="vanilladot"),
                     "Polynomial"=tryKernel(dataTraining,dataTest,kern="polydot"),
                     "RBF"=tryKernel(dataTraining,dataTest,kern="rbfdot"))
##  Setting default kernel parameters  
##  Setting default kernel parameters
result %>% kable() %>% kable_styling(full_width=F)
Accuracy Linear Polynomial RBF
Training (%) 79.88 79.93 88.29
Test (%) 77.64 77.47 86.48
testPredSVM <- predict(modelSVM, dataTest)
CrossTable(dataTest$call, testPredSVM, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       561 |        22 |       583 | 
##              |     0.477 |     0.019 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       137 |       456 |       593 | 
##              |     0.116 |     0.388 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       698 |       478 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

8. k-Nearest Neighbors

 

Evaluating model performance

#kNN needs numeric inputs: coerce the factor columns to their numeric level codes
#note that as.numeric() recodes the 0/1 factor levels of call as 1/2, which is why the tables below are labeled 1 and 2
dataTraining[,-c(6:8)] <- lapply(dataTraining[,-c(6:8)], as.numeric)
dataTest[,-c(6:8)] <- lapply(dataTest[,-c(6:8)], as.numeric)
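
Because kNN is distance-based, features on larger scales (e.g. yearsexp) dominate the distance; the results below use the raw numeric codes. A minimal min-max rescaling sketch, had normalization been desired:

#normalize <- function(x) (x-min(x))/(max(x)-min(x)) #map each column to [0,1]
#dataTraining[-1] <- lapply(dataTraining[-1], normalize)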

set.seed(525)
#k is set to the square root of the training-set size: sqrt(3528) ≈ 59
trainPredKNN <- knn(train=dataTraining[-1], test=dataTraining[-1], cl=dataTraining[,1], k=as.integer(sqrt(nrow(dataTraining))))
CrossTable(dataTraining$call, trainPredKNN, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (KNN)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  3528 
## 
##  
##              | Predicted (KNN) 
##       Actual |         1 |         2 | Row Total | 
## -------------|-----------|-----------|-----------|
##            1 |      1433 |       336 |      1769 | 
##              |     0.406 |     0.095 |           | 
## -------------|-----------|-----------|-----------|
##            2 |       427 |      1332 |      1759 | 
##              |     0.121 |     0.378 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      1860 |      1668 |      3528 | 
## -------------|-----------|-----------|-----------|
## 
## 
testPredKNN <- knn(train=dataTraining[-1], test=dataTest[-1], cl=dataTraining[,1], k=as.integer(sqrt(nrow(dataTraining))))
CrossTable(dataTest$call, testPredKNN, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (KNN)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1176 
## 
##  
##              | Predicted (KNN) 
##       Actual |         1 |         2 | Row Total | 
## -------------|-----------|-----------|-----------|
##            1 |       467 |       116 |       583 | 
##              |     0.397 |     0.099 |           | 
## -------------|-----------|-----------|-----------|
##            2 |       157 |       436 |       593 | 
##              |     0.134 |     0.371 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       624 |       552 |      1176 | 
## -------------|-----------|-----------|-----------|
## 
##