============================================================================================================
About: This document is also available at http://rpubs.com/sherloconan/570470
Data source: https://www.kaggle.com/sherloconan/anly-53053b

 

Summary of classifier accuracy on the training and test sets:

Model                      Training     Test
Decision Tree (C5.0)         75.91%   75.57%
Random Forest                93.66%   75.26%
Decision Tree (rpart)        76.96%   76.17%
Naïve Bayes Classifier       76.11%   75.57%
Linear SVM                   75.91%   75.57%
Polynomial SVM               75.91%   75.57%
Radial Basis SVM             75.91%   75.57%
k-Nearest Neighbors          76.01%   76.02%
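
Each accuracy above is the diagonal of the corresponding confusion matrix divided by the total observation count; Random Forest's high training figure reflects evaluation on the same data the trees were grown on. For example, from the C5.0 tables in Section 3:

(300 + 1209) / 1988  # training: 0.7591 -> 75.91%
(93 + 408) / 663     # test:     0.7557 -> 75.57%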

 

1. Exploratory Data Analysis

 

Step 1: Collecting the data

# Packages used throughout this report
library(ggplot2); library(knitr); library(kableExtra); library(gmodels)
library(C50); library(rpart); library(rpart.plot); library(randomForest)
library(naivebayes); library(kernlab); library(class)

rawdata <- read.csv("~/Documents/HU/ANLY 530-53-B/Project Description/thornton_hiv_got.csv") #*
vars <- c("got", "any", "tinc", "distvct", "male", "age", "hiv2004", "site", "mar", "tb", "thinktreat") #*
data <- rawdata[vars]
colnames(data)[1] <- "target"  # rename the outcome ("got") to "target"
data <- na.omit(data)          # keep complete cases only
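
Before dropping incomplete rows, it is worth checking how much each variable contributes to the missingness. A minimal check, reusing rawdata and vars from above:

colSums(is.na(rawdata[vars]))        # per-variable NA counts
mean(!complete.cases(rawdata[vars])) # fraction of rows removed by na.omit()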

 

Step 2: Histogram and descriptive analysis

f1 <- ggplot(data,aes(tinc))+geom_histogram(binwidth=10)+labs(x="Total value of the incentive (kwacha)",y="Count")+ggtitle("Fig.1 Distribution")+theme_classic() #*
f2 <- ggplot(data,aes(distvct))+geom_histogram(binwidth=0.2)+labs(x="Distance (km)",y="Count")+ggtitle("Fig.2 Distribution")+theme_classic() #*
f3 <- ggplot(data,aes(age))+geom_histogram(binwidth=1)+labs(x="Age",y="Count")+ggtitle("Fig.3 Distribution")+theme_classic() #*
f4 <- ggplot(aes(tinc, target), data=aggregate(target~tinc, data=data, mean))+ylim(0,1)+geom_point()+labs(x="Total value of the incentive (kwacha)",y="Mean response")+ggtitle("Fig.4 Scatter plot")+theme_classic() #*
gridExtra::grid.arrange(f1,f2,f3,f4, nrow=2) #*

prop.table(table(data$target)) %>% kable(digits=4) %>% kable_styling(full_width=F)
Var1    Freq
   0   0.315
   1   0.685
backup <- data
data[c("target", "any", "male", "hiv2004", "site", "mar", "tb", "thinktreat")] <- lapply(data[c("target", "any", "male", "hiv2004", "site", "mar", "tb", "thinktreat")], as.factor) #*
Hmisc::describe(data)
## data 
## 
##  11  Variables      2651  Observations
## --------------------------------------------------------------------------------
## target 
##        n  missing distinct 
##     2651        0        2 
##                       
## Value          0     1
## Frequency    835  1816
## Proportion 0.315 0.685
## --------------------------------------------------------------------------------
## any 
##        n  missing distinct 
##     2651        0        2 
##                       
## Value          0     1
## Frequency    592  2059
## Proportion 0.223 0.777
## --------------------------------------------------------------------------------
## tinc 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     2651        0       27    0.979    105.6      106        0        0 
##      .25      .50      .75      .90      .95 
##       20      100      200      250      300 
## 
## lowest :   0  10  20  30  40, highest: 230 240 250 260 300
## --------------------------------------------------------------------------------
## distvct 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     2651        0     2004        1    2.019    1.422   0.4062   0.5982 
##      .25      .50      .75      .90      .95 
##   1.0288   1.6911   2.8034   3.9795   4.5979 
## 
## lowest : 0.00000000 0.03288736 0.03637984 0.03829136 0.03986998
## highest: 5.13910870 5.14017820 5.18828110 5.18977120 5.19155880
## --------------------------------------------------------------------------------
## male 
##        n  missing distinct 
##     2651        0        2 
##                       
## Value          0     1
## Frequency   1411  1240
## Proportion 0.532 0.468
## --------------------------------------------------------------------------------
## age 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     2651        0       67    0.999    33.32    15.38       16       17 
##      .25      .50      .75      .90      .95 
##       22       32       43       52       57 
## 
## lowest : 11 12 13 14 15, highest: 73 74 75 76 80
## --------------------------------------------------------------------------------
## hiv2004 
##        n  missing distinct 
##     2651        0        3 
##                             
## Value         -1     0     1
## Frequency     12  2469   170
## Proportion 0.005 0.931 0.064
## --------------------------------------------------------------------------------
## site 
##        n  missing distinct 
##     2651        0        3 
##                             
## Value          1     2     3
## Frequency    705  1002   944
## Proportion 0.266 0.378 0.356
## --------------------------------------------------------------------------------
## mar 
##        n  missing distinct 
##     2651        0        2 
##                     
## Value         0    1
## Frequency   770 1881
## Proportion 0.29 0.71
## --------------------------------------------------------------------------------
## tb 
##        n  missing distinct 
##     2651        0        2 
##                       
## Value          0     1
## Frequency   2168   483
## Proportion 0.818 0.182
## --------------------------------------------------------------------------------
## thinktreat 
##        n  missing distinct 
##     2651        0        2 
##                       
## Value          0     1
## Frequency   1747   904
## Proportion 0.659 0.341
## --------------------------------------------------------------------------------

 

Step 3: Chi-squared Test of Independence

#NOTE: nested "for" loops can be slow in R; a vectorized alternative is sketched after the output below
N <- ncol(backup)-1
pairs <- c()
for (i in 1:(ncol(backup)-2)) {
  for (j in (i+1):(ncol(backup)-1)) {
    if (chisq.test(table(backup[,i+1],backup[,j+1]))$p.value<0.05) {
      pairs <- c(pairs, c(i,j))
    }
  }
} #O(n²)
#variable pairs for which the Chi-squared Test of Independence rejects the null (p < 0.05),
#i.e., pairs with a statistically significant association
table(pairs)
## pairs
##  1  2  3  4  5  6  7  8  9 10 
##  3  4  2  3  4  3  8  5  5  3
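
As a sketch of the vectorized alternative mentioned above (reusing backup; column 1 is the target, so the predictors sit in columns 2 through 11), combn() enumerates the pairs directly:

combos <- combn(2:ncol(backup), 2)  # all 45 predictor column pairs
sig <- apply(combos, 2, function(ix)
  chisq.test(table(backup[, ix[1]], backup[, ix[2]]))$p.value < 0.05)
table(c(combos[, sig] - 1))  # same counts as table(pairs) above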
#NOTE: pre-allocating with known dimensions speeds up execution in R
chiTable <- data.frame(matrix(nrow=N,ncol=N))
for (i in 1:N) {
  for (j in 1:N) {
    if (chisq.test(table(backup[,i+1],backup[,j+1]))$p.value<0.05) {
      chiTable[i,j] <- "⬤"
    } else {
      chiTable[i,j] <- "◯"
    }
  }
}
colnames(chiTable) <- c(1:N)
chiTable$Variable <- c(1:N)
chiTable <- chiTable[,c(N+1,1:N)]
#full results of Chi-squared Test of Independence: ⬤ reject null hypothesis; ◯ fail to reject null hypothesis
chiTable %>% kable() %>% kable_styling(full_width=F)
(10 × 10 table of ⬤/◯ cells, one row and column per variable; see the rendered table at the RPubs link above.)

 

 

2. Data Wrangling

 

Randomization in sampling

data[,c(3,4,6)] <- scale(data[,c(3,4,6)]) #* standardize the numeric variables (tinc, distvct, age)

set.seed(530)
training <- sample(nrow(data), as.integer(nrow(data)*0.75))  # 75/25 train/test split
dataTraining <- data[training,]
dataTest <- data[-training,]
prop.table(table(dataTraining$target)) %>% kable(digits=4) %>% kable_styling(full_width=F)
Var1     Freq
   0   0.3169
   1   0.6831
prop.table(table(dataTest$target)) %>% kable(digits=4) %>% kable_styling(full_width=F)
Var1     Freq
   0   0.3092
   1   0.6908
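
Both partitions closely match the full-sample class balance (31.5% / 68.5%), so simple random sampling is adequate here. If an exactly stratified split were preferred, caret provides one; a minimal sketch, assuming the caret package is installed:

library(caret)
set.seed(530)
idx <- createDataPartition(data$target, p=0.75, list=FALSE)  # stratified on target
strataTraining <- data[idx,]
strataTest <- data[-idx,]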

 

 

3. Decision Tree (C5.0) Model

 

Step 1: Training a model on the data

modelDT <- C5.0(x=dataTraining[-1], y=dataTraining$target)
trainPredDT <- predict(modelDT, dataTraining)
CrossTable(dataTraining$target, trainPredDT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (DT)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (DT) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       300 |       330 |       630 | 
##              |     0.151 |     0.166 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       149 |      1209 |      1358 | 
##              |     0.075 |     0.608 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       449 |      1539 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredDT <- predict(modelDT, dataTest)
CrossTable(dataTest$target, testPredDT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (DT)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (DT) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        93 |       112 |       205 | 
##              |     0.140 |     0.169 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        50 |       408 |       458 | 
##              |     0.075 |     0.615 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       143 |       520 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

4. Random Forest Model

 

Step 1: Training a model on the data

modelRF <- randomForest(target~., data=dataTraining, importance=T)
trainPredRF <- predict(modelRF, dataTraining)
CrossTable(dataTraining$target, trainPredRF, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (RF)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (RF) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       561 |        69 |       630 | 
##              |     0.282 |     0.035 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        57 |      1301 |      1358 | 
##              |     0.029 |     0.654 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       618 |      1370 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredRF <- predict(modelRF, dataTest)
CrossTable(dataTest$target, testPredRF, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (RF)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (RF) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        86 |       119 |       205 | 
##              |     0.130 |     0.179 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        45 |       413 |       458 | 
##              |     0.068 |     0.623 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       131 |       532 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

5. Decision Tree (rpart) Model

 

Step 1: Training a model on the data

modelRT <- rpart(target~., data=dataTraining)
trainPredRT <- predict(modelRT, dataTraining, type="class")
CrossTable(dataTraining$target, trainPredRT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (RT)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (RT) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       265 |       365 |       630 | 
##              |     0.133 |     0.184 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        93 |      1265 |      1358 | 
##              |     0.047 |     0.636 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       358 |      1630 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 
rpart.plot(modelRT, digits=4, type=1)

 

Step 2: Evaluating model performance

testPredRT <- predict(modelRT, dataTest, type="class")
CrossTable(dataTest$target, testPredRT, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (RT)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (RT) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        72 |       133 |       205 | 
##              |     0.109 |     0.201 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        25 |       433 |       458 | 
##              |     0.038 |     0.653 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |        97 |       566 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

6. Naïve Bayes Classifier

 

Step 1: Training a model on the data

modelNB <- naive_bayes(target~., data=dataTraining)
trainPredNB <- predict(modelNB, dataTraining, type="class")
CrossTable(dataTraining$target, trainPredNB, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (NB)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (NB) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       307 |       323 |       630 | 
##              |     0.154 |     0.162 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       152 |      1206 |      1358 | 
##              |     0.076 |     0.607 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       459 |      1529 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

testPredNB <- predict(modelNB, dataTest, type="class")
CrossTable(dataTest$target, testPredNB, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (NB)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (NB) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        94 |       111 |       205 | 
##              |     0.142 |     0.167 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        51 |       407 |       458 | 
##              |     0.077 |     0.614 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       145 |       518 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

7. Support Vector Machine

 

Step 1: Training a model on the data

tryKernel <- function(train, test, kern) {
  model <- ksvm(target~., data=train, kernel=kern)
  trainAccuracy <- round((1-model@error)*100, 2)        # ksvm stores training error in @error
  pred <- predict(model, test)
  testAccuracy <- round(mean(test$target==pred)*100, 2) # robust even if all predictions agree
  return(c(trainAccuracy, testAccuracy))
}

modelSVM <- ksvm(target~., data=dataTraining, kernel="rbfdot")
trainPredSVM <- predict(modelSVM, dataTraining)
CrossTable(dataTraining$target, trainPredSVM, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (SVM)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (SVM) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       300 |       330 |       630 | 
##              |     0.151 |     0.166 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       149 |      1209 |      1358 | 
##              |     0.075 |     0.608 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       449 |      1539 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

Step 2: Evaluating model performance

result <- data.frame("Accuracy"=c("Training (%)", "Test (%)"),
                     "Linear"=tryKernel(dataTraining,dataTest,kern="vanilladot"),
                     "Polynomial"=tryKernel(dataTraining,dataTest,kern="polydot"),
                     "RBF"=tryKernel(dataTraining,dataTest,kern="rbfdot"))
##  Setting default kernel parameters  
##  Setting default kernel parameters
result %>% kable() %>% kable_styling(full_width=F)
Accuracy       Linear   Polynomial      RBF
Training (%)    75.91        75.91    75.91
Test (%)        75.57        75.57    75.57
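
All three kernels land on essentially the same accuracy, suggesting the decision boundary is driven by a dominant predictor rather than by kernel choice (see Section 9). A cross-validated error estimate can confirm the result is not an artifact of this particular split; a sketch using kernlab's built-in cross argument:

modelCV <- ksvm(target~., data=dataTraining, kernel="rbfdot", cross=5)
cross(modelCV)  # 5-fold cross-validation error on the training set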
testPredSVM <- predict(modelSVM, dataTest)
CrossTable(dataTest$target, testPredSVM, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (SVM)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (SVM) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        93 |       112 |       205 | 
##              |     0.140 |     0.169 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        50 |       408 |       458 | 
##              |     0.075 |     0.615 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       143 |       520 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 

 

 

8. k-Nearest Neighbors Model

 

Evaluating model performance

trainPredKNN <- knn(train=dataTraining[-1], test=dataTraining[-1], cl=dataTraining[,1], k=as.integer(sqrt(nrow(dataTraining)))) # k = floor(sqrt(n)), a common rule of thumb
CrossTable(dataTraining$target, trainPredKNN, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (KNN)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1988 
## 
##  
##              | Predicted (KNN) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |       263 |       367 |       630 | 
##              |     0.132 |     0.185 |           | 
## -------------|-----------|-----------|-----------|
##            1 |       110 |      1248 |      1358 | 
##              |     0.055 |     0.628 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       373 |      1615 |      1988 | 
## -------------|-----------|-----------|-----------|
## 
## 
testPredKNN <- knn(train=dataTraining[-1], test=dataTest[-1], cl=dataTraining[,1], k=as.integer(sqrt(nrow(dataTraining))))
CrossTable(dataTest$target, testPredKNN, prop.chisq=F, prop.c=F, prop.r=F, dnn=c("Actual", "Predicted (KNN)"))
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  663 
## 
##  
##              | Predicted (KNN) 
##       Actual |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            0 |        79 |       126 |       205 | 
##              |     0.119 |     0.190 |           | 
## -------------|-----------|-----------|-----------|
##            1 |        33 |       425 |       458 | 
##              |     0.050 |     0.641 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       112 |       551 |       663 | 
## -------------|-----------|-----------|-----------|
## 
## 
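
Here k = as.integer(sqrt(1988)) = 44. Accuracy is often flat across a broad band of k; a quick sensitivity scan (a sketch reusing the objects above; the k values are illustrative):

for (k in c(5, 15, 25, 45, 65)) {
  pred <- knn(train=dataTraining[-1], test=dataTest[-1], cl=dataTraining[,1], k=k)
  cat("k =", k, "test accuracy =", round(mean(pred==dataTest$target)*100, 2), "%\n")
}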

 

 

9. Feature Importance

 

Model 1: Decision Tree (C5.0)

C5imp(modelDT)
##            Overall
## any            100
## tinc             0
## distvct          0
## male             0
## age              0
## hiv2004          0
## site             0
## mar              0
## tb               0
## thinktreat       0
C5imp(modelDT, metric="splits")
##            Overall
## any            100
## tinc             0
## distvct          0
## male             0
## age              0
## hiv2004          0
## site             0
## mar              0
## tb               0
## thinktreat       0
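
The default metric ("usage") reports the percentage of training cases that pass through a split on each predictor, while metric="splits" reports the percentage of splits that use it. Both put any at 100% and every other variable at 0 because the fitted tree evidently splits only on any.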

 

Model 2: Random Forest

importance(modelRF, type=1)
##            MeanDecreaseAccuracy
## any                  22.8513718
## tinc                 39.2694252
## distvct              16.6968812
## male                  0.6593123
## age                   5.4931463
## hiv2004               2.0147891
## site                 22.1241627
## mar                   6.7307767
## tb                    9.6326492
## thinktreat            5.6722017
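
tinc, any, and site dominate the permutation importance. To see these scores graphically (a minimal sketch; varImpPlot ships with randomForest):

varImpPlot(modelRF, type=1, main="Mean decrease in accuracy")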

 

Model 3: Decision Tree (rpart)

modelRT$variable.importance
##        any       tinc    distvct       site        age 
## 143.115562 143.115562  10.779899   8.532266   0.161306
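
any and tinc receive identical importance, most likely because any is simply an indicator for a positive incentive (tinc > 0) and rpart's variable.importance also credits surrogate splits, so each variable can stand in for the other at the same nodes.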

 

 

10. Clustering

 

dmCommunities <- cluster::daisy(data[-1])  # Gower dissimilarity over the mixed-type predictors
set.seed(530)
data$cluster <- as.factor(kmeans(dmCommunities,2)$cluster)  # k-means applied to the rows of the dissimilarity matrix
DT::datatable(data,filter="top")
table(data$cluster)
## 
##    1    2 
##  980 1671
table(data$target)
## 
##    0    1 
##  835 1816
table(data$cluster, data$target)
##    
##        0    1
##   1  479  501
##   2  356 1315
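
Cluster 2 aligns fairly well with target = 1. The raw agreement rate, treating cluster 1 as target 0 (computed from the table above):

tab <- table(data$cluster, data$target)
sum(diag(tab)) / sum(tab)  # (479 + 1315) / 2651 ≈ 0.677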