library(tidyr)
library(ca)
# import data
data <- read.csv(file = 'pizzafem truncated.csv')
head(data)
## myid RESP_RACE pizza
## 1 2354858 1 NA
## 2 2354859 1 NA
## 3 2354861 4 NA
## 4 2354862 4 2
## 5 2354863 4 2
## 6 2354872 1 NA
data.final=drop_na(data)
data.final.matrix= table(data.final$RESP_RACE, data.final$pizza)
data.final.matrix <- as.matrix(data.final.matrix)
data.final.matrix
##
## 1 2 3 4
## 1 560 508 238 476
## 2 103 76 30 67
## 3 27 33 13 7
## 4 100 165 32 139
contigency.table.raw= as.data.frame.matrix(table(data.final$RESP_RACE, data.final$pizza))
contigency.table.sum= as.data.frame.matrix(addmargins(table(data.final$RESP_RACE, data.final$pizza)))
RACE=c("1.00 WHITE","2.00 BLACK", "3.00 ASIAN","4.00 OTHER","SUM")
contigency.table.sum=cbind(contigency.table.sum, RACE)
contigency.table.sum=contigency.table.sum[,c(6,1,2,3,4,5)]
colnames(contigency.table.sum) <- c("RACE","1.00 pizza hut","2.00 dominos", "3.00 papa johns","4.00 little caesars","SUM")
contigency.table.sum
## RACE 1.00 pizza hut 2.00 dominos 3.00 papa johns 4.00 little caesars
## 1 1.00 WHITE 560 508 238 476
## 2 2.00 BLACK 103 76 30 67
## 3 3.00 ASIAN 27 33 13 7
## 4 4.00 OTHER 100 165 32 139
## Sum SUM 790 782 313 689
## SUM
## 1 1782
## 2 276
## 3 80
## 4 436
## Sum 2574
chisq <- chisq.test(data.final.matrix)
chisq
##
## Pearson's Chi-squared test
##
## data: data.final.matrix
## X-squared = 53.459, df = 9, p-value = 2.392e-08
chisq$observed
##
## 1 2 3 4
## 1 560 508 238 476
## 2 103 76 30 67
## 3 27 33 13 7
## 4 100 165 32 139
round(chisq$expected,2)
##
## 1 2 3 4
## 1 546.92 541.38 216.69 477.00
## 2 84.71 83.85 33.56 73.88
## 3 24.55 24.30 9.73 21.41
## 4 133.82 132.46 53.02 116.71
total.sum=marginSums(contigency.table.raw$`1`+contigency.table.raw$`2`+contigency.table.raw$`3`+contigency.table.raw$`4`)
exp.1.1=marginSums(contigency.table.raw$`1`)*marginSums(as.integer(contigency.table.raw[1,]))/total.sum
exp.1.2=marginSums(contigency.table.raw$`1`)*marginSums(as.integer(contigency.table.raw[2,]))/total.sum
exp.1.3=marginSums(contigency.table.raw$`1`)*marginSums(as.integer(contigency.table.raw[3,]))/total.sum
exp.1.4=marginSums(contigency.table.raw$`1`)*marginSums(as.integer(contigency.table.raw[4,]))/total.sum
exp.2.1=marginSums(contigency.table.raw$`2`)*marginSums(as.integer(contigency.table.raw[1,]))/total.sum
exp.2.2=marginSums(contigency.table.raw$`2`)*marginSums(as.integer(contigency.table.raw[2,]))/total.sum
exp.2.3=marginSums(contigency.table.raw$`2`)*marginSums(as.integer(contigency.table.raw[3,]))/total.sum
exp.2.4=marginSums(contigency.table.raw$`2`)*marginSums(as.integer(contigency.table.raw[4,]))/total.sum
exp.3.1=marginSums(contigency.table.raw$`3`)*marginSums(as.integer(contigency.table.raw[1,]))/total.sum
exp.3.2=marginSums(contigency.table.raw$`3`)*marginSums(as.integer(contigency.table.raw[2,]))/total.sum
exp.3.3=marginSums(contigency.table.raw$`3`)*marginSums(as.integer(contigency.table.raw[3,]))/total.sum
exp.3.4=marginSums(contigency.table.raw$`3`)*marginSums(as.integer(contigency.table.raw[4,]))/total.sum
exp.4.1=marginSums(contigency.table.raw$`4`)*marginSums(as.integer(contigency.table.raw[1,]))/total.sum
exp.4.2=marginSums(contigency.table.raw$`4`)*marginSums(as.integer(contigency.table.raw[2,]))/total.sum
exp.4.3=marginSums(contigency.table.raw$`4`)*marginSums(as.integer(contigency.table.raw[3,]))/total.sum
exp.4.4=marginSums(contigency.table.raw$`4`)*marginSums(as.integer(contigency.table.raw[4,]))/total.sum
exp.col.1=c(exp.1.1,exp.1.2,exp.1.3,exp.1.4)
exp.col.2=c(exp.2.1,exp.2.2,exp.2.3,exp.2.4)
exp.col.3=c(exp.3.1,exp.3.2,exp.3.3,exp.3.4)
exp.col.4=c(exp.4.1,exp.4.2,exp.4.3,exp.4.4)
expected.table.manual=data.frame(exp.col.1,exp.col.2,exp.col.3,exp.col.4)
expected.table.manual
## exp.col.1 exp.col.2 exp.col.3 exp.col.4
## 1 546.92308 541.38462 216.69231 477.00000
## 2 84.70862 83.85082 33.56177 73.87879
## 3 24.55322 24.30458 9.72805 21.41414
## 4 133.81507 132.45998 53.01787 116.70707
chisq.contribution <- chisq$residuals^2
chisq.contribution=round(chisq.contribution, 3)
chisq.contribution=as.matrix(chisq.contribution)
chisq.contribution
##
## 1 2 3 4
## 1 0.313 2.059 2.095 0.002
## 2 3.950 0.735 0.378 0.640
## 3 0.244 3.111 1.100 9.702
## 4 8.545 7.994 8.332 4.258
chisq.contribution.sum= as.data.frame.matrix(addmargins(chisq.contribution))
RACE=c("1.00 WHITE","2.00 BLACK", "3.00 ASIAN","4.00 OTHER","SUM")
chisq.contribution.sum=cbind(chisq.contribution.sum, RACE)
chisq.contribution.sum=chisq.contribution.sum[,c(6,1,2,3,4,5)]
colnames(chisq.contribution.sum) <- c("RACE","1.00 pizza hut","2.00 dominos", "3.00 papa johns","4.00 little caesars","SUM")
chisq.contribution.sum
## RACE 1.00 pizza hut 2.00 dominos 3.00 papa johns 4.00 little caesars
## 1 1.00 WHITE 0.313 2.059 2.095 0.002
## 2 2.00 BLACK 3.950 0.735 0.378 0.640
## 3 3.00 ASIAN 0.244 3.111 1.100 9.702
## 4 4.00 OTHER 8.545 7.994 8.332 4.258
## Sum SUM 13.052 13.899 11.905 14.602
## SUM
## 1 4.469
## 2 5.703
## 3 14.157
## 4 29.129
## Sum 53.458
metrics.result <- ca(data.final.matrix)
# Get mass, inertia and quality metrics for rows
names <- metrics.result$rownames
mass <- metrics.result$rowmass
inertia <- metrics.result$rowinertia
quality <- metrics.result$rowdist
metrics.row.table <- data.frame(names,mass,inertia,quality)
metrics.row.table
## names mass inertia quality
## 1 1 0.69230769 0.001736074 0.05007657
## 2 2 0.10722611 0.002215712 0.14374950
## 3 3 0.03108003 0.005500239 0.42067824
## 4 4 0.16938617 0.011316724 0.25847669
# Get mass, inertia and quality metrics for columns
names <- metrics.result$colnames
mass <- metrics.result$colmass
inertia <- metrics.result$colinertia
quality <- metrics.result$coldist
metrics.col.table <- data.frame(names,mass,inertia,quality)
metrics.col.table
## names mass inertia quality
## 1 1 0.3069153 0.005070425 0.1285325
## 2 2 0.3038073 0.005399546 0.1333151
## 3 3 0.1216006 0.004625416 0.1950327
## 4 4 0.2676768 0.005673361 0.1455844
metrics.result
##
## Principal inertias (eigenvalues):
## 1 2 3
## Value 0.014114 0.005141 0.001514
## Percentage 67.96% 24.75% 7.29%
##
##
## Rows:
## 1 2 3 4
## Mass 0.692308 0.107226 0.031080 0.169386
## ChiDist 0.050077 0.143750 0.420678 0.258477
## Inertia 0.001736 0.002216 0.005500 0.011317
## Dim. 1 -0.343356 -0.793746 -1.389359 2.160742
## Dim. 2 0.310330 0.179879 -5.392331 -0.392816
##
##
## Columns:
## 1 2 3 4
## Mass 0.306915 0.303807 0.121601 0.267677
## ChiDist 0.128532 0.133315 0.195033 0.145584
## Inertia 0.005070 0.005400 0.004625 0.005673
## Dim. 1 -1.017265 0.817238 -1.464276 0.904032
## Dim. 2 0.131312 -1.274237 -0.152200 1.364812
plot(metrics.result)
## Correspondence plot with arrows
plot(metrics.result, mass = TRUE, contrib = "absolute",map =
"rowgreen", arrows = c(TRUE, TRUE))
## Standardized Adjusted Residuals
std.residuals <- chisq$stdres
std.residuals <-as.matrix(std.residuals)
std.residuals
##
## 1 2 3 4
## 1 1.21085174 -3.10006194 2.78426354 -0.09645641
## 2 2.52649621 -1.08749100 -0.69426756 -0.98975998
## 3 0.60256339 2.14752172 1.13711356 -3.69779717
## 4 -3.85269907 3.71801464 -3.37934080 2.64586432