# Chi-squared test of independence on a 2 x 2 table of counts:
# rows = mentor / no_mentor, columns = white / black.

# Observed counts. matrix() fills column by column, so the first two values
# form the "white" column and the last two the "black" column.
# NOTE: this object shares its name with base::table(); the name is kept so
# that existing references to `table` keep working.
table <- matrix(
  c(26, 261, 41, 171),
  ncol = 2,
  dimnames = list(c("mentor", "no_mentor"), c("white", "black"))
)

# Run the test once, keep the result for later inspection, and print it.
results <- chisq.test(table)
results
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table
## X-squared = 10.219, df = 1, p-value = 0.00139

# Expected counts ----
# If we want to see the expected counts, we can type:
results$expected
## white black
## mentor 38.53507 28.46493
## no_mentor 248.46493 183.53507

# Conditional distribution ----
# To see the conditional distribution by column (second dimension of the
# table):
prop.table(table, margin = 2)
## white black
## mentor 0.09059233 0.1933962
## no_mentor 0.90940767 0.8066038
# If we wanted the conditional distribution by row (first dimension of the
# table), we could type:
prop.table(table, margin = 1)
## white black
## mentor 0.3880597 0.6119403
## no_mentor 0.6041667 0.3958333

# Contribution to chi-squared ----
# We can also look at which cells in our table contribute the most to a high
# chi-squared by typing:
results$residuals
## white black
## mentor -2.0192900 2.3494798
## no_mentor 0.7952327 -0.9252673
# This gives the Pearson residuals, (observed - expected) / sqrt(expected).
# A positive number means that we have more observations than expected in the
# cell, while a negative number means that we have fewer observations than
# expected.
# The squares of these residuals sum to the (uncorrected) Pearson chi-squared
# statistic, so it is the absolute value that matters for a cell's
# contribution: both very low numbers (negative and large) and very high
# numbers (positive and large) make very large contributions to the
# chi-squared.
# NOTE: the statistic printed above uses Yates' continuity correction, so it
# is slightly smaller than the sum of the squared residuals shown here.
# In this specific example, the biggest contribution is from the
# mentor / black cell (residual 2.35): 41 observations against about 28.5
# expected, i.e. the proportion with a mentor in the black column (19.3%) is
# higher than expected under independence.