library(tibble)
library(tidyverse)
library(vtable)
library(ggplot2)
library(gplots)
library(graphics)
library(vcd)
library(corrplot)
FREQUENCY TEST
EXERCISE 3
3.1 Loading packages
3.2 Loading data (housetasks)
# Import the data: read the tab-delimited file from the URL, using its first
# column as row names
file_path <- "http://www.sthda.com/sthda/RDoc/data/housetasks.txt"
housetasks <- read.delim(file_path, row.names = 1)
# Preview the first rows
head(housetasks)
Wife Alternating Husband Jointly
Laundry 156 14 2 4
Main_meal 124 20 5 4
Dinner 77 11 7 13
Breakfeast 82 36 15 7
Tidying 53 11 1 57
Dishes 32 24 4 53
3.3 Converting data to table
# 1. Convert the data frame to a table (via a matrix) for the plotting functions
dt <- as.table(as.matrix(housetasks))
# 2. Balloon plot of the transposed table, titled "housetasks", with empty
# axis titles, no cell labels and no margins
balloonplot(
  t(dt),
  main = "housetasks",
  xlab = "",
  ylab = "",
  label = FALSE,
  show.margins = FALSE
)
3.4 Mosaicplot of the data
# Mosaic plot of the full table, with shading switched on and las = 2 for the
# axis label orientation
mosaicplot(
  dt,
  main = "housetasks",
  shade = TRUE,
  las = 2
)
3.5 Subset of the mosaicplot
# Association plot restricted to the first five rows of the table
assoc(
  head(dt, n = 5),
  shade = TRUE,
  las = 3
)
3.6 Chi-square test of the data
# 1. Chi-square test of the data: store the result, then print it
chisq <- chisq.test(housetasks)
chisq
Pearson's Chi-squared test
data: housetasks
X-squared = 1944.5, df = 36, p-value < 2.2e-16
# 2. Observed counts stored in the test result
chisq$observed
Wife Alternating Husband Jointly
Laundry 156 14 2 4
Main_meal 124 20 5 4
Dinner 77 11 7 13
Breakfeast 82 36 15 7
Tidying 53 11 1 57
Dishes 32 24 4 53
Shopping 33 23 9 55
Official 12 46 23 15
Driving 10 51 75 3
Finances 13 13 21 66
Insurance 8 1 53 77
Repairs 0 3 160 2
Holidays 0 1 6 153
# 3. Expected counts stored in the test result, rounded to two decimals
round(chisq[["expected"]], digits = 2)
Wife Alternating Husband Jointly
Laundry 60.55 25.63 38.45 51.37
Main_meal 52.64 22.28 33.42 44.65
Dinner 37.16 15.73 23.59 31.52
Breakfeast 48.17 20.39 30.58 40.86
Tidying 41.97 17.77 26.65 35.61
Dishes 38.88 16.46 24.69 32.98
Shopping 41.28 17.48 26.22 35.02
Official 33.03 13.98 20.97 28.02
Driving 47.82 20.24 30.37 40.57
Finances 38.88 16.46 24.69 32.98
Insurance 47.82 20.24 30.37 40.57
Repairs 56.77 24.03 36.05 48.16
Holidays 55.05 23.30 34.95 46.70
3.7 Extraction of the Pearson residual after the chi-square test.
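The Pearson residual of a cell is $r = (O - E) / \sqrt{E}$, where $O$ is the observed count and $E$ the expected count; it is crucial in chi-square tests because it quantifies, cell by cell, the gap between observed and expected frequencies in a contingency table.
While the chi-square statistic indicates only whether there is an association in the data, Pearson residuals clarify where that association is located: large positive values mark cells with more observations than expected, and large negative values mark cells with fewer.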
# 1. Pearson residuals stored in the test result, rounded to three decimals
round(chisq[["residuals"]], digits = 3)
Wife Alternating Husband Jointly
Laundry 12.266 -2.298 -5.878 -6.609
Main_meal 9.836 -0.484 -4.917 -6.084
Dinner 6.537 -1.192 -3.416 -3.299
Breakfeast 4.875 3.457 -2.818 -5.297
Tidying 1.702 -1.606 -4.969 3.585
Dishes -1.103 1.859 -4.163 3.486
Shopping -1.289 1.321 -3.362 3.376
Official -3.659 8.563 0.443 -2.459
Driving -5.469 6.836 8.100 -5.898
Finances -4.150 -0.852 -0.742 5.750
Insurance -5.758 -4.277 4.107 5.720
Repairs -7.534 -4.290 20.646 -6.651
Holidays -7.419 -4.620 -4.897 15.556
# 2. Plot the Pearson residuals; is.cor = FALSE because the matrix is not a
# correlation matrix
corrplot(
  chisq[["residuals"]],
  is.cor = FALSE
)
EXERCISE 4
4.1 Loading packages 2
library(dplyr)
library(tidyr)
library(knitr)
library(gmodels)
4.2 Loading data (mpg)
# Summary by group: count the rows of mpg for each class/cyl combination and
# render the result as a table
mpg %>%
  group_by(class, cyl) %>%
  summarize(n = n()) %>%
  kable()
`summarise()` has grouped output by 'class'. You can override using the
`.groups` argument.
class | cyl | n |
---|---|---|
2seater | 8 | 5 |
compact | 4 | 32 |
compact | 5 | 2 |
compact | 6 | 13 |
midsize | 4 | 16 |
midsize | 6 | 23 |
midsize | 8 | 2 |
minivan | 4 | 1 |
minivan | 6 | 10 |
pickup | 4 | 3 |
pickup | 6 | 10 |
pickup | 8 | 20 |
subcompact | 4 | 21 |
subcompact | 5 | 2 |
subcompact | 6 | 7 |
subcompact | 8 | 5 |
suv | 4 | 8 |
suv | 6 | 16 |
suv | 8 | 38 |
4.3 Contingency table of data (mpg)
# 1. Contingency table: count rows per class/cyl pair, then spread the cyl
# values into columns, filling absent combinations with 0
mpg_counts <- mpg %>%
  group_by(class, cyl) %>%
  summarise(n = n(), .groups = "drop") %>%
  spread(cyl, n, fill = 0)
# Convert to matrix format for the chi-square test: drop the first (class)
# column from the values and reuse it as the row names
mpg_matrix <- as.matrix(mpg_counts[, -1])
rownames(mpg_matrix) <- mpg_counts$class
4.4 Chi-square test (mpg)
# 1. Chi-square test of the data
# NOTE(review): this call emits "Chi-squared approximation may be incorrect",
# presumably because several expected counts are small; confirm whether the
# approximation is acceptable for this table.
chisq <- chisq.test(mpg_matrix)
Warning in chisq.test(mpg_matrix): Chi-squared approximation may be incorrect
chisq
Pearson's Chi-squared test
data: mpg_matrix
X-squared = 138.03, df = 18, p-value < 2.2e-16
# 2. Observed counts stored in the test result
chisq$observed
4 5 6 8
2seater 0 0 0 5
compact 32 2 13 0
midsize 16 0 23 2
minivan 1 0 10 0
pickup 3 0 10 20
subcompact 21 2 7 5
suv 8 0 16 38
# 3. Expected counts stored in the test result, rounded to two decimals
round(chisq[["expected"]], digits = 2)
4 5 6 8
2seater 1.73 0.09 1.69 1.50
compact 16.27 0.80 15.87 14.06
midsize 14.19 0.70 13.84 12.26
minivan 3.81 0.19 3.71 3.29
pickup 11.42 0.56 11.14 9.87
subcompact 12.12 0.60 11.82 10.47
suv 21.46 1.06 20.93 18.55
4.5 Extraction of the Pearson residual (mpg)
# 1. Pearson residuals stored in the test result, rounded to three decimals
round(chisq[["residuals"]], digits = 3)
4 5 6 8
2seater -1.316 -0.292 -1.299 2.865
compact 3.900 1.335 -0.720 -3.750
midsize 0.480 -0.837 2.462 -2.931
minivan -1.439 -0.434 3.262 -1.814
pickup -2.492 -0.751 -0.342 3.224
subcompact 2.553 1.812 -1.401 -1.691
suv -2.906 -1.029 -1.078 4.517
# 2. Plot the Pearson residuals; is.cor = FALSE because the matrix is not a
# correlation matrix
corrplot(
  chisq[["residuals"]],
  is.cor = FALSE
)
4.6 CrossTable of data (mpg)
# Create a cross-tabulation of class (rows) against cyl (columns).
# The arguments are written as mpg$class and mpg$cyl, and those expressions
# appear as the row/column labels in the printed table.
CrossTable(mpg$class, mpg$cyl)
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 234
| mpg$cyl
mpg$class | 4 | 5 | 6 | 8 | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|
2seater | 0 | 0 | 0 | 5 | 5 |
| 1.731 | 0.085 | 1.688 | 8.210 | |
| 0.000 | 0.000 | 0.000 | 1.000 | 0.021 |
| 0.000 | 0.000 | 0.000 | 0.071 | |
| 0.000 | 0.000 | 0.000 | 0.021 | |
-------------|-----------|-----------|-----------|-----------|-----------|
compact | 32 | 2 | 13 | 0 | 47 |
| 15.210 | 1.782 | 0.518 | 14.060 | |
| 0.681 | 0.043 | 0.277 | 0.000 | 0.201 |
| 0.395 | 0.500 | 0.165 | 0.000 | |
| 0.137 | 0.009 | 0.056 | 0.000 | |
-------------|-----------|-----------|-----------|-----------|-----------|
midsize | 16 | 0 | 23 | 2 | 41 |
| 0.230 | 0.701 | 6.059 | 8.591 | |
| 0.390 | 0.000 | 0.561 | 0.049 | 0.175 |
| 0.198 | 0.000 | 0.291 | 0.029 | |
| 0.068 | 0.000 | 0.098 | 0.009 | |
-------------|-----------|-----------|-----------|-----------|-----------|
minivan | 1 | 0 | 10 | 0 | 11 |
| 2.070 | 0.188 | 10.641 | 3.291 | |
| 0.091 | 0.000 | 0.909 | 0.000 | 0.047 |
| 0.012 | 0.000 | 0.127 | 0.000 | |
| 0.004 | 0.000 | 0.043 | 0.000 | |
-------------|-----------|-----------|-----------|-----------|-----------|
pickup | 3 | 0 | 10 | 20 | 33 |
| 6.211 | 0.564 | 0.117 | 10.391 | |
| 0.091 | 0.000 | 0.303 | 0.606 | 0.141 |
| 0.037 | 0.000 | 0.127 | 0.286 | |
| 0.013 | 0.000 | 0.043 | 0.085 | |
-------------|-----------|-----------|-----------|-----------|-----------|
subcompact | 21 | 2 | 7 | 5 | 35 |
| 6.515 | 3.284 | 1.963 | 2.858 | |
| 0.600 | 0.057 | 0.200 | 0.143 | 0.150 |
| 0.259 | 0.500 | 0.089 | 0.071 | |
| 0.090 | 0.009 | 0.030 | 0.021 | |
-------------|-----------|-----------|-----------|-----------|-----------|
suv | 8 | 0 | 16 | 38 | 62 |
| 8.444 | 1.060 | 1.162 | 20.403 | |
| 0.129 | 0.000 | 0.258 | 0.613 | 0.265 |
| 0.099 | 0.000 | 0.203 | 0.543 | |
| 0.034 | 0.000 | 0.068 | 0.162 | |
-------------|-----------|-----------|-----------|-----------|-----------|
Column Total | 81 | 4 | 79 | 70 | 234 |
| 0.346 | 0.017 | 0.338 | 0.299 | |
-------------|-----------|-----------|-----------|-----------|-----------|