FREQUENCY TEST

Author

Inusa Yawuza Musa (N1349229)

EXERCISE 3

3.1 Loading packages

library(tibble)
library(tidyverse)
library(vtable)
library(ggplot2)
library(gplots)
library(graphics)
library(vcd)
library(corrplot)

3.2 Loading data (housetasks)

# Read the tab-delimited housetasks file from the STHDA site.
# `row.names = 1` turns the first column (the task names) into row names, so
# the data frame holds only the four count columns.
file_path <- "http://www.sthda.com/sthda/RDoc/data/housetasks.txt"
housetasks <- read.delim(file = file_path, header = TRUE, row.names = 1)

# Preview the first six rows
head(housetasks, n = 6)
           Wife Alternating Husband Jointly
Laundry     156          14       2       4
Main_meal   124          20       5       4
Dinner       77          11       7      13
Breakfeast   82          36      15       7
Tidying      53          11       1      57
Dishes       32          24       4      53

3.3 Converting data to table

# 1. Turn the data frame into a contingency table (via a matrix); `dt` is
#    reused by the plots that follow.
dt <- housetasks %>%
  as.matrix() %>%
  as.table()

# 2. Balloon plot of the transposed table, without axis titles, cell labels
#    or marginal totals.
balloonplot(
  t(dt),
  main = "housetasks",
  xlab = "",
  ylab = "",
  label = FALSE,
  show.margins = FALSE
)

3.4 Mosaicplot of the data

# Mosaic plot of the full contingency table, with shaded cells (shade = TRUE)
# and axis label orientation set through las = 2.
mosaicplot(
  dt,
  main = "housetasks",
  shade = TRUE,
  las = 2
)

3.5 Association plot of a subset of the data

# Association plot (vcd::assoc) restricted to the first five rows of the
# contingency table, with shading enabled.
dt %>%
  head(n = 5) %>%
  assoc(shade = TRUE, las = 3)

3.6 Chi-square test of the data

# 1. Pearson's chi-squared test on the housetasks counts.
#    The result is kept in `chisq` so the observed counts, expected counts and
#    residuals can be read from it in the following steps.
chisq <- chisq.test(x = housetasks)
print(chisq)

    Pearson's Chi-squared test

data:  housetasks
X-squared = 1944.5, df = 36, p-value < 2.2e-16
# 2. Observed counts stored in the test result
chisq[["observed"]]
           Wife Alternating Husband Jointly
Laundry     156          14       2       4
Main_meal   124          20       5       4
Dinner       77          11       7      13
Breakfeast   82          36      15       7
Tidying      53          11       1      57
Dishes       32          24       4      53
Shopping     33          23       9      55
Official     12          46      23      15
Driving      10          51      75       3
Finances     13          13      21      66
Insurance     8           1      53      77
Repairs       0           3     160       2
Holidays      0           1       6     153
# 3. Expected counts stored in the test result, rounded to two decimals
round(chisq[["expected"]], digits = 2)
            Wife Alternating Husband Jointly
Laundry    60.55       25.63   38.45   51.37
Main_meal  52.64       22.28   33.42   44.65
Dinner     37.16       15.73   23.59   31.52
Breakfeast 48.17       20.39   30.58   40.86
Tidying    41.97       17.77   26.65   35.61
Dishes     38.88       16.46   24.69   32.98
Shopping   41.28       17.48   26.22   35.02
Official   33.03       13.98   20.97   28.02
Driving    47.82       20.24   30.37   40.57
Finances   38.88       16.46   24.69   32.98
Insurance  47.82       20.24   30.37   40.57
Repairs    56.77       24.03   36.05   48.16
Holidays   55.05       23.30   34.95   46.70

3.7 Extraction of the Pearson residual after the chi-square test.

The Pearson residual is crucial in chi-square tests because it helps you understand the relationship between observed and expected frequencies in a contingency table.

While the chi-square statistic indicates whether there’s an association in the data, Pearson residuals help to clarify where these associations are located.

# 1. Pearson residuals from the test result, rounded to three decimals
round(chisq[["residuals"]], digits = 3)
             Wife Alternating Husband Jointly
Laundry    12.266      -2.298  -5.878  -6.609
Main_meal   9.836      -0.484  -4.917  -6.084
Dinner      6.537      -1.192  -3.416  -3.299
Breakfeast  4.875       3.457  -2.818  -5.297
Tidying     1.702      -1.606  -4.969   3.585
Dishes     -1.103       1.859  -4.163   3.486
Shopping   -1.289       1.321  -3.362   3.376
Official   -3.659       8.563   0.443  -2.459
Driving    -5.469       6.836   8.100  -5.898
Finances   -4.150      -0.852  -0.742   5.750
Insurance  -5.758      -4.277   4.107   5.720
Repairs    -7.534      -4.290  20.646  -6.651
Holidays   -7.419      -4.620  -4.897  15.556
# 2. Plot the Pearson residuals with corrplot. `is.cor = FALSE` is needed
#    because the input is a general matrix, not a correlation matrix.
corrplot(corr = chisq[["residuals"]], is.cor = FALSE)

EXERCISE 4

4.1 Loading packages 2

library(dplyr)
library(tidyr)
library(knitr)
library(gmodels)

4.2 Loading data (mpg)

# Summary by group: count the cars in every class/cylinder combination and
# render the counts as a table.
# `.groups = "drop"` makes summarise() return an ungrouped tibble, so it no
# longer prints its "has grouped output by 'class'" message; this matches the
# contingency-table step, which already drops the grouping explicitly.
mpg %>%
  group_by(class, cyl) %>%
  summarise(n = n(), .groups = "drop") %>%
  kable()
class cyl n
2seater 8 5
compact 4 32
compact 5 2
compact 6 13
midsize 4 16
midsize 6 23
midsize 8 2
minivan 4 1
minivan 6 10
pickup 4 3
pickup 6 10
pickup 8 20
subcompact 4 21
subcompact 5 2
subcompact 6 7
subcompact 8 5
suv 4 8
suv 6 16
suv 8 38

4.3 Contingency table of data (mpg)

# 1. Contingency table: one row per class, one column per cylinder count.
#    pivot_wider() replaces the superseded spread().
#    - names_sort = TRUE keeps the cylinder columns in ascending order;
#      pivot_wider() would otherwise order them by first appearance.
#    - values_fill = 0 puts a zero in class/cylinder combinations that do not
#      occur in the data.
mpg_counts <- mpg %>%
  group_by(class, cyl) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(
    names_from = cyl,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )

# Convert to matrix format for the chi-square test: the first column (class)
# is removed from the cell counts and used as the row names instead.
mpg_matrix <- as.matrix(mpg_counts[, -1])
rownames(mpg_matrix) <- mpg_counts$class

4.4 Chi-square test (mpg)

# 1. Pearson's chi-squared test on the class-by-cylinder count matrix.
#    The result replaces the earlier `chisq` object.
#    NOTE(review): R warns that the chi-squared approximation may be
#    incorrect, presumably because several expected counts are small, so
#    treat the p-value with care.
chisq <- chisq.test(x = mpg_matrix)
Warning in chisq.test(mpg_matrix): Chi-squared approximation may be incorrect
print(chisq)

    Pearson's Chi-squared test

data:  mpg_matrix
X-squared = 138.03, df = 18, p-value < 2.2e-16
# 2. Observed class-by-cylinder counts stored in the test result
chisq[["observed"]]
            4 5  6  8
2seater     0 0  0  5
compact    32 2 13  0
midsize    16 0 23  2
minivan     1 0 10  0
pickup      3 0 10 20
subcompact 21 2  7  5
suv         8 0 16 38
# 3. Expected class-by-cylinder counts, rounded to two decimals
round(chisq[["expected"]], digits = 2)
               4    5     6     8
2seater     1.73 0.09  1.69  1.50
compact    16.27 0.80 15.87 14.06
midsize    14.19 0.70 13.84 12.26
minivan     3.81 0.19  3.71  3.29
pickup     11.42 0.56 11.14  9.87
subcompact 12.12 0.60 11.82 10.47
suv        21.46 1.06 20.93 18.55

4.5 Extraction of the Pearson residual (mpg)

# 1. Pearson residuals of the mpg test, rounded to three decimals
round(chisq[["residuals"]], digits = 3)
                4      5      6      8
2seater    -1.316 -0.292 -1.299  2.865
compact     3.900  1.335 -0.720 -3.750
midsize     0.480 -0.837  2.462 -2.931
minivan    -1.439 -0.434  3.262 -1.814
pickup     -2.492 -0.751 -0.342  3.224
subcompact  2.553  1.812 -1.401 -1.691
suv        -2.906 -1.029 -1.078  4.517
# 2. Plot the Pearson residuals of the mpg test with corrplot.
#    `is.cor = FALSE` is needed because the input is not a correlation matrix.
corrplot(corr = chisq[["residuals"]], is.cor = FALSE)

4.6 CrossTable of data (mpg)

# Cross-tabulate vehicle class (rows) against cylinder count (columns).
# Each cell reports N, its chi-square contribution, and its share of the row,
# column and table totals. The `mpg$...` expressions are kept as written
# because CrossTable prints them as the row and column labels.
CrossTable(x = mpg$class, y = mpg$cyl)

 
   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|

 
Total Observations in Table:  234 

 
             | mpg$cyl 
   mpg$class |         4 |         5 |         6 |         8 | Row Total | 
-------------|-----------|-----------|-----------|-----------|-----------|
     2seater |         0 |         0 |         0 |         5 |         5 | 
             |     1.731 |     0.085 |     1.688 |     8.210 |           | 
             |     0.000 |     0.000 |     0.000 |     1.000 |     0.021 | 
             |     0.000 |     0.000 |     0.000 |     0.071 |           | 
             |     0.000 |     0.000 |     0.000 |     0.021 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
     compact |        32 |         2 |        13 |         0 |        47 | 
             |    15.210 |     1.782 |     0.518 |    14.060 |           | 
             |     0.681 |     0.043 |     0.277 |     0.000 |     0.201 | 
             |     0.395 |     0.500 |     0.165 |     0.000 |           | 
             |     0.137 |     0.009 |     0.056 |     0.000 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
     midsize |        16 |         0 |        23 |         2 |        41 | 
             |     0.230 |     0.701 |     6.059 |     8.591 |           | 
             |     0.390 |     0.000 |     0.561 |     0.049 |     0.175 | 
             |     0.198 |     0.000 |     0.291 |     0.029 |           | 
             |     0.068 |     0.000 |     0.098 |     0.009 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
     minivan |         1 |         0 |        10 |         0 |        11 | 
             |     2.070 |     0.188 |    10.641 |     3.291 |           | 
             |     0.091 |     0.000 |     0.909 |     0.000 |     0.047 | 
             |     0.012 |     0.000 |     0.127 |     0.000 |           | 
             |     0.004 |     0.000 |     0.043 |     0.000 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
      pickup |         3 |         0 |        10 |        20 |        33 | 
             |     6.211 |     0.564 |     0.117 |    10.391 |           | 
             |     0.091 |     0.000 |     0.303 |     0.606 |     0.141 | 
             |     0.037 |     0.000 |     0.127 |     0.286 |           | 
             |     0.013 |     0.000 |     0.043 |     0.085 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
  subcompact |        21 |         2 |         7 |         5 |        35 | 
             |     6.515 |     3.284 |     1.963 |     2.858 |           | 
             |     0.600 |     0.057 |     0.200 |     0.143 |     0.150 | 
             |     0.259 |     0.500 |     0.089 |     0.071 |           | 
             |     0.090 |     0.009 |     0.030 |     0.021 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
         suv |         8 |         0 |        16 |        38 |        62 | 
             |     8.444 |     1.060 |     1.162 |    20.403 |           | 
             |     0.129 |     0.000 |     0.258 |     0.613 |     0.265 | 
             |     0.099 |     0.000 |     0.203 |     0.543 |           | 
             |     0.034 |     0.000 |     0.068 |     0.162 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|
Column Total |        81 |         4 |        79 |        70 |       234 | 
             |     0.346 |     0.017 |     0.338 |     0.299 |           | 
-------------|-----------|-----------|-----------|-----------|-----------|