Contingency table and Chi-square test

Loading packages

Loading required packages using pacman’s p_load() function -

> pacman::p_load(pacman, rio, tidyverse, magrittr)

Importing data and manipulation

Using import() function from rio package to import the data file and perform basic manipulations using tidyverse -

> df <- import("Data/StateData.csv") %>%
+   as_tibble()
> glimpse(df) # similar to str() but it is a function from tibble package

Rows: 48
Columns: 22
$ State             <chr> "Alabama", "Arizona", "Arkansas", "California", "...
$ state_code        <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "...
$ region            <chr> "South", "West", "South", "West", "West", "Northe...
$ governor          <chr> "Republican", "Republican", "Republican", "Democr...
$ psychRegions      <chr> "Friendly and Conventional", "Relaxed and Creativ...
$ extraversion      <dbl> 55.5, 50.6, 49.9, 51.4, 45.3, 57.6, 47.0, 60.9, 6...
$ agreeableness     <dbl> 52.7, 46.6, 52.7, 49.0, 47.5, 38.6, 38.8, 50.7, 6...
$ conscientiousness <dbl> 55.5, 58.4, 41.0, 43.2, 58.8, 34.2, 36.5, 62.7, 6...
$ neuroticism       <dbl> 48.7, 38.1, 56.2, 39.1, 34.3, 53.4, 62.4, 40.8, 3...
$ openness          <dbl> 42.7, 54.7, 40.3, 65.0, 57.9, 53.9, 42.7, 61.0, 5...
$ instagram         <dbl> 0.64, 0.18, 0.46, 1.47, -1.04, 0.37, 1.48, 0.85, ...
$ facebook          <dbl> 1.65, -0.26, 1.10, -0.42, -1.06, -0.98, -1.12, 0....
$ retweet           <dbl> 0.35, -0.57, -0.60, 0.48, -0.90, 1.15, 1.19, -0.2...
$ entrepreneur      <dbl> 0.26, 0.56, 0.25, 0.50, 0.02, 0.07, 2.55, 0.78, 1...
$ gdpr              <dbl> -0.77, -0.31, -0.60, 1.12, 0.59, 0.71, 1.21, -0.2...
$ privacy           <dbl> 0.58, -0.45, 0.69, 0.23, -0.22, 0.36, 0.90, -0.14...
$ university        <dbl> 1.74, -0.77, 0.02, -1.92, -0.44, 0.37, 2.19, -1.1...
$ mortgage          <dbl> 1.41, 1.01, -0.66, -0.88, 1.49, 0.48, 1.66, 0.11,...
$ volunteering      <dbl> -1.49, 0.96, -1.19, 0.57, 1.01, 1.28, 1.37, 0.46,...
$ museum            <dbl> -1.11, -0.13, -0.95, 0.05, 0.72, 1.18, 0.70, -0.7...
$ scrapbook         <dbl> 0.06, 0.34, 0.85, -0.69, -0.34, -0.81, -0.73, -0....
$ modernDance       <dbl> -1.27, 0.41, -1.44, 0.38, -0.29, 0.55, -0.22, -0....

Selecting our required three columns state_code, region, psychRegions -

> df %<>% select(state_code, region, psychRegions) %>%
+   mutate(psychRegions = as.factor(psychRegions))
> head(df)

# A tibble: 6 x 3
  state_code region    psychRegions                 
  <chr>      <chr>     <fct>                        
1 AL         South     Friendly and Conventional    
2 AZ         West      Relaxed and Creative         
3 AR         South     Friendly and Conventional    
4 CA         West      Relaxed and Creative         
5 CO         West      Friendly and Conventional    
6 CT         Northeast Temperamental and Uninhibited

Get help on your problems from experienced statisticians at homeworkhelponline.net.

Contingency table

Creating a cintingency table using table() -

> contable <- table(df$psychRegions, df$region)
> contable

                               
                                Midwest Northeast South West
  Friendly and Conventional          11         0    10    3
  Relaxed and Creative                0         0     2    8
  Temperamental and Uninhibited       1         9     4    0

Percentage in table

To see the row-wise percentage using prop.table -

> prop.table(contable, margin = 1) %>% round(2) # margin = 1 for row

                               
                                Midwest Northeast South West
  Friendly and Conventional        0.46      0.00  0.42 0.12
  Relaxed and Creative             0.00      0.00  0.20 0.80
  Temperamental and Uninhibited    0.07      0.64  0.29 0.00

To see the column-wise percentage using prop.table -

> prop.table(contable, margin = 2) %>% round(2) # margin = 2 for column

                               
                                Midwest Northeast South West
  Friendly and Conventional        0.92      0.00  0.62 0.27
  Relaxed and Creative             0.00      0.00  0.12 0.73
  Temperamental and Uninhibited    0.08      1.00  0.25 0.00

To see the overall percentage -

> prop.table(contable) %>% round(2)

                               
                                Midwest Northeast South West
  Friendly and Conventional        0.23      0.00  0.21 0.06
  Relaxed and Creative             0.00      0.00  0.04 0.17
  Temperamental and Uninhibited    0.02      0.19  0.08 0.00

Chi-square test

Calculating chi-square test statistic and associated results -

> chires <- chisq.test(contable)
> chires


    Pearson's Chi-squared test

data:  contable
X-squared = 50.002, df = 6, p-value = 4.697e-09

This can also be done in another way -

> chisq.test(df$psychRegions, df$region)


    Pearson's Chi-squared test

data:  df$psychRegions and df$region
X-squared = 50.002, df = 6, p-value = 4.697e-09

The created list object has these -

> typeof(chires)

[1] "list"

> glimpse(chires)

List of 9
 $ statistic: Named num 50
  ..- attr(*, "names")= chr "X-squared"
 $ parameter: Named int 6
  ..- attr(*, "names")= chr "df"
 $ p.value  : num 4.7e-09
 $ method   : chr "Pearson's Chi-squared test"
 $ data.name: chr "contable"
 $ observed : 'table' int [1:3, 1:4] 11 0 1 0 0 9 10 2 4 3 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ expected : num [1:3, 1:4] 6 2.5 3.5 4.5 1.88 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ residuals: 'table' num [1:3, 1:4] 2.04 -1.58 -1.34 -2.12 -1.37 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ stdres   : 'table' num [1:3, 1:4] 3.33 -2.05 -1.83 -3.33 -1.71 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 - attr(*, "class")= chr "htest"

That is -

> chires$statistic  # the value the chi-squared test statistic

X-squared 
 50.00173

> chires$parameter  # the degrees of freedom of the approximate chi-squared distribution

df 
 6

> chires$p.value

[1] 4.697313e-09

> chires$observed   # the observed counts

                               
                                Midwest Northeast South West
  Friendly and Conventional          11         0    10    3
  Relaxed and Creative                0         0     2    8
  Temperamental and Uninhibited       1         9     4    0

> chires$expected   # the expected counts under the null hypothesis

                               
                                Midwest Northeast    South     West
  Friendly and Conventional         6.0     4.500 8.000000 5.500000
  Relaxed and Creative              2.5     1.875 3.333333 2.291667
  Temperamental and Uninhibited     3.5     2.625 4.666667 3.208333

> chires$residuals  # the Pearson residuals, (observed - expected) / sqrt(expected)

                               
                                   Midwest  Northeast      South       West
  Friendly and Conventional      2.0412415 -2.1213203  0.7071068 -1.0660036
  Relaxed and Creative          -1.5811388 -1.3693064 -0.7302967  3.7708009
  Temperamental and Uninhibited -1.3363062  3.9347354 -0.3086067 -1.7911821

LS0tDQp0aXRsZTogIkNvbnRpbmdlbmN5IHRhYmxlIGFuZCBDaGktc3F1YXJlIHRlc3QiDQphdXRob3I6ICJNRCBBSFNBTlVMIElTTEFNIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICB0b2NfZGVwdGg6IDQNCiAgICB0aGVtZTogY2VydWxlYW4NCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQogICAgI2NvZGVfZm9sZGluZzogc2hvdw0KLS0tDQpgYGB7ciwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldCgNCiAgY29tbWVudCA9ICIiLCBwcm9tcHQgPSBUUlVFLCBtZXNzYWdlPUYsIHdhcm5pbmcgPUYNCikNCmBgYA0KLS0tDQoNCiMjIExvYWRpbmcgcGFja2FnZXMNCg0KTG9hZGluZyByZXF1aXJlZCBwYWNrYWdlcyB1c2luZyBwYWNtYW4ncyBwX2xvYWQoKSBmdW5jdGlvbiAtDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHBhY21hbiwgcmlvLCB0aWR5dmVyc2UsIG1hZ3JpdHRyKQ0KYGBgDQoNCiMjIEltcG9ydGluZyBkYXRhIGFuZCBtYW5pcHVsYXRpb24NCg0KVXNpbmcgaW1wb3J0KCkgZnVuY3Rpb24gZnJvbSByaW8gcGFja2FnZSB0byBpbXBvcnQgdGhlIGRhdGEgZmlsZSBhbmQgcGVyZm9ybSBiYXNpYyBtYW5pcHVsYXRpb25zIHVzaW5nIHRpZHl2ZXJzZSAtIA0KYGBge3J9DQpkZiA8LSBpbXBvcnQoIkRhdGEvU3RhdGVEYXRhLmNzdiIpICU+JQ0KICBhc190aWJibGUoKQ0KZ2xpbXBzZShkZikgIyBzaW1pbGFyIHRvIHN0cigpIGJ1dCBpdCBpcyBhIGZ1bmN0aW9uIGZyb20gdGliYmxlIHBhY2thZ2UNCmBgYA0KDQpTZWxlY3Rpbmcgb3VyIHJlcXVpcmVkIHRocmVlIGNvbHVtbnMgc3RhdGVfY29kZSwgcmVnaW9uLCBwc3ljaFJlZ2lvbnMgLSANCmBgYHtyfQ0KZGYgJTw+JSBzZWxlY3Qoc3RhdGVfY29kZSwgcmVnaW9uLCBwc3ljaFJlZ2lvbnMpICU+JQ0KICBtdXRhdGUocHN5Y2hSZWdpb25zID0gYXMuZmFjdG9yKHBzeWNoUmVnaW9ucykpDQpoZWFkKGRmKQ0KYGBgDQoNCkdldCBoZWxwIG9uIHlvdXIgcHJvYmxlbXMgZnJvbSBleHBlcmllbmNlZCBzdGF0aXN0aWNpYW5zIGF0IFtob21ld29ya2hlbHBvbmxpbmUubmV0XShodHRwczovL3d3dy5ob21ld29ya2hlbHBvbmxpbmUubmV0L3Byb2dyYW1taW5nL3ItcHJvZ3JhbW1pbmcpLg0KDQojIyBDb250aW5nZW5jeSB0YWJsZQ0KDQpDcmVhdGluZyBhIGNpbnRpbmdlbmN5IHRhYmxlIHVzaW5nIHRhYmxlKCkgLSANCmBgYHtyfQ0KY29udGFibGUgPC0gdGFibGUoZGYkcHN5Y2hSZWdpb25zLCBkZiRyZWdpb24pDQpjb250YWJsZQ0KYGBgDQoNCiMjIFBlcmNlbnRhZ2UgaW4gdGFibGUNCg0KVG8gc2VlIHRoZSByb3ctd2lzZSBwZXJjZW50YWdlIHVzaW5nIHByb3AudGFibGUgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSwgbWFyZ2luID0gMSkgJT4lIHJvdW5kKDIpICMgbWFyZ2luID0gMSBmb3Igcm93DQpgYGANCg0KVG8gc2VlIHRoZSBjb2x1bW4td2lzZSBwZXJjZW50YWdlIHVzaW5nIHByb3AudGFibGUgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSwgbWFyZ2luID0gMikgJT4lIHJvdW5kKDIpICMgbWFyZ2luID0gMiBmb3IgY29sdW1uDQpgYGANCg0KVG8gc2VlIHRoZSBvdmVyYWxsIHBlcmNlbnRhZ2UgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSkgJT4lIHJvdW5kKDIpDQpgYGANCg0KIyMgQ2hpLXNxdWFyZSB0ZXN0IA0KDQpDYWxjdWxhdGluZyBjaGktc3F1YXJlIHRlc3Qgc3RhdGlzdGljIGFuZCBhc3NvY2lhdGVkIHJlc3VsdHMgLSANCmBgYHtyfQ0KY2hpcmVzIDwtIGNoaXNxLnRlc3QoY29udGFibGUpDQpjaGlyZXMNCmBgYA0KDQpUaGlzIGNhbiBhbHNvIGJlIGRvbmUgaW4gYW5vdGhlciB3YXkgLSANCmBgYHtyfQ0KY2hpc3EudGVzdChkZiRwc3ljaFJlZ2lvbnMsIGRmJHJlZ2lvbikNCmBgYA0KDQpUaGUgY3JlYXRlZCBsaXN0IG9iamVjdCBoYXMgdGhlc2UgLQ0KYGBge3J9DQp0eXBlb2YoY2hpcmVzKQ0KZ2xpbXBzZShjaGlyZXMpDQpgYGANCg0KVGhhdCBpcyAtIA0KYGBge3J9DQpjaGlyZXMkc3RhdGlzdGljICAjIHRoZSB2YWx1ZSB0aGUgY2hpLXNxdWFyZWQgdGVzdCBzdGF0aXN0aWMNCmNoaXJlcyRwYXJhbWV0ZXIgICMgdGhlIGRlZ3JlZXMgb2YgZnJlZWRvbSBvZiB0aGUgYXBwcm94aW1hdGUgY2hpLXNxdWFyZWQgZGlzdHJpYnV0aW9uDQpjaGlyZXMkcC52YWx1ZSAgDQpjaGlyZXMkb2JzZXJ2ZWQgICAjIHRoZSBvYnNlcnZlZCBjb3VudHMNCmNoaXJlcyRleHBlY3RlZCAgICMgdGhlIGV4cGVjdGVkIGNvdW50cyB1bmRlciB0aGUgbnVsbCBoeXBvdGhlc2lzDQpjaGlyZXMkcmVzaWR1YWxzICAjIHRoZSBQZWFyc29uIHJlc2lkdWFscywgKG9ic2VydmVkIC0gZXhwZWN0ZWQpIC8gc3FydChleHBlY3RlZCkNCmBgYA0KDQo=