Loading packages

Loading required packages using pacman’s p_load() function -

> pacman::p_load(pacman, rio, tidyverse, magrittr)

Importing data and manipulation

Using import() function from rio package to import the data file and perform basic manipulations using tidyverse -

> df <- import("Data/StateData.csv") %>%
+   as_tibble()
> glimpse(df) # similar to str() but it is a function from tibble package
Rows: 48
Columns: 22
$ State             <chr> "Alabama", "Arizona", "Arkansas", "California", "...
$ state_code        <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "...
$ region            <chr> "South", "West", "South", "West", "West", "Northe...
$ governor          <chr> "Republican", "Republican", "Republican", "Democr...
$ psychRegions      <chr> "Friendly and Conventional", "Relaxed and Creativ...
$ extraversion      <dbl> 55.5, 50.6, 49.9, 51.4, 45.3, 57.6, 47.0, 60.9, 6...
$ agreeableness     <dbl> 52.7, 46.6, 52.7, 49.0, 47.5, 38.6, 38.8, 50.7, 6...
$ conscientiousness <dbl> 55.5, 58.4, 41.0, 43.2, 58.8, 34.2, 36.5, 62.7, 6...
$ neuroticism       <dbl> 48.7, 38.1, 56.2, 39.1, 34.3, 53.4, 62.4, 40.8, 3...
$ openness          <dbl> 42.7, 54.7, 40.3, 65.0, 57.9, 53.9, 42.7, 61.0, 5...
$ instagram         <dbl> 0.64, 0.18, 0.46, 1.47, -1.04, 0.37, 1.48, 0.85, ...
$ facebook          <dbl> 1.65, -0.26, 1.10, -0.42, -1.06, -0.98, -1.12, 0....
$ retweet           <dbl> 0.35, -0.57, -0.60, 0.48, -0.90, 1.15, 1.19, -0.2...
$ entrepreneur      <dbl> 0.26, 0.56, 0.25, 0.50, 0.02, 0.07, 2.55, 0.78, 1...
$ gdpr              <dbl> -0.77, -0.31, -0.60, 1.12, 0.59, 0.71, 1.21, -0.2...
$ privacy           <dbl> 0.58, -0.45, 0.69, 0.23, -0.22, 0.36, 0.90, -0.14...
$ university        <dbl> 1.74, -0.77, 0.02, -1.92, -0.44, 0.37, 2.19, -1.1...
$ mortgage          <dbl> 1.41, 1.01, -0.66, -0.88, 1.49, 0.48, 1.66, 0.11,...
$ volunteering      <dbl> -1.49, 0.96, -1.19, 0.57, 1.01, 1.28, 1.37, 0.46,...
$ museum            <dbl> -1.11, -0.13, -0.95, 0.05, 0.72, 1.18, 0.70, -0.7...
$ scrapbook         <dbl> 0.06, 0.34, 0.85, -0.69, -0.34, -0.81, -0.73, -0....
$ modernDance       <dbl> -1.27, 0.41, -1.44, 0.38, -0.29, 0.55, -0.22, -0....

Selecting our required three columns state_code, region, psychRegions -

> df %<>% select(state_code, region, psychRegions) %>%
+   mutate(psychRegions = as.factor(psychRegions))
> head(df)
# A tibble: 6 x 3
  state_code region    psychRegions                 
  <chr>      <chr>     <fct>                        
1 AL         South     Friendly and Conventional    
2 AZ         West      Relaxed and Creative         
3 AR         South     Friendly and Conventional    
4 CA         West      Relaxed and Creative         
5 CO         West      Friendly and Conventional    
6 CT         Northeast Temperamental and Uninhibited

Get help on your problems from experienced statisticians at homeworkhelponline.net.

Contingency table

Creating a cintingency table using table() -

> contable <- table(df$psychRegions, df$region)
> contable
                               
                                Midwest Northeast South West
  Friendly and Conventional          11         0    10    3
  Relaxed and Creative                0         0     2    8
  Temperamental and Uninhibited       1         9     4    0

Percentage in table

To see the row-wise percentage using prop.table -

> prop.table(contable, margin = 1) %>% round(2) # margin = 1 for row
                               
                                Midwest Northeast South West
  Friendly and Conventional        0.46      0.00  0.42 0.12
  Relaxed and Creative             0.00      0.00  0.20 0.80
  Temperamental and Uninhibited    0.07      0.64  0.29 0.00

To see the column-wise percentage using prop.table -

> prop.table(contable, margin = 2) %>% round(2) # margin = 2 for column
                               
                                Midwest Northeast South West
  Friendly and Conventional        0.92      0.00  0.62 0.27
  Relaxed and Creative             0.00      0.00  0.12 0.73
  Temperamental and Uninhibited    0.08      1.00  0.25 0.00

To see the overall percentage -

> prop.table(contable) %>% round(2)
                               
                                Midwest Northeast South West
  Friendly and Conventional        0.23      0.00  0.21 0.06
  Relaxed and Creative             0.00      0.00  0.04 0.17
  Temperamental and Uninhibited    0.02      0.19  0.08 0.00

Chi-square test

Calculating chi-square test statistic and associated results -

> chires <- chisq.test(contable)
> chires

    Pearson's Chi-squared test

data:  contable
X-squared = 50.002, df = 6, p-value = 4.697e-09

This can also be done in another way -

> chisq.test(df$psychRegions, df$region)

    Pearson's Chi-squared test

data:  df$psychRegions and df$region
X-squared = 50.002, df = 6, p-value = 4.697e-09

The created list object has these -

> typeof(chires)
[1] "list"
> glimpse(chires)
List of 9
 $ statistic: Named num 50
  ..- attr(*, "names")= chr "X-squared"
 $ parameter: Named int 6
  ..- attr(*, "names")= chr "df"
 $ p.value  : num 4.7e-09
 $ method   : chr "Pearson's Chi-squared test"
 $ data.name: chr "contable"
 $ observed : 'table' int [1:3, 1:4] 11 0 1 0 0 9 10 2 4 3 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ expected : num [1:3, 1:4] 6 2.5 3.5 4.5 1.88 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ residuals: 'table' num [1:3, 1:4] 2.04 -1.58 -1.34 -2.12 -1.37 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 $ stdres   : 'table' num [1:3, 1:4] 3.33 -2.05 -1.83 -3.33 -1.71 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
  .. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
 - attr(*, "class")= chr "htest"

That is -

> chires$statistic  # the value the chi-squared test statistic
X-squared 
 50.00173 
> chires$parameter  # the degrees of freedom of the approximate chi-squared distribution
df 
 6 
> chires$p.value  
[1] 4.697313e-09
> chires$observed   # the observed counts
                               
                                Midwest Northeast South West
  Friendly and Conventional          11         0    10    3
  Relaxed and Creative                0         0     2    8
  Temperamental and Uninhibited       1         9     4    0
> chires$expected   # the expected counts under the null hypothesis
                               
                                Midwest Northeast    South     West
  Friendly and Conventional         6.0     4.500 8.000000 5.500000
  Relaxed and Creative              2.5     1.875 3.333333 2.291667
  Temperamental and Uninhibited     3.5     2.625 4.666667 3.208333
> chires$residuals  # the Pearson residuals, (observed - expected) / sqrt(expected)
                               
                                   Midwest  Northeast      South       West
  Friendly and Conventional      2.0412415 -2.1213203  0.7071068 -1.0660036
  Relaxed and Creative          -1.5811388 -1.3693064 -0.7302967  3.7708009
  Temperamental and Uninhibited -1.3363062  3.9347354 -0.3086067 -1.7911821
LS0tDQp0aXRsZTogIkNvbnRpbmdlbmN5IHRhYmxlIGFuZCBDaGktc3F1YXJlIHRlc3QiDQphdXRob3I6ICJNRCBBSFNBTlVMIElTTEFNIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICB0b2NfZGVwdGg6IDQNCiAgICB0aGVtZTogY2VydWxlYW4NCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQogICAgI2NvZGVfZm9sZGluZzogc2hvdw0KLS0tDQpgYGB7ciwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldCgNCiAgY29tbWVudCA9ICIiLCBwcm9tcHQgPSBUUlVFLCBtZXNzYWdlPUYsIHdhcm5pbmcgPUYNCikNCmBgYA0KLS0tDQoNCiMjIExvYWRpbmcgcGFja2FnZXMNCg0KTG9hZGluZyByZXF1aXJlZCBwYWNrYWdlcyB1c2luZyBwYWNtYW4ncyBwX2xvYWQoKSBmdW5jdGlvbiAtDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHBhY21hbiwgcmlvLCB0aWR5dmVyc2UsIG1hZ3JpdHRyKQ0KYGBgDQoNCiMjIEltcG9ydGluZyBkYXRhIGFuZCBtYW5pcHVsYXRpb24NCg0KVXNpbmcgaW1wb3J0KCkgZnVuY3Rpb24gZnJvbSByaW8gcGFja2FnZSB0byBpbXBvcnQgdGhlIGRhdGEgZmlsZSBhbmQgcGVyZm9ybSBiYXNpYyBtYW5pcHVsYXRpb25zIHVzaW5nIHRpZHl2ZXJzZSAtIA0KYGBge3J9DQpkZiA8LSBpbXBvcnQoIkRhdGEvU3RhdGVEYXRhLmNzdiIpICU+JQ0KICBhc190aWJibGUoKQ0KZ2xpbXBzZShkZikgIyBzaW1pbGFyIHRvIHN0cigpIGJ1dCBpdCBpcyBhIGZ1bmN0aW9uIGZyb20gdGliYmxlIHBhY2thZ2UNCmBgYA0KDQpTZWxlY3Rpbmcgb3VyIHJlcXVpcmVkIHRocmVlIGNvbHVtbnMgc3RhdGVfY29kZSwgcmVnaW9uLCBwc3ljaFJlZ2lvbnMgLSANCmBgYHtyfQ0KZGYgJTw+JSBzZWxlY3Qoc3RhdGVfY29kZSwgcmVnaW9uLCBwc3ljaFJlZ2lvbnMpICU+JQ0KICBtdXRhdGUocHN5Y2hSZWdpb25zID0gYXMuZmFjdG9yKHBzeWNoUmVnaW9ucykpDQpoZWFkKGRmKQ0KYGBgDQoNCkdldCBoZWxwIG9uIHlvdXIgcHJvYmxlbXMgZnJvbSBleHBlcmllbmNlZCBzdGF0aXN0aWNpYW5zIGF0IFtob21ld29ya2hlbHBvbmxpbmUubmV0XShodHRwczovL3d3dy5ob21ld29ya2hlbHBvbmxpbmUubmV0L3Byb2dyYW1taW5nL3ItcHJvZ3JhbW1pbmcpLg0KDQojIyBDb250aW5nZW5jeSB0YWJsZQ0KDQpDcmVhdGluZyBhIGNpbnRpbmdlbmN5IHRhYmxlIHVzaW5nIHRhYmxlKCkgLSANCmBgYHtyfQ0KY29udGFibGUgPC0gdGFibGUoZGYkcHN5Y2hSZWdpb25zLCBkZiRyZWdpb24pDQpjb250YWJsZQ0KYGBgDQoNCiMjIFBlcmNlbnRhZ2UgaW4gdGFibGUNCg0KVG8gc2VlIHRoZSByb3ctd2lzZSBwZXJjZW50YWdlIHVzaW5nIHByb3AudGFibGUgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSwgbWFyZ2luID0gMSkgJT4lIHJvdW5kKDIpICMgbWFyZ2luID0gMSBmb3Igcm93DQpgYGANCg0KVG8gc2VlIHRoZSBjb2x1bW4td2lzZSBwZXJjZW50YWdlIHVzaW5nIHByb3AudGFibGUgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSwgbWFyZ2luID0gMikgJT4lIHJvdW5kKDIpICMgbWFyZ2luID0gMiBmb3IgY29sdW1uDQpgYGANCg0KVG8gc2VlIHRoZSBvdmVyYWxsIHBlcmNlbnRhZ2UgLSANCmBgYHtyfQ0KcHJvcC50YWJsZShjb250YWJsZSkgJT4lIHJvdW5kKDIpDQpgYGANCg0KIyMgQ2hpLXNxdWFyZSB0ZXN0IA0KDQpDYWxjdWxhdGluZyBjaGktc3F1YXJlIHRlc3Qgc3RhdGlzdGljIGFuZCBhc3NvY2lhdGVkIHJlc3VsdHMgLSANCmBgYHtyfQ0KY2hpcmVzIDwtIGNoaXNxLnRlc3QoY29udGFibGUpDQpjaGlyZXMNCmBgYA0KDQpUaGlzIGNhbiBhbHNvIGJlIGRvbmUgaW4gYW5vdGhlciB3YXkgLSANCmBgYHtyfQ0KY2hpc3EudGVzdChkZiRwc3ljaFJlZ2lvbnMsIGRmJHJlZ2lvbikNCmBgYA0KDQpUaGUgY3JlYXRlZCBsaXN0IG9iamVjdCBoYXMgdGhlc2UgLQ0KYGBge3J9DQp0eXBlb2YoY2hpcmVzKQ0KZ2xpbXBzZShjaGlyZXMpDQpgYGANCg0KVGhhdCBpcyAtIA0KYGBge3J9DQpjaGlyZXMkc3RhdGlzdGljICAjIHRoZSB2YWx1ZSB0aGUgY2hpLXNxdWFyZWQgdGVzdCBzdGF0aXN0aWMNCmNoaXJlcyRwYXJhbWV0ZXIgICMgdGhlIGRlZ3JlZXMgb2YgZnJlZWRvbSBvZiB0aGUgYXBwcm94aW1hdGUgY2hpLXNxdWFyZWQgZGlzdHJpYnV0aW9uDQpjaGlyZXMkcC52YWx1ZSAgDQpjaGlyZXMkb2JzZXJ2ZWQgICAjIHRoZSBvYnNlcnZlZCBjb3VudHMNCmNoaXJlcyRleHBlY3RlZCAgICMgdGhlIGV4cGVjdGVkIGNvdW50cyB1bmRlciB0aGUgbnVsbCBoeXBvdGhlc2lzDQpjaGlyZXMkcmVzaWR1YWxzICAjIHRoZSBQZWFyc29uIHJlc2lkdWFscywgKG9ic2VydmVkIC0gZXhwZWN0ZWQpIC8gc3FydChleHBlY3RlZCkNCmBgYA0KDQo=