Loading required packages using pacman’s p_load() function -
> pacman::p_load(pacman, rio, tidyverse, magrittr)
Using import() function from rio package to import the data file and perform basic manipulations using tidyverse -
> df <- import("Data/StateData.csv") %>%
+ as_tibble()
> glimpse(df) # similar to str() but it is a function from tibble package
Rows: 48
Columns: 22
$ State <chr> "Alabama", "Arizona", "Arkansas", "California", "...
$ state_code <chr> "AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "...
$ region <chr> "South", "West", "South", "West", "West", "Northe...
$ governor <chr> "Republican", "Republican", "Republican", "Democr...
$ psychRegions <chr> "Friendly and Conventional", "Relaxed and Creativ...
$ extraversion <dbl> 55.5, 50.6, 49.9, 51.4, 45.3, 57.6, 47.0, 60.9, 6...
$ agreeableness <dbl> 52.7, 46.6, 52.7, 49.0, 47.5, 38.6, 38.8, 50.7, 6...
$ conscientiousness <dbl> 55.5, 58.4, 41.0, 43.2, 58.8, 34.2, 36.5, 62.7, 6...
$ neuroticism <dbl> 48.7, 38.1, 56.2, 39.1, 34.3, 53.4, 62.4, 40.8, 3...
$ openness <dbl> 42.7, 54.7, 40.3, 65.0, 57.9, 53.9, 42.7, 61.0, 5...
$ instagram <dbl> 0.64, 0.18, 0.46, 1.47, -1.04, 0.37, 1.48, 0.85, ...
$ facebook <dbl> 1.65, -0.26, 1.10, -0.42, -1.06, -0.98, -1.12, 0....
$ retweet <dbl> 0.35, -0.57, -0.60, 0.48, -0.90, 1.15, 1.19, -0.2...
$ entrepreneur <dbl> 0.26, 0.56, 0.25, 0.50, 0.02, 0.07, 2.55, 0.78, 1...
$ gdpr <dbl> -0.77, -0.31, -0.60, 1.12, 0.59, 0.71, 1.21, -0.2...
$ privacy <dbl> 0.58, -0.45, 0.69, 0.23, -0.22, 0.36, 0.90, -0.14...
$ university <dbl> 1.74, -0.77, 0.02, -1.92, -0.44, 0.37, 2.19, -1.1...
$ mortgage <dbl> 1.41, 1.01, -0.66, -0.88, 1.49, 0.48, 1.66, 0.11,...
$ volunteering <dbl> -1.49, 0.96, -1.19, 0.57, 1.01, 1.28, 1.37, 0.46,...
$ museum <dbl> -1.11, -0.13, -0.95, 0.05, 0.72, 1.18, 0.70, -0.7...
$ scrapbook <dbl> 0.06, 0.34, 0.85, -0.69, -0.34, -0.81, -0.73, -0....
$ modernDance <dbl> -1.27, 0.41, -1.44, 0.38, -0.29, 0.55, -0.22, -0....
Selecting our required three columns state_code, region, psychRegions -
> df %<>% select(state_code, region, psychRegions) %>%
+ mutate(psychRegions = as.factor(psychRegions))
> head(df)
# A tibble: 6 x 3
state_code region psychRegions
<chr> <chr> <fct>
1 AL South Friendly and Conventional
2 AZ West Relaxed and Creative
3 AR South Friendly and Conventional
4 CA West Relaxed and Creative
5 CO West Friendly and Conventional
6 CT Northeast Temperamental and Uninhibited
Get help on your problems from experienced statisticians at homeworkhelponline.net.
Creating a cintingency table using table() -
> contable <- table(df$psychRegions, df$region)
> contable
Midwest Northeast South West
Friendly and Conventional 11 0 10 3
Relaxed and Creative 0 0 2 8
Temperamental and Uninhibited 1 9 4 0
To see the row-wise percentage using prop.table -
> prop.table(contable, margin = 1) %>% round(2) # margin = 1 for row
Midwest Northeast South West
Friendly and Conventional 0.46 0.00 0.42 0.12
Relaxed and Creative 0.00 0.00 0.20 0.80
Temperamental and Uninhibited 0.07 0.64 0.29 0.00
To see the column-wise percentage using prop.table -
> prop.table(contable, margin = 2) %>% round(2) # margin = 2 for column
Midwest Northeast South West
Friendly and Conventional 0.92 0.00 0.62 0.27
Relaxed and Creative 0.00 0.00 0.12 0.73
Temperamental and Uninhibited 0.08 1.00 0.25 0.00
To see the overall percentage -
> prop.table(contable) %>% round(2)
Midwest Northeast South West
Friendly and Conventional 0.23 0.00 0.21 0.06
Relaxed and Creative 0.00 0.00 0.04 0.17
Temperamental and Uninhibited 0.02 0.19 0.08 0.00
Calculating chi-square test statistic and associated results -
> chires <- chisq.test(contable)
> chires
Pearson's Chi-squared test
data: contable
X-squared = 50.002, df = 6, p-value = 4.697e-09
This can also be done in another way -
> chisq.test(df$psychRegions, df$region)
Pearson's Chi-squared test
data: df$psychRegions and df$region
X-squared = 50.002, df = 6, p-value = 4.697e-09
The created list object has these -
> typeof(chires)
[1] "list"
> glimpse(chires)
List of 9
$ statistic: Named num 50
..- attr(*, "names")= chr "X-squared"
$ parameter: Named int 6
..- attr(*, "names")= chr "df"
$ p.value : num 4.7e-09
$ method : chr "Pearson's Chi-squared test"
$ data.name: chr "contable"
$ observed : 'table' int [1:3, 1:4] 11 0 1 0 0 9 10 2 4 3 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
.. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
$ expected : num [1:3, 1:4] 6 2.5 3.5 4.5 1.88 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
.. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
$ residuals: 'table' num [1:3, 1:4] 2.04 -1.58 -1.34 -2.12 -1.37 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
.. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
$ stdres : 'table' num [1:3, 1:4] 3.33 -2.05 -1.83 -3.33 -1.71 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:3] "Friendly and Conventional" "Relaxed and Creative" "Temperamental and Uninhibited"
.. ..$ : chr [1:4] "Midwest" "Northeast" "South" "West"
- attr(*, "class")= chr "htest"
That is -
> chires$statistic # the value the chi-squared test statistic
X-squared
50.00173
> chires$parameter # the degrees of freedom of the approximate chi-squared distribution
df
6
> chires$p.value
[1] 4.697313e-09
> chires$observed # the observed counts
Midwest Northeast South West
Friendly and Conventional 11 0 10 3
Relaxed and Creative 0 0 2 8
Temperamental and Uninhibited 1 9 4 0
> chires$expected # the expected counts under the null hypothesis
Midwest Northeast South West
Friendly and Conventional 6.0 4.500 8.000000 5.500000
Relaxed and Creative 2.5 1.875 3.333333 2.291667
Temperamental and Uninhibited 3.5 2.625 4.666667 3.208333
> chires$residuals # the Pearson residuals, (observed - expected) / sqrt(expected)
Midwest Northeast South West
Friendly and Conventional 2.0412415 -2.1213203 0.7071068 -1.0660036
Relaxed and Creative -1.5811388 -1.3693064 -0.7302967 3.7708009
Temperamental and Uninhibited -1.3363062 3.9347354 -0.3086067 -1.7911821