library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(socviz)
library(gmodels)
# Cross-tabulate religion (rows) against region (columns); table() leaves
# out missing values by default.
table(gss_sm[["religion"]], gss_sm[["bigregion"]])
##
## Northeast Midwest South West
## Protestant 158 325 650 238
## Catholic 162 172 160 155
## Jewish 27 3 11 10
## None 112 157 170 180
## Other 28 33 50 48
# Same cross-tab through gmodels: each cell also reports its chi-square
# contribution and its share of the row, column, and table totals.
CrossTable(x = gss_sm$religion, y = gss_sm$bigregion)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 2849
##
##
## | gss_sm$bigregion
## gss_sm$religion | Northeast | Midwest | South | West | Row Total |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## Protestant | 158 | 325 | 650 | 238 | 1371 |
## | 24.877 | 0.149 | 44.346 | 14.194 | |
## | 0.115 | 0.237 | 0.474 | 0.174 | 0.481 |
## | 0.324 | 0.471 | 0.624 | 0.377 | |
## | 0.055 | 0.114 | 0.228 | 0.084 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## Catholic | 162 | 172 | 160 | 155 | 649 |
## | 23.502 | 1.397 | 25.093 | 0.882 | |
## | 0.250 | 0.265 | 0.247 | 0.239 | 0.228 |
## | 0.333 | 0.249 | 0.154 | 0.246 | |
## | 0.057 | 0.060 | 0.056 | 0.054 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## Jewish | 27 | 3 | 11 | 10 | 51 |
## | 38.340 | 7.080 | 3.128 | 0.149 | |
## | 0.529 | 0.059 | 0.216 | 0.196 | 0.018 |
## | 0.055 | 0.004 | 0.011 | 0.016 | |
## | 0.009 | 0.001 | 0.004 | 0.004 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## None | 112 | 157 | 170 | 180 | 619 |
## | 0.362 | 0.335 | 13.953 | 13.426 | |
## | 0.181 | 0.254 | 0.275 | 0.291 | 0.217 |
## | 0.230 | 0.228 | 0.163 | 0.285 | |
## | 0.039 | 0.055 | 0.060 | 0.063 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## Other | 28 | 33 | 50 | 48 | 159 |
## | 0.025 | 0.788 | 1.129 | 4.641 | |
## | 0.176 | 0.208 | 0.314 | 0.302 | 0.056 |
## | 0.057 | 0.048 | 0.048 | 0.076 | |
## | 0.010 | 0.012 | 0.018 | 0.017 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 487 | 690 | 1041 | 631 | 2849 |
## | 0.171 | 0.242 | 0.365 | 0.221 | |
## ----------------|-----------|-----------|-----------|-----------|-----------|
##
##
# One small, semi-transparent jittered point per respondent, so that the
# density of each religion/region cell is visible.
ggplot(
  data = select(gss_sm, religion, bigregion),
  mapping = aes(x = bigregion, y = religion)
) +
  geom_jitter(size = 0.5, alpha = 0.5)

# Bubble chart: one point per religion/region cell, sized by the number of
# respondents in that cell.
gss_sm %>%
  group_by(religion, bigregion) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = bigregion, y = religion, size = count)) +
  geom_point()

# Same cell counts, but drawn as fixed-size points whose colour encodes the
# number of respondents.
gss_sm %>%
  group_by(religion, bigregion) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = bigregion, y = religion, color = count)) +
  geom_point(size = 9)

# Religion shares within each region. summarise() peels off the innermost
# grouping variable (religion), so sum(N) in mutate() is the regional total.
rel_by_region <- gss_sm %>%
  group_by(bigregion, religion) %>%
  summarise(N = n()) %>%
  mutate(
    freq = N / sum(N),
    pct = round(100 * freq)
  )
head(rel_by_region, n = 10)
# Region shares within each religion. The grouping order is reversed, so
# after summarise() the remaining group is religion and sum(N) is the
# total for that religion.
region_by_rel <- gss_sm %>%
  group_by(religion, bigregion) %>%
  summarise(N = n()) %>%
  mutate(
    freq = N / sum(N),
    pct = round(100 * freq)
  )
head(region_by_rel, n = 10)
# Dodged bars: percentage of each region's respondents by religion.
p <- ggplot(
  data = rel_by_region,
  mapping = aes(x = bigregion, y = pct, fill = religion)
)
p +
  geom_col(position = "dodge2") +
  labs(x = "Region", y = "Percent", fill = "Religion") +
  theme(legend.position = "top")

# Point version of the within-region percentages, coloured by religion.
p <- ggplot(rel_by_region, aes(x = bigregion, y = pct))
p +
  geom_point(aes(color = religion), size = 4) +
  # The legend belongs to the colour aesthetic, so its title must be set
  # through `color`; a `fill` label would be silently ignored here.
  labs(x = "Region", y = "Percent", color = "Religion") +
  theme(legend.position = "top")

# Jittered version of the within-region percentages, coloured by religion.
p <- ggplot(rel_by_region, aes(x = bigregion, y = pct))
p +
  geom_jitter(aes(color = religion), size = 4) +
  # The legend belongs to the colour aesthetic, so its title must be set
  # through `color`; a `fill` label would be silently ignored here.
  labs(x = "Region", y = "Percent", color = "Religion") +
  theme(legend.position = "top")

# Dodged bars: percentage of each religion's respondents by region.
p <- ggplot(
  data = region_by_rel,
  mapping = aes(x = religion, y = pct, fill = bigregion)
)
p +
  geom_col(position = "dodge2") +
  labs(x = "Religion", y = "Percent", fill = "Region") +
  theme(legend.position = "top")

# Jittered version of the within-religion percentages, coloured by region.
p <- ggplot(region_by_rel, aes(x = religion, y = pct))
p +
  geom_jitter(aes(color = bigregion), size = 4) +
  # The legend belongs to the colour aesthetic, so its title must be set
  # through `color`; a `fill` label would be silently ignored here.
  labs(x = "Religion", y = "Percent", color = "Region") +
  theme(legend.position = "top")

# Horizontal bars of religion percentages, one facet column per region.
p <- ggplot(rel_by_region, aes(x = religion, y = pct, fill = religion))
p +
  geom_col(position = "dodge2") +
  # No fill label: the fill legend is suppressed just below, since the bar
  # labels already name each religion.
  labs(x = NULL, y = "Percent") +
  # "none" is the supported way to drop a legend; passing FALSE is
  # deprecated in newer ggplot2 releases.
  guides(fill = "none") +
  coord_flip() +
  facet_grid(~ bigregion)

# Horizontal bars of region percentages, one facet column per religion.
p <- ggplot(region_by_rel, aes(x = bigregion, y = pct, fill = bigregion))
p +
  geom_col(position = "dodge2") +
  # No fill label: the fill legend is suppressed just below, since the bar
  # labels already name each region.
  labs(x = NULL, y = "Percent") +
  # "none" is the supported way to drop a legend; passing FALSE is
  # deprecated in newer ggplot2 releases.
  guides(fill = "none") +
  coord_flip() +
  facet_grid(~ religion)

# Cell counts plus a constant blank x value, so that every facet panel
# holds exactly one bar.
rr <- gss_sm %>%
  group_by(religion, bigregion) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(relreg = " ")

# Grid of single bars laid out like the cross-tab: religion in rows,
# region in columns, bar height equal to the cell count.
ggplot(data = rr, mapping = aes(x = relreg, y = count)) +
  geom_col() +
  facet_grid(religion ~ bigregion) +
  labs(x = "")

# Cross-tab that keeps missing values as their own level whenever any are
# present (useNA = "ifany").
t <- table(gss_sm$religion, gss_sm$bigregion, useNA = "ifany")
print(t)
##
## Northeast Midwest South West
## Protestant 158 325 650 238
## Catholic 162 172 160 155
## Jewish 27 3 11 10
## None 112 157 170 180
## Other 28 33 50 48
## <NA> 1 5 11 1
# Rebuild the NA-inclusive cross-tab and draw it as a mosaic plot. The
# table is passed by name so the default plot title stays "t".
t <- table(gss_sm$religion, gss_sm$bigregion, useNA = "ifany")
mosaicplot(t)
