Let’s start with a basic bar plot for Religion
First, let’s look at the count/frequency statistics for the religion variable.
# Tally how many respondents fall into each religion category.
gss %>%
  count(relig, name = "count")
# A tibble: 15 x 2
relig count
<chr> <int>
1 Buddhism 147
2 Catholic 5124
3 Christian 689
4 Don't know 15
5 Hinduism 71
6 Inter-nondenominational 109
7 Jewish 388
8 Moslem/islam 104
9 Native american 23
10 No answer 93
11 None 3523
12 Orthodox-christian 95
13 Other 224
14 Other eastern 32
15 Protestant 10846
Here is the default graphic for a barplot of the counts. Notice, if we use geom_bar() it will automatically summarize for us.
# Default bar chart: geom_bar() counts the rows per religion on its own,
# so no summarising step is needed beforehand.
ggplot(data = gss, mapping = aes(x = relig)) +
  geom_bar()
Notice that going with just the default is not going to give us a great graph. We need to consider some modifications.
The forcats package allows us to work directly with categorical variables in ways that make applying Gestalt principles easier.
Learn more here: https://forcats.tidyverse.org/
We can use fct_infreq to order the bars by frequency, which applies the principle of simplicity and makes comparison easier.
# Bars ordered from the most to the least frequent religion category.
ggplot(data = gss, mapping = aes(x = fct_infreq(relig))) +
  geom_bar()
We can use fct_rev to reverse the order.
# Same frequency ordering as before, flipped so the rarest category
# comes first.
ggplot(data = gss, mapping = aes(x = fct_rev(fct_infreq(relig)))) +
  geom_bar()
We can use fct_collapse to redefine our factor levels. Remember - doing this is a form of analysis because you are making some assumptions about what groups go together!
# Merge the raw religion categories into seven broader groups, then draw
# the bars from the most to the least frequent group.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  ggplot(aes(x = fct_infreq(religion))) +
  geom_bar()
Use facet_wrap to create groups of visualizations based on a specific variable.
# Collapsed religion groups, one bar chart panel per marital status,
# with the panels laid out in two rows.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  ggplot(aes(x = fct_infreq(religion))) +
  geom_bar() +
  facet_wrap(~marital, nrow = 2)
Do we need to do a bit of data cleaning? Should we consider something other than count to display here? Should we switch the variable?
# Share of each marital status within every collapsed religion group,
# drawn as horizontal bars with one panel per religion group.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  # Count respondents in every religion x marital-status cell.
  group_by(religion, marital) %>%
  summarise(n = n(), .groups = "drop") %>%
  # Proportions are computed within each religion group.
  group_by(religion) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  # "No answer" is removed only after the proportions are computed, so
  # those respondents still count towards each group's denominator.
  filter(marital != "No answer") %>%
  ggplot(
    aes(x = fct_reorder(marital, proportion), y = proportion, fill = marital)
  ) +
  geom_col() +
  coord_flip() +
  facet_wrap(~religion, nrow = 3) +
  # The fill legend duplicates the axis labels, so hide it.
  guides(fill = "none")
`summarise()` has grouped output by 'religion'. You can override using the
`.groups` argument.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.
# Party affiliation share within each collapsed religion group, shown as
# dodged bars with the religion groups in a fixed, hand-picked order.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  # Collapse party identification into Conservative / Liberal and lump
  # the remaining, less frequent levels together.
  mutate(
    part_aff = fct_collapse(
      partyid,
      Conservative = c(
        "Strong republican", "Not str republican", "Ind,near rep"
      ),
      Liberal = c("Strong democrat", "Not str democrat", "Ind,near dem")
    ) %>%
      fct_lump()
  ) %>%
  group_by(religion, part_aff) %>%
  summarise(n = n(), .groups = "drop") %>%
  # Proportions are computed within each religion group.
  group_by(religion) %>%
  mutate(proportion = n / sum(n)) %>%
  # Ungroup before re-levelling so the grouping column itself is not
  # modified while it is still being grouped on.
  ungroup() %>%
  mutate(
    religion = factor(
      religion,
      levels = c(
        "Jewish", "Muslim", "Eastern", "None", "Other", "Christian", "Unknown"
      )
    )
  ) %>%
  ggplot(aes(x = religion, y = proportion, fill = part_aff)) +
  geom_col(position = position_dodge()) + # alternative: position_stack()
  labs(
    x = "Religion Groups",
    y = "Proportion within Religious Group",
    fill = "Party Affiliation"
  ) +
  scale_fill_viridis_d() +
  theme_bw() +
  theme(legend.position = "top")
`summarise()` has grouped output by 'religion'. You can override using the
`.groups` argument.
# Party affiliation over the survey years for respondents whose
# collapsed religion group is "None". geom_bar() stacks raw counts per
# year, so the y axis is a count rather than a proportion.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  # Collapse party identification into Conservative / Liberal and lump
  # the remaining, less frequent levels together.
  mutate(
    part_aff = fct_collapse(
      partyid,
      Conservative = c(
        "Strong republican", "Not str republican", "Ind,near rep"
      ),
      Liberal = c("Strong democrat", "Not str democrat", "Ind,near dem")
    ) %>%
      fct_lump()
  ) %>%
  filter(religion == "None") %>%
  # Alternative (disabled): plot within-year proportions instead.
  # group_by(year, part_aff) %>%
  # summarise(n = n()) %>%
  # group_by(year) %>%
  # mutate(proportion = n / sum(n)) %>%
  ggplot(aes(x = year, fill = part_aff)) +
  geom_bar() + # alternative: position = position_dodge()
  labs(
    x = "Year",
    y = "Number of Respondents with No Religion",
    fill = "Party Affiliation"
  ) +
  scale_fill_viridis_d() +
  theme_bw() +
  theme(legend.position = "top")
# Mean respondent age per collapsed religion group, drawn as horizontal
# bars sorted by that mean.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  group_by(religion) %>%
  # Missing ages are ignored when averaging.
  summarise(Avg_age = mean(age, na.rm = TRUE)) %>%
  ggplot(aes(x = fct_reorder(religion, Avg_age), y = Avg_age)) +
  geom_col(aes(fill = religion)) +
  coord_flip() +
  # The fill legend duplicates the axis labels, so hide it.
  guides(fill = "none") +
  scale_fill_viridis_d() +
  labs(x = "Religion Groups", y = "Average Age (years)") +
  theme_bw()
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.
library(see)
Warning: package 'see' was built under R version 4.1.3
# Age distribution per collapsed religion group: a boxplot overlaid with
# a half violin, drawn horizontally.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  # Rows without an age cannot be placed on the age axis.
  drop_na(age) %>%
  ggplot(aes(x = fct_reorder(religion, age), y = age)) +
  geom_boxplot(aes(fill = religion), alpha = 0.5) +
  geom_violinhalf(aes(fill = religion), alpha = 0.2) + # from package "see"
  labs(x = "Religion Groups", y = "Age (years)") +
  coord_flip() +
  scale_fill_viridis_d() +
  # Legends duplicate the axis labels, so hide them.
  guides(fill = "none", color = "none") +
  theme_bw()
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.
# Heatmap of the income-bracket distribution within each collapsed
# religion group; tile colour is the within-group proportion.
gss %>%
  mutate(
    religion = fct_collapse(
      relig,
      Christian = c(
        "Catholic", "Christian", "Orthodox-christian", "Protestant"
      ),
      Eastern = c("Buddhism", "Hinduism", "Other eastern"),
      Jewish = "Jewish",
      Muslim = "Moslem/islam",
      None = "None",
      Unknown = c("No answer", "Don't know"),
      Other = c("Other", "Native american", "Inter-nondenominational")
    )
  ) %>%
  # Keep only respondents who reported an income bracket.
  filter(
    !rincome %in% c("Refused", "Not applicable", "No answer", "Don't know")
  ) %>%
  # Put the brackets in ascending order so the y axis reads low to high.
  mutate(
    rincome = factor(
      rincome,
      levels = c(
        "Lt $1000", "$1000 to 2999",
        "$3000 to 3999", "$4000 to 4999",
        "$5000 to 5999", "$6000 to 6999",
        "$7000 to 7999", "$8000 to 9999",
        "$10000 - 14999", "$15000 - 19999",
        "$20000 - 24999", "$25000 or more"
      )
    )
  ) %>%
  # Merge the narrow low-income brackets into two $5000-wide brackets.
  mutate(
    rincome = fct_collapse(
      rincome,
      "$0 to 4999" = c(
        "Lt $1000", "$1000 to 2999", "$3000 to 3999", "$4000 to 4999"
      ),
      "$5000 to 9999" = c(
        "$5000 to 5999", "$6000 to 6999", "$7000 to 7999", "$8000 to 9999"
      )
    )
  ) %>%
  group_by(religion, rincome) %>%
  summarise(n = n(), .groups = "drop") %>%
  # Proportions are computed within each religion group.
  group_by(religion) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = religion, y = rincome, fill = proportion)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") +
  labs(x = "Religion Groups", y = "Income Level", fill = "Proportion") +
  theme_bw()
`summarise()` has grouped output by 'religion'. You can override using the
`.groups` argument.