options(scipen=999)
library(tidyverse)
library(socviz)
library(datasetsICR)
data("gss_sm")
LAB 5 - EXAMPLES
First things first: THIS QUARTO FILE OFFERS A BASIC INSIGHT INTO THE TWO VARIABLES partyid AND polviews. THE TWO OTHER VARIABLES ARE CHILDREN (childs) AND AGE (age). THE MAIN ONE I WILL BE USING FOR THE CHARTS IS polviews.
PART 1: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CATEGORICAL VARIABLES
library(socviz)
# Cross-tabulate party identification (partyid) against political views
# (polviews).
#   1. group: one group per partyid x polviews pair
#   2. count: N = number of respondents in each pair
#   3. divide: summarize() peels off the innermost grouping (polviews), so the
#      result is still grouped by partyid and sum(N) is the per-party total.
# freq and pct are therefore each view's share *within* a party; the <NA>
# categories are kept and count towards those totals.
pip1 <- gss_sm %>%
  group_by(partyid, polviews) %>%
  # .groups is spelled out so the partyid grouping that mutate() relies on is
  # explicit and dplyr does not print its regrouping message.
  summarize(N = n(), .groups = "drop_last") %>%
  mutate(
    freq = N / sum(N),
    pct = round((freq * 100), 0)
  )
pip1
# A tibble: 72 × 5
# Groups: partyid [9]
partyid polviews N freq pct
<fct> <fct> <int> <dbl> <dbl>
1 Strong Democrat Extremely Liberal 55 0.119 12
2 Strong Democrat Liberal 133 0.287 29
3 Strong Democrat Slightly Liberal 57 0.123 12
4 Strong Democrat Moderate 128 0.276 28
5 Strong Democrat Slightly Conservative 33 0.0713 7
6 Strong Democrat Conservative 33 0.0713 7
7 Strong Democrat Extremely Conservative 10 0.0216 2
8 Strong Democrat <NA> 14 0.0302 3
9 Not Str Democrat Extremely Liberal 18 0.0363 4
10 Not Str Democrat Liberal 80 0.161 16
# ℹ 62 more rows
PART 2: CREATE STACKED AND DODGED BAR CHARTS FROM 2 CATEGORICAL VARIABLES
# Title and caption text reused by the bar charts.
p_title <- "Political Party by Political View"
p_caption <- "gss_sm dataset"

# Base plot: rows with a missing party or a missing political view are left
# out; party goes on x, percent on y, and political view sets the fill.
p <- ggplot(
  data = subset(pip1, !(is.na(partyid) | is.na(polviews))),
  mapping = aes(x = partyid, y = pct, fill = polviews)
)

# Stacked bar chart, each segment labelled with its percentage at mid-height.
p +
  labs(
    title = p_title,
    subtitle = "As a stacked bar chart",
    caption = p_caption,
    x = "The selected Party",
    y = "Percent",
    fill = "Con/Liberal"
  ) +
  geom_col(position = "stack") +
  geom_text(aes(label = pct), position = position_stack(vjust = .5))
# AS DODGED BAR CHART
# One bar per political view, placed side by side within each party, with the
# percentage printed at the top of each bar.
p + geom_col(position = "dodge2") +
  labs(x = "The selected Party", y = "Percent", fill = "Con/Liberal",
       title = p_title, caption = p_caption,
       subtitle = "As a dodged bar chart") +
  # The labels are dodged with position_dodge2(), the same algorithm the
  # "dodge2" bars use, so each number sits over its own bar. Plain
  # position_dodge() places elements differently from dodge2.
  geom_text(aes(label = pct), position = position_dodge2(width = .9))
# AS FACETED HORIZONTAL BAR CHART
# One panel per political view. coord_flip() turns the party bars horizontal,
# the x-axis title is removed, and the fill legend is hidden.
# NOTE: the author left a "fix here" marker on the facet_grid() line.
p +
  labs(
    title = p_title,
    subtitle = "As a faceted horizontal bar chart",
    caption = p_caption,
    x = NULL,
    y = "Percent",
    fill = "Con/Liberal"
  ) +
  geom_col(position = "dodge2") +
  geom_text(aes(label = pct), position = position_dodge2(width = 1)) +
  facet_grid(~ polviews) + # fix here
  coord_flip() +
  guides(fill = "none")
PART 3: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CONTINUOUS & ONE CATEGORICAL VARIABLE
# Summary table with one row per political view (the <NA> view included):
#   N           - number of respondents
#   childs_mean - mean of childs, missing values ignored
#   age_mean    - mean of age, missing values ignored
#   freq / pct  - share of all respondents, as a proportion and as a whole
#                 percent
pip2 <- gss_sm %>%
  group_by(polviews) %>%
  summarize(
    N = n(),
    childs_mean = mean(childs, na.rm = TRUE),
    age_mean = mean(age, na.rm = TRUE)
  ) %>%
  mutate(freq = N / sum(N)) %>%
  mutate(pct = round(100 * freq, 0))
pip2
# A tibble: 8 × 6
polviews N childs_mean age_mean freq pct
<fct> <int> <dbl> <dbl> <dbl> <dbl>
1 Extremely Liberal 136 1.60 48.6 0.0474 5
2 Liberal 350 1.47 46.8 0.122 12
3 Slightly Liberal 310 1.70 47.3 0.108 11
4 Moderate 1032 1.84 48.0 0.360 36
5 Slightly Conservative 382 2.07 50.3 0.133 13
6 Conservative 426 2.03 53.8 0.149 15
7 Extremely Conservative 120 2.29 54.3 0.0419 4
8 <NA> 111 2.08 46.6 0.0387 4
The results show that there are more conservatives than liberals, and that moderates outnumber either group. Conservatives also have a higher average number of children and a higher average age. Thankfully, the NA category is only 4% of respondents.
PART 4: SCATTERPLOT WITH A THIRD CATEGORICAL VARIABLE
Scatterplot for: polviews
# Scatterplot of the per-view averages from pip2: mean number of children on x,
# mean age on y, one coloured point per political view.
p <- ggplot(
  data = pip2,
  mapping = aes(x = childs_mean, y = age_mean, color = polviews)
)

p +
  labs(
    title = "Age and Children by Political View",
    subtitle = "Conservatives seem to have better age and more children.",
    caption = "gss_sm dataset{socviz}",
    x = "Average Child.",
    y = "Average Age"
  ) +
  geom_point(size = 5) +
  # Left-aligned note anchored at (1.6, 58).
  annotate(
    geom = "text",
    x = 1.6,
    y = 58,
    hjust = 0,
    label = "These show liberal at the bottom left and \n conservatives in the top right."
  )
PART 5: LEGEND AND GUIDES
Adding a bit with titles and legends
# Scatterplot of mean children (x) against mean age (y) per political view,
# with a text note, a shaded rectangle, and a custom legend title.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, color = polviews))
p + geom_point(size = 5) +
  # Blue note anchored at (1.6, 57); the negative hjust shifts the text to the
  # right of the anchor.
  annotate(geom = "text", x = 1.6, y = 57,
           label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
           hjust = -0.3, color = "blue") +
  # Translucent rectangle spanning x 1.9-2.4 and y 49-56.
  annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
           ymin = 49, ymax = 56, alpha = 0.2) +
  # Title spelling corrected ("Averge" -> "Average"); color = sets the legend
  # title.
  labs(y = "Average Age", x = "Average Child.",
       title = "The Average Age and Children by Political View",
       subtitle = "Conservatives seem to have better age and more children.",
       caption = "gss_sm dataset{socviz}", color = "Political \n Views")
PART 6: DATA LABELS VS LEGEND
Changing to data labels.
# Scatterplot of mean children (x) against mean age (y) per political view.
# Each point carries its political view as a text label, so the colour legend
# is switched off.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, label = polviews))
p + geom_point(aes(color = polviews), size = 5) +
  # Small labels nudged above their points.
  geom_text(size = 3, vjust = -0.5) +
  # Blue note anchored at (1.6, 57); the negative hjust shifts the text to the
  # right of the anchor.
  annotate(geom = "text", x = 1.6, y = 57,
           label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
           hjust = -0.9, color = "blue") +
  # Translucent grey rectangle spanning x 1.9-2.4 and y 49-56.
  annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
           ymin = 49, ymax = 56, alpha = 0.2, fill = "grey") +
  # Title spelling corrected ("averge" -> "average").
  labs(y = "Average Age", x = "Average Child.",
       title = "The average Age and Children by Political View",
       subtitle = "Conservatives seem to have better age and more children.",
       caption = "gss_sm dataset{socviz}") +
  # "none" replaces the deprecated FALSE and matches the guides(fill = "none")
  # call used for the faceted bar chart.
  guides(color = "none")
PART 7: INTERPRETATION
This is my final chart, with the simple interpretation that conservatives are, on average, older and have more children.
# Final chart: mean children (x) against mean age (y) per political view, with
# both a text label on each point and a titled colour legend.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, label = polviews))
p + geom_point(aes(color = polviews), size = 5) +
  # Small labels nudged above their points.
  geom_text(size = 3, vjust = -0.5) +
  # Blue note anchored at (1.6, 57); the negative hjust shifts the text to the
  # right of the anchor.
  annotate(geom = "text", x = 1.6, y = 57,
           label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
           hjust = -0.4, color = "blue") +
  # Translucent grey rectangle spanning x 1.9-2.4 and y 49-56.
  annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
           ymin = 49, ymax = 56, alpha = 0.2, fill = "grey") +
  # Title spelling corrected ("averge" -> "average"); color = sets the legend
  # title.
  labs(y = "Average Age", x = "Average Child.",
       title = "The average Age and Children by Political View",
       subtitle = "Conservatives seem to have better age and more children.",
       caption = "gss_sm dataset{socviz}", color = "Political \n Views")