LAB 5 - EXAMPLES

Author

Andy Koerner

# Strongly favor fixed over scientific notation when numbers are printed.
options(scipen=999) 
# tidyverse supplies the %>% pipe, the dplyr verbs and ggplot2 used below.
library(tidyverse)
# socviz is credited in the chart captions below as the source of gss_sm.
library(socviz)
# NOTE(review): nothing in the visible code appears to use datasetsICR -- confirm it is needed.
library(datasetsICR)
# Load the gss_sm dataset into the session.
data("gss_sm")

First things first: THIS QUARTO FILE OFFERS BASIC INSIGHT INTO THE TWO CATEGORICAL VARIABLES partyid AND polviews. THE TWO OTHER VARIABLES ARE childs (NUMBER OF CHILDREN) AND age. THE MAIN ONE I WILL BE USING FOR THE CHARTS IS polviews.

PART 1: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CATEGORICAL VARIABLES

library(socviz)



# Cross-tabulate party identification (partyid) against political views
# (polviews).
#   N    - number of respondents in each partyid x polviews combination
#   freq - that combination's share of its party (N / party total)
#   pct  - freq as a whole-number percentage
pip1 <- gss_sm %>%
  group_by(partyid, polviews) %>%
  # "drop_last" is what summarize() does by default here; spelling it out
  # documents that the result must stay grouped by partyid (so sum(N) below is
  # the per-party total) and silences the regrouping message.
  summarize(N = n(), .groups = "drop_last") %>%
  mutate(
    freq = N / sum(N),
    pct = round(freq * 100, 0)
  )

# group, count, divide
pip1
# A tibble: 72 × 5
# Groups:   partyid [9]
   partyid          polviews                   N   freq   pct
   <fct>            <fct>                  <int>  <dbl> <dbl>
 1 Strong Democrat  Extremely Liberal         55 0.119     12
 2 Strong Democrat  Liberal                  133 0.287     29
 3 Strong Democrat  Slightly Liberal          57 0.123     12
 4 Strong Democrat  Moderate                 128 0.276     28
 5 Strong Democrat  Slightly Conservative     33 0.0713     7
 6 Strong Democrat  Conservative              33 0.0713     7
 7 Strong Democrat  Extremely Conservative    10 0.0216     2
 8 Strong Democrat  <NA>                      14 0.0302     3
 9 Not Str Democrat Extremely Liberal         18 0.0363     4
10 Not Str Democrat Liberal                   80 0.161     16
# ℹ 62 more rows

PART 2: CREATE STACKED AND DODGED BAR CHARTS FROM 2 CATEGORICAL VARIABLES

# Title and caption text shared by every chart in this part.
p_title <- "Political Party by Political View"
p_caption <- "gss_sm dataset"

# Base plot reused by the charts below: party on the x axis, percent on the
# y axis, filled by political view. Rows missing either variable are removed
# before plotting.
p <- pip1 %>%
  filter(!is.na(partyid), !is.na(polviews)) %>%
  ggplot(mapping = aes(x = partyid, y = pct, fill = polviews))

# AS STACKED BAR CHART
# Each label is centred vertically inside its own segment of the stack.
p +
  geom_col(position = "stack") +
  geom_text(
    mapping = aes(label = pct),
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    title = p_title,
    subtitle = "As a stacked bar chart",
    caption = p_caption,
    x = "The selected Party",
    y = "Percent",
    fill = "Con/Liberal"
  )

# AS DODGED BAR CHART
# One bar per political view, placed side by side within each party.
p + geom_col(position = "dodge2") +
    labs(x = "The selected Party", y = "Percent", fill = "Con/Liberal",
         title = p_title, caption = p_caption,
         subtitle = "As a dodged bar chart") +
    # Dodge the labels with the same strategy as the bars (dodge2) so every
    # label is centred on its own bar; 0.9 is the default bar width.
    geom_text(aes(label = pct), position = position_dodge2(width = 0.9))

# AS FACETED HORIZONTAL BAR CHART
# One panel per political view, with the parties listed down the flipped axis.
# The fill legend is hidden because the panel headings already name each view.
p +
  geom_col(position = "dodge2") +
  geom_text(
    mapping = aes(label = pct),
    position = position_dodge2(width = 1)
  ) +
  # NOTE(review): the original author marked this facet spec with "fix here".
  facet_grid(~ polviews) +
  coord_flip() +
  guides(fill = "none") +
  labs(
    title = p_title,
    subtitle = "As a faceted horizontal bar chart",
    caption = p_caption,
    x = NULL,
    y = "Percent",
    fill = "Con/Liberal"
  )

PART 3: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CONTINUOUS & ONE CATEGORICAL VARIABLE

# Summarise respondents by political view: how many there are (N), their mean
# number of children and mean age (missing values ignored), and each view's
# share of all respondents as a proportion (freq) and whole percent (pct).
pip2 <- gss_sm %>%
  group_by(polviews) %>%
  summarise(
    N = n(),
    childs_mean = mean(childs, na.rm = TRUE),
    age_mean = mean(age, na.rm = TRUE)
  ) %>%
  mutate(
    freq = prop.table(N),
    pct = round(100 * freq)
  )

pip2
# A tibble: 8 × 6
  polviews                   N childs_mean age_mean   freq   pct
  <fct>                  <int>       <dbl>    <dbl>  <dbl> <dbl>
1 Extremely Liberal        136        1.60     48.6 0.0474     5
2 Liberal                  350        1.47     46.8 0.122     12
3 Slightly Liberal         310        1.70     47.3 0.108     11
4 Moderate                1032        1.84     48.0 0.360     36
5 Slightly Conservative    382        2.07     50.3 0.133     13
6 Conservative             426        2.03     53.8 0.149     15
7 Extremely Conservative   120        2.29     54.3 0.0419     4
8 <NA>                     111        2.08     46.6 0.0387     4

The results show that there are more conservatives than liberals, and more moderates than either group. Conservatives also have a higher average number of children and a higher average age. Thankfully, NA accounts for only 4% of responses.

PART 4: SCATTERPLOT WITH A THIRD CATEGORICAL VARIABLE

Scatterplot for: polviews

# Scatterplot of the per-view averages: mean number of children on the x axis,
# mean age on the y axis, one coloured point per political view.
p <- ggplot(
  data = pip2,
  mapping = aes(x = childs_mean, y = age_mean, color = polviews)
)

p +
  geom_point(size = 5) +
  # Free-text note; hjust = 0 left-aligns it at x = 1.6.
  annotate(
    "text",
    x = 1.6,
    y = 58,
    hjust = 0,
    label = "These show liberal at the bottom left and \n conservatives in the top right."
  ) +
  labs(
    title = "Age and Children by Political View",
    subtitle = "Conservatives seem to have better age and more children.",
    caption = "gss_sm dataset{socviz}",
    x = "Average Child.",
    y = "Average Age"
  )

PART 5: LEGEND AND GUIDES

Adding a bit with titles and legends

# Same scatterplot as the previous part, now with a blue text note, a shaded
# rectangle drawn over the region x 1.9-2.4 / y 49-56, and a custom legend
# title for the colour scale.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, color = polviews))
p + geom_point(size = 5) +
    # Negative hjust pushes the text to the right of its x = 1.6 anchor.
    annotate(geom = "text", x = 1.6, y = 57,
             label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
             hjust = -0.3, color = "blue") +
    annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
             ymin = 49, ymax = 56, alpha = 0.2) +
    # Title spelling corrected ("Averge" -> "Average").
    labs(y = "Average Age", x = "Average Child.",
         title = "The Average Age and Children by Political View",
         subtitle = "Conservatives seem to have better age and more children.",
         caption = "gss_sm dataset{socviz}", color = "Political \n Views")

PART 6: DATA LABELS VS LEGEND

Changing to data labels.

# Scatterplot with each point labelled directly by its political view instead
# of relying on a legend.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, label = polviews))

p + geom_point(aes(color = polviews), size = 5) +
    # Small labels nudged above their points.
    geom_text(size = 3, vjust = -0.5) +
    annotate(geom = "text", x = 1.6, y = 57,
             label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
             hjust = -0.9, color = "blue") +
    annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
             ymin = 49, ymax = 56, alpha = 0.2, fill = "grey") +
    # Title spelling corrected ("averge" -> "Average").
    labs(y = "Average Age", x = "Average Child.",
         title = "The Average Age and Children by Political View",
         subtitle = "Conservatives seem to have better age and more children.",
         caption = "gss_sm dataset{socviz}") +
    # The data labels replace the colour legend. "none" is the supported way
    # to hide a guide (FALSE is deprecated) and matches the bar charts above.
    guides(color = "none")

PART 7: INTERPRETATION

This is my final chart, with the simple interpretation that conservatives are, on average, older and have more children.

# Final chart: points labelled directly by political view, with the colour
# legend kept and given a custom title.
p <- ggplot(pip2, aes(x = childs_mean, y = age_mean, label = polviews))

p + geom_point(aes(color = polviews), size = 5) +
    # Small labels nudged above their points.
    geom_text(size = 3, vjust = -0.5) +
    annotate(geom = "text", x = 1.6, y = 57,
             label = "This box shows liberal at the bottom \n left and conservatives in the top right.",
             hjust = -0.4, color = "blue") +
    annotate(geom = "rect", xmin = 1.9, xmax = 2.4,
             ymin = 49, ymax = 56, alpha = 0.2, fill = "grey") +
    # Title spelling corrected ("averge" -> "Average").
    labs(y = "Average Age", x = "Average Child.",
         title = "The Average Age and Children by Political View",
         subtitle = "Conservatives seem to have better age and more children.",
         caption = "gss_sm dataset{socviz}", color = "Political \n Views")

END