Challenge 6

Upload Feds Data

# Load the monthly federal funds rate data into a tibble
fedFundsRate <- read_csv(file = "challenge_datasets/FedFundsRate.csv")
## Rows: 904 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): Year, Month, Day, Federal Funds Target Rate, Federal Funds Upper T...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to confirm the import
fedFundsRate %>% head()
## # A tibble: 6 × 10
##    Year Month   Day `Federal Funds Target Rate` `Federal Funds Upper Target`
##   <dbl> <dbl> <dbl>                       <dbl>                        <dbl>
## 1  1954     7     1                          NA                           NA
## 2  1954     8     1                          NA                           NA
## 3  1954     9     1                          NA                           NA
## 4  1954    10     1                          NA                           NA
## 5  1954    11     1                          NA                           NA
## 6  1954    12     1                          NA                           NA
## # ℹ 5 more variables: `Federal Funds Lower Target` <dbl>,
## #   `Effective Federal Funds Rate` <dbl>, `Real GDP (Percent Change)` <dbl>,
## #   `Unemployment Rate` <dbl>, `Inflation Rate` <dbl>

Make a date column

# Build a single Date column from the separate Year / Month / Day columns:
# str_c() joins them into "Y-M-D" text and ymd() parses that text into a Date.
fedFundsRateClean <- fedFundsRate %>%
  mutate(date = ymd(str_c(Year, Month, Day, sep = "-")))

# Sanity check: print only the new column to confirm it parsed as <date>
select(fedFundsRateClean, date)
## # A tibble: 904 × 1
##    date      
##    <date>    
##  1 1954-07-01
##  2 1954-08-01
##  3 1954-09-01
##  4 1954-10-01
##  5 1954-11-01
##  6 1954-12-01
##  7 1955-01-01
##  8 1955-02-01
##  9 1955-03-01
## 10 1955-04-01
## # ℹ 894 more rows

Plot over time

This plot shows the inflation rate over the years. There are two spikes in inflation during the 1970s and another spike in 1980, with a general downward trend in inflation after that point.

# Scatter plot of the inflation rate over time.
# Rows without an inflation reading are dropped before plotting, but the x-axis
# still spans the full date range of the data set.
ggplot(
  data = filter(fedFundsRateClean, !is.na(`Inflation Rate`)),
  mapping = aes(x = date, y = `Inflation Rate`)
) +
  geom_point() +
  scale_x_date(
    limits = range(fedFundsRateClean$date),
    date_breaks = "5 years", # one tick every five years ...
    date_labels = "%Y"       # ... labelled with the four-digit year
  ) +
  # scale = 1: the values are already in percent, so only append the "%" sign
  scale_y_continuous(
    labels = scales::label_percent(scale = 1),
    n.breaks = 5
  ) +
  theme_dark() +
  labs(title = "Inflation over time", x = "Year", y = "Inflation Rate")

Upload ABC Poll Data

# Load the ABC poll responses.
# readr assumes UTF-8 by default, but this file contains a raw 0x92 byte
# (printed by tibble as "\x92", e.g. "Bachelor\x92s"). 0x92 is the right single
# quotation mark in Windows-1252, so declare that encoding to decode the text
# correctly on import.
# NOTE(review): Windows-1252 is inferred from the 0x92 byte alone; confirm
# against the data provider if other non-ASCII text looks wrong.
abcPoll <- read_csv(
  "challenge_datasets/abc_poll_2021.csv",
  locale = locale(encoding = "windows-1252")
)
## Rows: 527 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): xspanish, complete_status, ppeduc5, ppeducat, ppgender, ppethm, pp...
## dbl  (3): id, ppage, weights_pid
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to confirm the import
abcPoll %>% head()
## # A tibble: 6 × 31
##        id xspanish complete_status ppage ppeduc5        ppeducat ppgender ppethm
##     <dbl> <chr>    <chr>           <dbl> <chr>          <chr>    <chr>    <chr> 
## 1 7230001 English  qualified          68 "High school … High sc… Female   White…
## 2 7230002 English  qualified          85 "Bachelor\x92… Bachelo… Male     White…
## 3 7230003 English  qualified          69 "High school … High sc… Male     White…
## 4 7230004 English  qualified          74 "Bachelor\x92… Bachelo… Female   White…
## 5 7230005 English  qualified          77 "High school … High sc… Male     White…
## 6 7230006 English  qualified          70 "Bachelor\x92… Bachelo… Male     White…
## # ℹ 23 more variables: pphhsize <chr>, ppinc7 <chr>, ppmarit5 <chr>,
## #   ppmsacat <chr>, ppreg4 <chr>, pprent <chr>, ppstaten <chr>, PPWORKA <chr>,
## #   ppemploy <chr>, Q1_a <chr>, Q1_b <chr>, Q1_c <chr>, Q1_d <chr>, Q1_e <chr>,
## #   Q1_f <chr>, Q2 <chr>, Q3 <chr>, Q4 <chr>, Q5 <chr>, QPID <chr>,
## #   ABCAGE <chr>, Contact <chr>, weights_pid <dbl>

Clean up some columns

Removing the `pp` prefix from the demographic column names to make the columns easier to read.

# Strip the leading "pp" from the demographic column names
# (ppage -> age, ppeduc5 -> educ5, ...).
# The pattern is anchored with "^" so that only a prefix is removed; an
# unanchored "pp" would also delete those letters from the middle of any other
# column name. The match is case-sensitive, so PPWORKA is left unchanged.
abcPollClean <- abcPoll %>%
  rename_with(~ str_remove(.x, "^pp"))

# Confirm the column names no longer carry the 'pp' prefix
abcPollClean %>% head()
## # A tibble: 6 × 31
##        id xspanish complete_status   age educ5  educat gender ethm  hhsize inc7 
##     <dbl> <chr>    <chr>           <dbl> <chr>  <chr>  <chr>  <chr> <chr>  <chr>
## 1 7230001 English  qualified          68 "High… High … Female Whit… 2      $25,…
## 2 7230002 English  qualified          85 "Bach… Bache… Male   Whit… 2      $150…
## 3 7230003 English  qualified          69 "High… High … Male   Whit… 2      $100…
## 4 7230004 English  qualified          74 "Bach… Bache… Female Whit… 1      $25,…
## 5 7230005 English  qualified          77 "High… High … Male   Whit… 3      $10,…
## 6 7230006 English  qualified          70 "Bach… Bache… Male   Whit… 2      $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## #   staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## #   Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## #   Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## #   weights_pid <dbl>

Removing the stray `\x92` characters from the education level question (ppeduc5, now educ5)

# Remove the stray apostrophe character from educ5 (e.g. "Bachelor\x92s").
# The "\x92" that tibble prints is one raw byte (0x92), not the four characters
# backslash-x-9-2, so a regex for a literal backslash followed by "x92s" never
# matches anything. Handle both possible states of the column instead:
abcPollClean1 <- abcPollClean %>%
  mutate(
    # 1. bytes that are not valid UTF-8 (the raw 0x92) are dropped by iconv()
    educ5 = iconv(educ5, from = "UTF-8", to = "UTF-8", sub = ""),
    # 2. if the text was decoded properly, the same character is U+2019
    educ5 = str_remove_all(educ5, "\u2019")
  )

# Preview educ5 after the replacement (watch for leftover \x92 or � characters)
abcPollClean1 %>% head()
## # A tibble: 6 × 31
##        id xspanish complete_status   age educ5  educat gender ethm  hhsize inc7 
##     <dbl> <chr>    <chr>           <dbl> <chr>  <chr>  <chr>  <chr> <chr>  <chr>
## 1 7230001 English  qualified          68 High … High … Female Whit… 2      $25,…
## 2 7230002 English  qualified          85 Bache… Bache… Male   Whit… 2      $150…
## 3 7230003 English  qualified          69 High … High … Male   Whit… 2      $100…
## 4 7230004 English  qualified          74 Bache… Bache… Female Whit… 1      $25,…
## 5 7230005 English  qualified          77 High … High … Male   Whit… 3      $10,…
## 6 7230006 English  qualified          70 Bache… Bache… Male   Whit… 2      $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## #   staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## #   Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## #   Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## #   weights_pid <dbl>
# Drop any replacement characters (�) that remain in educ5
abcPollClean2 <- abcPollClean1 %>%
  mutate(educ5 = str_remove_all(educ5, "�"))

# Preview once more to confirm the odd symbols are gone from educ5
abcPollClean2 %>% head()
## # A tibble: 6 × 31
##        id xspanish complete_status   age educ5  educat gender ethm  hhsize inc7 
##     <dbl> <chr>    <chr>           <dbl> <chr>  <chr>  <chr>  <chr> <chr>  <chr>
## 1 7230001 English  qualified          68 High … High … Female Whit… 2      $25,…
## 2 7230002 English  qualified          85 Bache… Bache… Male   Whit… 2      $150…
## 3 7230003 English  qualified          69 High … High … Male   Whit… 2      $100…
## 4 7230004 English  qualified          74 Bache… Bache… Female Whit… 1      $25,…
## 5 7230005 English  qualified          77 High … High … Male   Whit… 3      $10,…
## 6 7230006 English  qualified          70 Bache… Bache… Male   Whit… 2      $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## #   staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## #   Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## #   Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## #   weights_pid <dbl>

Visualizing Part-Whole Relationships

Since education levels have a natural order, I will define that order explicitly, store the education category as an ordered factor in a new column (Education), and then remove the original educat column.

# Desired display order of the education categories, lowest to highest.
# NOTE(review): these strings must match the values stored in `educat` exactly.
# "highter" looks like a misspelling and the list has five entries; confirm
# against unique(abcPollClean2$educat) and correct the levels if needed.
educationOrder <- c(
  "Less than high school",
  "High school",
  "Some college",
  "Bachelors degree",
  "Master's degree or highter"
)

# factor() silently converts every value that is not listed in `levels` to NA,
# so surface any mismatch instead of losing those respondents without notice.
unmatchedEducation <- setdiff(
  unique(abcPollClean2$educat),
  c(educationOrder, NA)
)
if (length(unmatchedEducation) > 0) {
  warning(
    "educat values missing from educationOrder will become NA: ",
    paste(unmatchedEducation, collapse = ", "),
    call. = FALSE
  )
}

# Add the ordered Education factor and drop the original text column
abcPollClean3 <- abcPollClean2 %>%
  mutate(Education = factor(educat, levels = educationOrder)) %>%
  select(-educat)

Then I plot abcPollClean3 as a pie chart: geom_bar with the width set to one draws a single stacked bar, and coord_polar(“y”) wraps that bar’s y-axis around a circle so that each education category becomes a slice. Lastly, I gave the chart a title and the legend a label.

# Pie chart of the education breakdown in the sample.
# geom_bar() with its default stat counts the respondents in each educ5
# category and stacks those counts into one bar (x = "" gives a single bar);
# coord_polar("y") then bends the stacked counts around a circle, so each
# slice's angle is proportional to the number of respondents.
# (Mapping a factor to y with stat = "identity" does not give count-sized
# slices.)
abcPollClean3 %>%
  ggplot(aes(x = "", fill = educ5)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  labs(title = "Breakdown of education levels in sample",  fill = "Education") +
  theme_void() +
  theme(legend.position = "right")

I would prefer this chart to have labels, and for those labels to be proportions rather than raw counts. I made a new data frame, eduPercent, containing each education level and its share of the sample as a percentage. I then passed that data frame to ggplot as I did before, but used the labels column (which holds the formatted percentages) to label the pie chart.

# Share of respondents in each educ5 category, smallest share first:
#   n      - number of respondents in the category
#   perc   - n as a fraction of all respondents
#   labels - perc formatted as a percent string for plotting
eduPercent <- abcPollClean3 %>%
  count(educ5) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  arrange(perc)


# Labelled pie chart of the education breakdown.
# The slice size must come from the numeric share (perc), not from the category
# name. fill is mapped in the top-level aes() so that the bar layer and the
# text layer are grouped identically and position_stack() places each label
# in the middle (vjust = 0.5) of its own slice.
eduPercent %>%
  ggplot(aes(x = "", y = perc, fill = educ5)) +
  geom_bar(stat = "identity", width = 1) +
  geom_text(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "black",
    angle = 0,
    fontface = "bold",
    hjust = .25
  ) +
  coord_polar("y") +
  labs(title = "Breakdown of education levels in sample", fill = "Education") +
  theme_void() +
  theme(legend.position = "right")