Goal is to use the same data from challenge 6 but two complete two other graphs I have not yet done yet.
fedFundsRate <- read_csv("challenge_datasets/FedFundsRate.csv")
## Rows: 904 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): Year, Month, Day, Federal Funds Target Rate, Federal Funds Upper T...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(fedFundsRate)
## # A tibble: 6 × 10
## Year Month Day `Federal Funds Target Rate` `Federal Funds Upper Target`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1954 7 1 NA NA
## 2 1954 8 1 NA NA
## 3 1954 9 1 NA NA
## 4 1954 10 1 NA NA
## 5 1954 11 1 NA NA
## 6 1954 12 1 NA NA
## # ℹ 5 more variables: `Federal Funds Lower Target` <dbl>,
## # `Effective Federal Funds Rate` <dbl>, `Real GDP (Percent Change)` <dbl>,
## # `Unemployment Rate` <dbl>, `Inflation Rate` <dbl>
fedFundsRateClean <- fedFundsRate %>%
mutate(date = str_c(Year, Month, Day, sep="-"),
date = ymd(date)) %>%
select(-Year, -Month, -Day)
#checking to see if it worked
fedFundsRateClean %>%
select(date)
## # A tibble: 904 × 1
## date
## <date>
## 1 1954-07-01
## 2 1954-08-01
## 3 1954-09-01
## 4 1954-10-01
## 5 1954-11-01
## 6 1954-12-01
## 7 1955-01-01
## 8 1955-02-01
## 9 1955-03-01
## 10 1955-04-01
## # ℹ 894 more rows
The geom_line is the function to give us line graphs. I will use this twice with a + in-between so they are two separate line in one graph.
The rest of the the functions are to make the graph presentation ready.
From the graph, we can see unemployment and inflation was tracking between the 1970s to 2000 but before and after that point they diverge. This period of correlation is not enough to indicate that they trend together nor that one can be be used to know the other.
ggplot(fedFundsRateClean, aes(x = date)) +
geom_line(aes(y = `Unemployment Rate`, color = "Unemployment Rate"), size = 1, alpha = 0.8) +
geom_line(aes(y = `Inflation Rate`, color = "Inflation Rate"), size = 1) +
labs(title = "Unemployment and Inflation Over Time",
y = "Value",
color = "Indicator") +
theme_minimal() +
theme(plot.title = element_text(hjust=0.5)) +
scale_color_okabeito(name="U.S. Rates")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Warning: Removed 44 rows containing missing values (`geom_line()`).
abcPoll <- read_csv("challenge_datasets/abc_poll_2021.csv")
## Rows: 527 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): xspanish, complete_status, ppeduc5, ppeducat, ppgender, ppethm, pp...
## dbl (3): id, ppage, weights_pid
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(abcPoll)
## # A tibble: 6 × 31
## id xspanish complete_status ppage ppeduc5 ppeducat ppgender ppethm
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 "High school … High sc… Female White…
## 2 7230002 English qualified 85 "Bachelor\x92… Bachelo… Male White…
## 3 7230003 English qualified 69 "High school … High sc… Male White…
## 4 7230004 English qualified 74 "Bachelor\x92… Bachelo… Female White…
## 5 7230005 English qualified 77 "High school … High sc… Male White…
## 6 7230006 English qualified 70 "Bachelor\x92… Bachelo… Male White…
## # ℹ 23 more variables: pphhsize <chr>, ppinc7 <chr>, ppmarit5 <chr>,
## # ppmsacat <chr>, ppreg4 <chr>, pprent <chr>, ppstaten <chr>, PPWORKA <chr>,
## # ppemploy <chr>, Q1_a <chr>, Q1_b <chr>, Q1_c <chr>, Q1_d <chr>, Q1_e <chr>,
## # Q1_f <chr>, Q2 <chr>, Q3 <chr>, Q4 <chr>, Q5 <chr>, QPID <chr>,
## # ABCAGE <chr>, Contact <chr>, weights_pid <dbl>
removing pp before the demographic information to make the columns easier to read
abcPollClean <- abcPoll %>%
rename_all(~gsub('pp', '', .))
# Checking to see that 'pp' was removed
head(abcPollClean)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 "High… High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 "Bach… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 "High… High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 "Bach… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 "High… High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 "Bach… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
abcPollClean1 <- abcPollClean %>%
mutate(educ5 = str_replace_all(educ5, "\\\\x92s", ""))
#checking if it removed the \x92s but now there are �
head(abcPollClean1)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 High … High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 Bache… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 High … High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 Bache… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 High … High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 Bache… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
abcPollClean2 <- abcPollClean1 %>%
mutate(educ5 = str_replace_all(educ5, "�", ""))
#that worked to remove the odd symbols in the column data
head(abcPollClean2)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 High … High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 Bache… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 High … High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 Bache… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 High … High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 Bache… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
#remove excess language for political affiliation category
abcPollClean3 <- abcPollClean2%>%
mutate(partyid = str_remove(QPID, "A[n]* "),
partyid = case_when(
str_detect(QPID, "Skipped")~NA_character_,
TRUE~partyid
)) %>%
select(-QPID)
Notes added directly to code.
The sample size is small that it is difficult to tell if there is a significant difference between political affiliation and if employment status may influence that.
abcPollClean3 %>%
filter(is.na(partyid)) #I see the individuals refused to be interviewed and for this challenge am making the assumption that these three individuals declined to give their political affiliation and will remove them from the visuliation
## # A tibble: 3 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230055 English qualified 74 High … High … Female Whit… 1 $75,…
## 2 7230214 English qualified 45 Some … Some … Female Whit… 3 $75,…
## 3 7230391 English qualified 18 No hi… Less … Female Blac… 5 $10,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, ABCAGE <chr>, Contact <chr>, weights_pid <dbl>,
## # partyid <chr>
#removing n/a values
abcPollClean4 <- abcPollClean3 %>%
drop_na(partyid)
ggplot(abcPollClean4, aes(x = partyid, fill = partyid)) +
geom_bar(stat = "count") +
facet_wrap(vars(employ)) +
theme_dark() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
labs(title = "Employment Status compared to age and political affiliation", x = "Political Affiliation", y = "Count", fill = "Political Affiliation")
#I want to put the two largest US political parties visually next to each other
# Define the new order of political affiliations
politicalOrder <- c("Democrat", "Republican", "Independent", "Something else")
# Convert 'partyid' to a factor with the specified order
abcPollClean4$partyid <- factor(abcPollClean4$partyid, levels = politicalOrder)
# Create the updated bar plot
ggplot(abcPollClean4, aes(x = partyid, fill = partyid)) +
geom_bar(stat = "count") +
facet_wrap(vars(employ)) +
theme_dark() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(hjust = 0.5)) +
labs(title = "Employment Status compared to political affiliation", x = "Political Affiliation", y = "Count", fill = "Political Affiliation")