# Load the federal funds rate data from the challenge data folder.
fedFundsRate <- read_csv(file = "challenge_datasets/FedFundsRate.csv")
## Rows: 904 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): Year, Month, Day, Federal Funds Target Rate, Federal Funds Upper T...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm the import.
head(fedFundsRate, n = 6)
## # A tibble: 6 × 10
## Year Month Day `Federal Funds Target Rate` `Federal Funds Upper Target`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1954 7 1 NA NA
## 2 1954 8 1 NA NA
## 3 1954 9 1 NA NA
## 4 1954 10 1 NA NA
## 5 1954 11 1 NA NA
## 6 1954 12 1 NA NA
## # ℹ 5 more variables: `Federal Funds Lower Target` <dbl>,
## # `Effective Federal Funds Rate` <dbl>, `Real GDP (Percent Change)` <dbl>,
## # `Unemployment Rate` <dbl>, `Inflation Rate` <dbl>
# Combine the separate Year / Month / Day columns into one Date column:
# paste them together as "Y-M-D" text, then parse that text with ymd().
fedFundsRateClean <- mutate(
  fedFundsRate,
  date = ymd(str_c(Year, Month, Day, sep = "-"))
)

# Sanity check: the new column should print with type <date>.
select(fedFundsRateClean, date)
## # A tibble: 904 × 1
## date
## <date>
## 1 1954-07-01
## 2 1954-08-01
## 3 1954-09-01
## 4 1954-10-01
## 5 1954-11-01
## 6 1954-12-01
## 7 1955-01-01
## 8 1955-02-01
## 9 1955-03-01
## 10 1955-04-01
## # ℹ 894 more rows
The plot below shows the inflation rate over the years. There are two spikes in inflation during the 1970s and one spike in 1980, after which the general trend is for inflation to decrease.
# Scatter plot of the inflation rate over time; rows without an inflation
# reading are dropped before plotting.
fedFundsRateClean %>%
  filter(!is.na(`Inflation Rate`)) %>%
  ggplot(aes(x = date, y = `Inflation Rate`)) +
  geom_point() +
  # The axis limits come from the unfiltered data, so the x-axis spans the whole
  # data set. `date_breaks` / `date_labels` are the documented date-scale
  # arguments; they replace a string passed to `breaks` and the superseded
  # scales::date_format().
  scale_x_date(
    limits = range(fedFundsRateClean$date),
    date_breaks = "5 years",
    date_labels = "%Y"
  ) +
  # scale = 1 leaves the values unmultiplied and only appends "%".
  scale_y_continuous(labels = scales::label_percent(scale = 1), n.breaks = 5) +
  theme_dark() +
  labs(title = "Inflation over time", x = "Year", y = "Inflation Rate")
# Load the ABC poll responses from the challenge data folder.
abcPoll <- read_csv(file = "challenge_datasets/abc_poll_2021.csv")
## Rows: 527 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): xspanish, complete_status, ppeduc5, ppeducat, ppgender, ppethm, pp...
## dbl (3): id, ppage, weights_pid
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm the import.
head(abcPoll, n = 6)
## # A tibble: 6 × 31
## id xspanish complete_status ppage ppeduc5 ppeducat ppgender ppethm
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 "High school … High sc… Female White…
## 2 7230002 English qualified 85 "Bachelor\x92… Bachelo… Male White…
## 3 7230003 English qualified 69 "High school … High sc… Male White…
## 4 7230004 English qualified 74 "Bachelor\x92… Bachelo… Female White…
## 5 7230005 English qualified 77 "High school … High sc… Male White…
## 6 7230006 English qualified 70 "Bachelor\x92… Bachelo… Male White…
## # ℹ 23 more variables: pphhsize <chr>, ppinc7 <chr>, ppmarit5 <chr>,
## # ppmsacat <chr>, ppreg4 <chr>, pprent <chr>, ppstaten <chr>, PPWORKA <chr>,
## # ppemploy <chr>, Q1_a <chr>, Q1_b <chr>, Q1_c <chr>, Q1_d <chr>, Q1_e <chr>,
## # Q1_f <chr>, Q2 <chr>, Q3 <chr>, Q4 <chr>, Q5 <chr>, QPID <chr>,
## # ABCAGE <chr>, Contact <chr>, weights_pid <dbl>
Removing the `pp` prefix from the demographic column names to make the columns easier to read.
# Strip the leading "pp" prefix from the demographic column names.
# The pattern is anchored with "^" so only a prefix is removed; an unanchored
# "pp" would also delete those letters from the middle of a column name.
# Matching is case-sensitive, so PPWORKA keeps its name, as before.
# rename_with() replaces the superseded rename_all().
abcPollClean <- abcPoll %>%
  rename_with(~ str_remove(.x, "^pp"))

# Checking to see that 'pp' was removed
head(abcPollClean)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 "High… High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 "Bach… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 "High… High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 "Bach… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 "High… High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 "Bach… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
Removing the stray `\x92` characters from the answers to the education level question (`ppeduc5`, now `educ5`).
# educ5 contains a raw 0x92 byte (printed as "\x92" in the preview), which is
# not valid UTF-8. The earlier pattern "\\\\x92s" looked for a literal backslash
# followed by "x92s", which is not in the data, so it never matched.
# Instead, re-decode the column from Windows-1252, where byte 0x92 is the right
# single quotation mark (U+2019), and then drop that character.
# NOTE(review): assumes the CSV was saved as Windows-1252 -- confirm the source
# encoding.
abcPollClean1 <- abcPollClean %>%
  mutate(
    educ5 = iconv(educ5, from = "windows-1252", to = "UTF-8"),
    educ5 = str_remove_all(educ5, "\u2019")
  )

# Check that educ5 no longer shows "\x92" or a replacement symbol
head(abcPollClean1)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 High … High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 Bache… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 High … High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 Bache… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 High … High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 Bache… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
# Delete any replacement symbols that remain in educ5.
abcPollClean2 <- mutate(abcPollClean1, educ5 = str_remove_all(educ5, "�"))

# Preview the result to confirm the odd symbols are gone from the column data
head(abcPollClean2)
## # A tibble: 6 × 31
## id xspanish complete_status age educ5 educat gender ethm hhsize inc7
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7230001 English qualified 68 High … High … Female Whit… 2 $25,…
## 2 7230002 English qualified 85 Bache… Bache… Male Whit… 2 $150…
## 3 7230003 English qualified 69 High … High … Male Whit… 2 $100…
## 4 7230004 English qualified 74 Bache… Bache… Female Whit… 1 $25,…
## 5 7230005 English qualified 77 High … High … Male Whit… 3 $10,…
## 6 7230006 English qualified 70 Bache… Bache… Male Whit… 2 $75,…
## # ℹ 21 more variables: marit5 <chr>, msacat <chr>, reg4 <chr>, rent <chr>,
## # staten <chr>, PPWORKA <chr>, employ <chr>, Q1_a <chr>, Q1_b <chr>,
## # Q1_c <chr>, Q1_d <chr>, Q1_e <chr>, Q1_f <chr>, Q2 <chr>, Q3 <chr>,
## # Q4 <chr>, Q5 <chr>, QPID <chr>, ABCAGE <chr>, Contact <chr>,
## # weights_pid <dbl>
Since education levels have a natural order, I will convert education into a factor whose levels are in the desired order, store it in a new column, and remove the `educat` column.
# Desired display order for the education categories, lowest to highest.
# NOTE(review): factor() matches levels by exact string, so each label must
# equal a value that occurs in `educat`; confirm against
# unique(abcPollClean2$educat).
educationOrder <- c(
  "Less than high school",
  "High school",
  "Some college",
  "Bachelors degree",
  "Master's degree or higher" # previously misspelled "highter"
)

# factor() silently converts any value that is not listed in `levels` to NA.
# Report such values so they do not disappear unnoticed. local() keeps the
# helper variable out of the global environment.
local({
  unmatched <- setdiff(na.omit(unique(abcPollClean2$educat)), educationOrder)
  if (length(unmatched) > 0) {
    warning(
      "educat values missing from educationOrder (they will become NA): ",
      paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }
})

# Add the ordered factor as `Education` and drop the original text column.
abcPollClean3 <- abcPollClean2 %>%
  mutate(Education = factor(educat, levels = educationOrder)) %>%
  select(-educat)
Then I will plot `abcPollClean3` as a pie chart, using `geom_bar` with the width set to one so that the bar fills the whole circle. `coord_polar("y")` maps the y-axis values to the angle of each education category's slice. Lastly, I added the chart title and the legend title.
# Pie chart of the number of respondents at each education level.
# A pie chart is a single stacked bar drawn in polar coordinates, so the bar
# heights must be numeric. geom_bar()'s default stat counts the rows in each
# educ5 group. The earlier version mapped the categorical Education column to y
# with stat = "identity", which does not give slice sizes proportional to counts.
abcPollClean3 %>%
  ggplot(aes(x = "", fill = educ5)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Breakdown of education levels in sample", fill = "Education") +
  theme_void() +
  theme(legend.position = "right")
I would prefer this chart to have labels, and for those labels to be proportions rather than raw counts. I made a new data frame, `eduPercent`, containing just the education level and the percentage of the sample at each level. I then passed that data frame to ggplot as before, using the `labels` column (which holds the formatted percentages) to label the pie chart.
# Share of the sample at each education level, plus a formatted percent label
eduPercent <- abcPollClean3 %>%
  count(educ5) %>% # one row per education level, with its row count in n
  mutate(perc = n / sum(n)) %>%
  arrange(perc) %>%
  mutate(labels = scales::percent(perc))
# Pie chart of education levels with a percentage label on each slice.
# y must be the numeric share (perc): the earlier version mapped the character
# column educ5 to y, so neither the slices nor position_stack() had numeric
# heights to work with.
# fill is set in the top-level aes() so that the text layer is grouped and
# stacked in the same order as the bars, which puts each label on its own slice.
eduPercent %>%
  ggplot(aes(x = "", y = perc, fill = educ5)) +
  geom_col(width = 1) +
  geom_text(
    aes(label = labels),
    position = position_stack(vjust = 0.5), # centre each label in its slice
    size = 3,
    color = "black",
    angle = 0,
    fontface = "bold",
    hjust = .25
  ) +
  coord_polar(theta = "y") +
  labs(title = "Breakdown of education levels in sample", fill = "Education") +
  theme_void() +
  theme(legend.position = "right")