Sorry for submitting this late!
Function to Read in and Clean Dataset
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
#' Read a debt workbook and turn its quarter column into a date column
#'
#' Reads the Excel file at `path` with `read_excel()`, converts the
#' `Year and Quarter` column with `yq()`, and renames that column to `Date`.
#'
#' @param path Path to the Excel file; passed unchanged to `read_excel()`.
#' @return The data read from `path`, with `Year and Quarter` replaced in
#'   place by the parsed `Date` column.
read_and_clean <- function(path) {
  raw <- read_excel(path)
  parsed <- mutate(raw, `Year and Quarter` = yq(`Year and Quarter`))
  rename(parsed, Date = `Year and Quarter`)
}
# Load the debt workbook and print the cleaned tibble.
debt <- read_and_clean("challenge_datasets/debt_in_trillions.xlsx")
debt
# A tibble: 74 × 8
Date Mortgage `HE Revolving` `Auto Loan` `Credit Card` `Student Loan`
<date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2003-01-01 4.94 0.242 0.641 0.688 0.241
2 2003-04-01 5.08 0.26 0.622 0.693 0.243
3 2003-07-01 5.18 0.269 0.684 0.693 0.249
4 2003-10-01 5.66 0.302 0.704 0.698 0.253
5 2004-01-01 5.84 0.328 0.72 0.695 0.260
6 2004-04-01 5.97 0.367 0.743 0.697 0.263
7 2004-07-01 6.21 0.426 0.751 0.706 0.33
8 2004-10-01 6.36 0.468 0.728 0.717 0.346
9 2005-01-01 6.51 0.502 0.725 0.71 0.364
10 2005-04-01 6.70 0.528 0.774 0.717 0.374
# ℹ 64 more rows
# ℹ 2 more variables: Other <dbl>, Total <dbl>
Function to Compute Summary Statistics
Computes the z-score of each element of a vector if the vector is numeric; for non-numeric input it returns NULL.
#' Standardize a numeric vector to z-scores
#'
#' Subtracts the mean from every element and divides by the sample standard
#' deviation (`sd()`).
#'
#' @param vec Vector to standardize.
#' @param na.rm If `TRUE`, missing values are ignored when computing the mean
#'   and standard deviation; they remain `NA` in the result. Defaults to
#'   `FALSE`, in which case any `NA` in `vec` makes every element of the
#'   result `NA`.
#' @return A numeric vector the same length as `vec`, or `NULL` (invisibly)
#'   when `vec` is not numeric.
z_score <- function(vec, na.rm = FALSE) {
  # Non-numeric input is deliberately skipped rather than treated as an error.
  if (!is.numeric(vec)) {
    return(invisible(NULL))
  }
  centre <- mean(vec, na.rm = na.rm)
  spread <- sd(vec, na.rm = na.rm)
  (vec - centre) / spread
}
# Standardize the Mortgage column.
z_score(debt$Mortgage)
[1] -2.812097806 -2.695629898 -2.608700951 -2.206127094 -2.054212430
[6] -1.947028196 -1.741943400 -1.615347848 -1.487064354 -1.331773810
[11] -1.154540036 -0.989121847 -0.707235750 -0.433789356 -0.193257805
[16] -0.033747409 0.124919017 0.364606597 0.549436104 0.697974886
[21] 0.810222942 0.843137786 0.860861163 0.829634260 0.726669878
[26] 0.665904012 0.565471540 0.480230535 0.472634802 0.362074686
[31] 0.282741473 0.150238128 0.227883400 0.204252230 0.108039610
[36] -0.005052417 -0.073414016 -0.107172830 -0.207605301 -0.203385450
[41] -0.288626455 -0.365427757 -0.318165417 -0.189881924 -0.091981363
[46] -0.150215318 -0.120676355 -0.087761512 -0.086917541 -0.133335911
[51] -0.011804180 -0.021087854 0.080188588 0.074280796 0.064153152
[56] 0.173869297 0.297932939 0.351947042 0.395833500 0.513145379
[61] 0.561251689 0.611889910 0.730889729 0.717386204 0.818662646
[66] 0.955385843 0.981548924 1.082825366 1.214484741 1.267654873
[71] 1.339392353 1.492994957 1.591739488 1.829739127
Function to Plot a Histogram
#' Draw a histogram of a vector
#'
#' Wraps `vec` in a data frame and plots it with `geom_histogram()`.
#'
#' @param vec Values to plot on the x axis.
#' @param x_label Label for the x axis.
#' @param title Plot title.
#' @param bins Number of histogram bins; defaults to 10.
#' @return The ggplot object; the y axis is labelled "Count".
make_hist <- function(vec, x_label, title, bins = 10) {
  plot_data <- as.data.frame(vec)
  canvas <- ggplot(plot_data, aes(x = vec))
  bars <- geom_histogram(bins = bins)
  annotations <- labs(title = title, x = x_label, y = "Count")
  canvas + bars + annotations
}
# Plot the distribution of the Mortgage column.
make_hist(
  debt$Mortgage,
  x_label = "Mortgage Debt in Trillions",
  title = "Histogram of Mortgage Debt in Trillions"
)