Challenge 9

Sorry for submitting this late!

Function to Read in and Clean Dataset

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

# Read an Excel workbook and tidy its date column.
#
# Args:
#   path: Location of the Excel file; passed straight to read_excel().
#
# Returns:
#   The sheet's data with the `Year and Quarter` column parsed by yq()
#   (year-quarter text to a date) and renamed to `Date`.
read_and_clean <- function(path) {
  raw <- read_excel(path)
  # Parse in place first so the column keeps its original position.
  parsed <- mutate(raw, `Year and Quarter` = yq(`Year and Quarter`))
  rename(parsed, Date = `Year and Quarter`)
}
# Load the debt workbook through the cleaning helper, then print the result.
debt <- read_and_clean("challenge_datasets/debt_in_trillions.xlsx")
debt
# A tibble: 74 × 8
   Date       Mortgage `HE Revolving` `Auto Loan` `Credit Card` `Student Loan`
   <date>        <dbl>          <dbl>       <dbl>         <dbl>          <dbl>
 1 2003-01-01     4.94          0.242       0.641         0.688          0.241
 2 2003-04-01     5.08          0.26        0.622         0.693          0.243
 3 2003-07-01     5.18          0.269       0.684         0.693          0.249
 4 2003-10-01     5.66          0.302       0.704         0.698          0.253
 5 2004-01-01     5.84          0.328       0.72          0.695          0.260
 6 2004-04-01     5.97          0.367       0.743         0.697          0.263
 7 2004-07-01     6.21          0.426       0.751         0.706          0.33 
 8 2004-10-01     6.36          0.468       0.728         0.717          0.346
 9 2005-01-01     6.51          0.502       0.725         0.71           0.364
10 2005-04-01     6.70          0.528       0.774         0.717          0.374
# ℹ 64 more rows
# ℹ 2 more variables: Other <dbl>, Total <dbl>

Function to Compute Summary Statistics

Computes the z-score of each element of a vector if the vector is numeric; for non-numeric input the function returns NULL.

# Standardize a numeric vector to z-scores: (value - mean) / standard deviation.
#
# Args:
#   vec:   Vector to standardize.
#   na.rm: If TRUE, missing values are ignored when computing the mean and
#          standard deviation (they remain NA in the result). Defaults to
#          FALSE, in which case any NA in `vec` makes every z-score NA.
#
# Returns:
#   A numeric vector the same length as `vec`, or NULL (invisibly) when `vec`
#   is not numeric.
z_score <- function(vec, na.rm = FALSE) {
  # Non-numeric input is deliberately skipped rather than treated as an error.
  if (!is.numeric(vec)) {
    return(invisible(NULL))
  }
  center <- mean(vec, na.rm = na.rm)
  spread <- sd(vec, na.rm = na.rm)
  (vec - center) / spread
}

# Standardize the Mortgage column and print the resulting z-scores.
z_score(debt$Mortgage)
 [1] -2.812097806 -2.695629898 -2.608700951 -2.206127094 -2.054212430
 [6] -1.947028196 -1.741943400 -1.615347848 -1.487064354 -1.331773810
[11] -1.154540036 -0.989121847 -0.707235750 -0.433789356 -0.193257805
[16] -0.033747409  0.124919017  0.364606597  0.549436104  0.697974886
[21]  0.810222942  0.843137786  0.860861163  0.829634260  0.726669878
[26]  0.665904012  0.565471540  0.480230535  0.472634802  0.362074686
[31]  0.282741473  0.150238128  0.227883400  0.204252230  0.108039610
[36] -0.005052417 -0.073414016 -0.107172830 -0.207605301 -0.203385450
[41] -0.288626455 -0.365427757 -0.318165417 -0.189881924 -0.091981363
[46] -0.150215318 -0.120676355 -0.087761512 -0.086917541 -0.133335911
[51] -0.011804180 -0.021087854  0.080188588  0.074280796  0.064153152
[56]  0.173869297  0.297932939  0.351947042  0.395833500  0.513145379
[61]  0.561251689  0.611889910  0.730889729  0.717386204  0.818662646
[66]  0.955385843  0.981548924  1.082825366  1.214484741  1.267654873
[71]  1.339392353  1.492994957  1.591739488  1.829739127

Function to Plot a Histogram

# Draw a histogram of a vector with ggplot2.
#
# Args:
#   vec:     Values to bin.
#   x_label: Label for the x axis.
#   title:   Plot title.
#   bins:    Number of histogram bins (default 10).
#
# Returns:
#   A ggplot object; the y axis is labelled "Count".
make_hist <- function(vec, x_label, title, bins = 10) {
  # as.data.frame() names the single column `vec`, which aes() maps below.
  plot_data <- as.data.frame(vec)
  base_plot <- ggplot(plot_data, aes(x = vec))
  base_plot +
    geom_histogram(bins = bins) +
    labs(x = x_label, y = "Count", title = title)
}

# Plot the distribution of mortgage debt with the default number of bins.
make_hist(
  debt$Mortgage,
  x_label = "Mortgage Debt in Trillions",
  title = "Histogram of Mortgage Debt in Trillions"
)