loans <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-11-26/loans.csv")
## Parsed with column specification:
## cols(
## agency_name = col_character(),
## year = col_double(),
## quarter = col_double(),
## starting = col_double(),
## added = col_double(),
## total = col_double(),
## consolidation = col_double(),
## rehabilitation = col_double(),
## voluntary_payments = col_double(),
## wage_garnishments = col_double()
## )
divide_mil <- function(x) {
x = x / 1E6
}
divide_tril <- function(x) {
x = x / 1E9
}
percent_var <- function(x, y) {
y = y / x
}
loans %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
arrange(desc(total))
## # A tibble: 291 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACT 18 2 9803. NA 395. 22.9
## 2 Account Co~ 17 4 10332. NA 358. 30.0
## 3 Account Co~ 18 1 9903. NA 354. 23.2
## 4 Account Co~ 17 3 10659. NA 354. 45.3
## 5 ConServe 17 4 9697. NA 348. 29.1
## 6 ConServe 18 1 9246. NA 343. 23.5
## 7 ConServe 17 3 10026. NA 341. 43.9
## 8 Account Co~ 17 2 11185. NA 332. 52.1
## 9 Account Co~ 17 1 11181. 1762. 325. 38.0
## 10 ConServe 17 1 10142. 1762. 318. 36.4
## # ... with 281 more rows, and 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
mutate(percent_total = total / starting * 100)
## # A tibble: 291 x 11
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Account Co~ 15 4 5808. 1041. 123. 20.1
## 2 Allied Int~ 15 4 3693. NA 113. 11.5
## 3 CBE Group 15 4 2364. NA 83.9 7.38
## 4 Coast Prof~ 15 4 704. NA 99.6 3.40
## 5 Collection~ 15 4 2953. NA 75.7 8.95
## 6 Collecto, ~ 15 4 2297. NA 78.8 6.95
## 7 ConServe 15 4 4803. 1041. 149. 19.7
## 8 Delta Mana~ 15 4 2174. NA 43.8 7.00
## 9 Diversifie~ 15 4 2518. NA 60.6 8.09
## 10 ERS 15 4 952. NA 91.8 4.63
## # ... with 281 more rows, and 4 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>, percent_total <dbl>
loans %>%
count(agency_name, sort = TRUE)
## # A tibble: 42 x 2
## agency_name n
## <chr> <int>
## 1 ConServe 13
## 2 Action Financial Services 11
## 3 Central Research 11
## 4 ERS 11
## 5 Account Control Technology, Inc. 10
## 6 Bass and Associates 9
## 7 Coast Professional, Inc. 9
## 8 FH Cann and Associates 9
## 9 FMS Investment Corp 9
## 10 GC Services LP 9
## # ... with 32 more rows
DT1 <- loans %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
filter(agency_name == "ConServe")
library(ggplot2)
DT1 %>%
mutate(year_q = paste0(year, "_", quarter)) %>%
ggplot(aes(x = year_q, y = starting)) +
geom_point() +
geom_line()
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
loans %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
select(agency_name, year, quarter, starting) %>%
mutate(year_q = paste0(year, "_", quarter)) %>%
ggplot(aes(x = year_q, y = starting, color = agency_name)) +
geom_point()
## Warning: Removed 9 rows containing missing values (geom_point).
loans %>%
arrange(desc(agency_name)) %>% pull(agency_name) %>% unique() %>%
View()
loans %>%
filter(str_detect(agency_name, "Windham"))
## # A tibble: 13 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Windham Pr~ 15 4 4.57e 9 1.04e9 1.21e8 19228981.
## 2 Windham Pr~ 16 1 5.54e 9 9.87e8 1.49e8 24631930
## 3 Windham Pr~ 16 2 6.40e 9 5.69e8 2.03e8 31373865.
## 4 Windham Pr~ 16 3 6.72e 9 2.24e9 2.55e8 33915336.
## 5 Windham Pr~ 16 4 8.58e 9 1.65e9 2.54e8 39913680.
## 6 Windham Pr~ 17 1 1.00e10 1.76e9 2.82e8 35346217.
## 7 Windham Pr~ 17 2 8.47e 9 NA 2.33e8 37079215.
## 8 Windham Pr~ 17 3 8.06e 9 NA 2.70e8 29380228.
## 9 Windham 17 4 7.79e 9 NA 2.78e8 22035921.
## 10 Windham Pr~ 18 1 7.42e 9 NA 2.66e8 17395610.
## 11 Windham 18 2 7.35e 9 NA 2.00e8 17280439.
## 12 Windham 18 3 7.10e 9 NA 1.71e8 17347352.
## 13 Windham 18 4 6.90e 9 NA 1.52e8 7648379.
## # ... with 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>%
filter(agency_name %in% c("Account Control Technology, Inc.", "ACT"))
## # A tibble: 13 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Account Co~ 15 4 5.81e 9 1.04e9 1.23e8 20081894.
## 2 Account Co~ 16 1 6.80e 9 9.87e8 1.84e8 23239061.
## 3 Account Co~ 16 2 7.64e 9 5.69e8 2.30e8 35431325.
## 4 Account Co~ 16 3 7.95e 9 2.24e9 3.06e8 38180002.
## 5 Account Co~ 16 4 9.75e 9 1.65e9 2.82e8 39679424.
## 6 Account Co~ 17 1 1.12e10 1.76e9 3.25e8 37989717.
## 7 Account Co~ 17 2 1.12e10 NA 3.32e8 52126784.
## 8 Account Co~ 17 3 1.07e10 NA 3.54e8 45340705.
## 9 Account Co~ 17 4 1.03e10 NA 3.58e8 29977209.
## 10 Account Co~ 18 1 9.90e 9 NA 3.54e8 23214940.
## 11 ACT 18 2 9.80e 9 NA 3.95e8 22850758.
## 12 ACT 18 3 9.37e 9 NA 2.81e8 21002430.
## 13 ACT 18 4 9.09e 9 NA 2.71e8 9619977.
## # ... with 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>%
filter(agency_name %in% c("FMS", "FMS Investment Corp", "Financial Asset Management Systems, Inc."))
## # A tibble: 20 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Financial ~ 15 4 1.00e 9 NA 5.19e7 2868015.
## 2 FMS Invest~ 15 4 4.81e 9 1.04e9 1.48e8 20018885.
## 3 Financial ~ 16 1 9.72e 8 NA 4.76e7 2298349.
## 4 FMS Invest~ 16 1 5.77e 9 9.87e8 1.71e8 23948303.
## 5 Financial ~ 16 2 9.24e 8 NA 4.04e7 2841371.
## 6 FMS Invest~ 16 2 6.60e 9 5.69e8 2.20e8 31481056.
## 7 Financial ~ 16 3 8.91e 8 NA 3.15e7 2699873.
## 8 FMS Invest~ 16 3 6.92e 9 2.24e9 2.61e8 34826000.
## 9 Financial ~ 16 4 2.13e 8 NA 1.75e7 271033.
## 10 FMS Invest~ 16 4 8.76e 9 1.65e9 2.40e8 35030272.
## 11 Financial ~ 17 1 1.22e 8 NA 1.78e7 269607.
## 12 FMS Invest~ 17 1 1.02e10 1.76e9 2.94e8 36139648.
## 13 Financial ~ 17 2 9.43e 7 NA 7.43e6 107265.
## 14 FMS Invest~ 17 2 1.05e10 NA 2.42e8 52340470.
## 15 FMS Invest~ 17 3 1.01e10 NA 2.99e8 43495127.
## 16 FMS 17 4 9.82e 9 NA 2.87e8 32768434.
## 17 FMS Invest~ 18 1 9.53e 9 NA 2.76e8 24214027.
## 18 FMS 18 2 9.46e 9 NA 2.67e8 27904065.
## 19 FMS 18 3 9.17e 9 NA 2.26e8 22455269.
## 20 FMS 18 4 8.95e 9 NA 2.09e8 9724624.
## # ... with 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>%
filter(agency_name %in% c("Pioneer Credit Recovery, Inc", "Pioneer"))
## # A tibble: 11 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Pioneer Cr~ 15 4 6.63e8 NA 7.06e7 3085110.
## 2 Pioneer Cr~ 16 1 6.08e8 NA 5.58e7 1858437.
## 3 Pioneer Cr~ 16 2 2.67e8 NA 1.98e7 1027051.
## 4 Pioneer Cr~ 16 3 2.13e8 NA 1.63e7 861171.
## 5 Pioneer Cr~ 16 4 1.93e8 NA 1.04e7 729499.
## 6 Pioneer Cr~ 17 1 1.31e8 NA 1.01e7 432938.
## 7 Pioneer Cr~ 17 2 1.12e8 NA 4.65e6 607714.
## 8 Pioneer Cr~ 18 1 NA 3.56e9 1.94e6 1698413.
## 9 Pioneer 18 2 3.57e9 5.36e8 2.74e7 22558336.
## 10 Pioneer 18 3 3.92e9 NA 3.37e7 26016031.
## 11 Pioneer 18 4 3.90e9 9.46e9 1.05e8 27306635.
## # ... with 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>%
filter(str_detect(agency_name, "GC Services"))
## # A tibble: 13 x 10
## agency_name year quarter starting added total consolidation
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GC Service~ 15 4 4.46e 9 1.04e9 1.10e8 16529578.
## 2 GC Service~ 16 1 5.44e 9 9.87e8 1.30e8 21940761.
## 3 GC Service~ 16 2 6.32e 9 5.69e8 1.68e8 26898015.
## 4 GC Service~ 16 3 6.67e 9 2.24e9 2.32e8 34287491.
## 5 GC Service~ 16 4 8.56e 9 1.65e9 2.10e8 35371922.
## 6 GC Service~ 17 1 1.00e10 1.76e9 2.40e8 36136069.
## 7 GC Service~ 17 2 1.01e10 NA 2.45e8 48161564.
## 8 GC Service~ 17 3 9.64e 9 NA 2.71e8 38634012.
## 9 GC Services 17 4 9.37e 9 NA 2.76e8 25062946.
## 10 GC Service~ 18 1 9.01e 9 NA 2.88e8 20977152.
## 11 GC Services 18 2 8.95e 9 NA 2.54e8 25229551.
## 12 GC Services 18 3 8.65e 9 NA 2.24e8 21285172.
## 13 GC Services 18 4 8.41e 9 NA 2.09e8 7520996.
## # ... with 3 more variables: rehabilitation <dbl>,
## # voluntary_payments <dbl>, wage_garnishments <dbl>
loans_clean <- loans %>%
mutate(agency_name = case_when(
agency_name == "ACT" ~ "Account Control Technology, Inc.",
agency_name == "Windham" ~ "Windham Professionals, Inc.",
agency_name == "FMS" ~ "FMS Investment Corp",
agency_name == "Pioneer" ~ "Pioneer Credit Recovery, Inc",
agency_name == "GC Services" ~ "GC Services LP",
TRUE ~ agency_name
)) %>%
mutate(agency_name = str_remove_all(agency_name, "[[:punct:]]")) %>%
mutate(agency_name = str_remove_all(agency_name, regex("Inc", ignore_case = TRUE))) %>%
mutate(agency_name = str_trim(agency_name)) # %>%
# pull(agency_name) %>% unique() %>% View()
Then clean some NA’s
loans_clean %>%
arrange(agency_name, year, quarter) %>%
View()
loans_clean <- loans_clean %>%
mutate(starting = if_else(is.na(starting) == TRUE, 0, starting)) # %>% View()
loans_clean %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_tril) %>%
select(agency_name, year, quarter, starting) %>%
group_by(year, quarter) %>%
summarise(
starting = sum(starting)
) %>%
mutate(year_q = as.numeric(paste0(year, quarter))) %>%
mutate(year_q_fct = factor(year_q,
levels = c(154, 161:164, 171:174, 181:184),
labels = c("2015 Q4",
paste0("2016 Q", 1:4),
paste0("2017 Q", 1:4),
paste0("2018 Q", 1:4)
))
) %>%
ggplot(aes(x = year_q_fct, y = starting, group = 1)) +
geom_point() +
geom_line() +
geom_label(aes(label = round(starting, 0))
# , hjust = 0, nudge_y = 0.05
) +
theme_bw() +
# scale_y_continuous(limits = c(6E04, 1E05, 1.4E05)) +
# scale_x_date("Year and Quarter",
# breaks = scales::date_breaks("3 months"),
# labels = "1:17")
# theme(axis.title.x = element_text(angle = -45, vjust = 0.5)) +
labs(title = "Overall Student Loans over time for 2015 Quarter 4 to 2018 Quarter 4",
subtitle = "Made by Jason Tang with ggplot2",
x = "Year and Quarter",
y = "$ in trillions")
# +
loans_clean %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
select(agency_name, year, quarter, starting) %>%
mutate(year_q = paste0(year, "_", quarter)) %>%
ggplot(aes(x = year_q, y = starting, color = agency_name, group = agency_name)) +
geom_point() +
geom_line() +
theme_bw() +
labs(title = "Student Loans over time for 22 collection agencies",
subtitle = "@jasontang using ggplot2 #tidytuesday",
x = "Year and quarter",
y = "$ in millions")
ggp1 <- loans_clean %>%
mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments),
divide_mil) %>%
select(agency_name, year, quarter, starting, total) %>%
mutate(percent_total = round( total * 100 / starting, 1 )) %>%
mutate(year_q = paste0(year, "_", quarter)) %>%
ggplot(aes(x = year_q, y = percent_total, group = agency_name, color = agency_name)) +
geom_point() +
geom_line() # +
# expand_limits(y = 0)
ggplotly(ggp1)
## This version of Shiny is designed to work with 'htmlwidgets' >= 1.5.
## Please upgrade via install.packages('htmlwidgets').