loans <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-11-26/loans.csv")
## Parsed with column specification:
## cols(
##   agency_name = col_character(),
##   year = col_double(),
##   quarter = col_double(),
##   starting = col_double(),
##   added = col_double(),
##   total = col_double(),
##   consolidation = col_double(),
##   rehabilitation = col_double(),
##   voluntary_payments = col_double(),
##   wage_garnishments = col_double()
## )

For this year

divide_mil <- function(x) {
  x = x / 1E6
}

divide_tril <- function(x) {
  x = x / 1E9
}

percent_var <- function(x, y) {
  y = y / x
}


loans %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  arrange(desc(total))
## # A tibble: 291 x 10
##    agency_name  year quarter starting added total consolidation
##    <chr>       <dbl>   <dbl>    <dbl> <dbl> <dbl>         <dbl>
##  1 ACT            18       2    9803.   NA   395.          22.9
##  2 Account Co~    17       4   10332.   NA   358.          30.0
##  3 Account Co~    18       1    9903.   NA   354.          23.2
##  4 Account Co~    17       3   10659.   NA   354.          45.3
##  5 ConServe       17       4    9697.   NA   348.          29.1
##  6 ConServe       18       1    9246.   NA   343.          23.5
##  7 ConServe       17       3   10026.   NA   341.          43.9
##  8 Account Co~    17       2   11185.   NA   332.          52.1
##  9 Account Co~    17       1   11181. 1762.  325.          38.0
## 10 ConServe       17       1   10142. 1762.  318.          36.4
## # ... with 281 more rows, and 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  mutate(percent_total = total / starting * 100)
## # A tibble: 291 x 11
##    agency_name  year quarter starting added total consolidation
##    <chr>       <dbl>   <dbl>    <dbl> <dbl> <dbl>         <dbl>
##  1 Account Co~    15       4    5808. 1041. 123.          20.1 
##  2 Allied Int~    15       4    3693.   NA  113.          11.5 
##  3 CBE Group      15       4    2364.   NA   83.9          7.38
##  4 Coast Prof~    15       4     704.   NA   99.6          3.40
##  5 Collection~    15       4    2953.   NA   75.7          8.95
##  6 Collecto, ~    15       4    2297.   NA   78.8          6.95
##  7 ConServe       15       4    4803. 1041. 149.          19.7 
##  8 Delta Mana~    15       4    2174.   NA   43.8          7.00
##  9 Diversifie~    15       4    2518.   NA   60.6          8.09
## 10 ERS            15       4     952.   NA   91.8          4.63
## # ... with 281 more rows, and 4 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>, percent_total <dbl>
loans %>% 
  count(agency_name, sort = TRUE)
## # A tibble: 42 x 2
##    agency_name                          n
##    <chr>                            <int>
##  1 ConServe                            13
##  2 Action Financial Services           11
##  3 Central Research                    11
##  4 ERS                                 11
##  5 Account Control Technology, Inc.    10
##  6 Bass and Associates                  9
##  7 Coast Professional, Inc.             9
##  8 FH Cann and Associates               9
##  9 FMS Investment Corp                  9
## 10 GC Services LP                       9
## # ... with 32 more rows
DT1 <- loans %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  filter(agency_name == "ConServe")
library(ggplot2)
DT1 %>% 
  mutate(year_q = paste0(year, "_", quarter)) %>% 
  ggplot(aes(x = year_q, y = starting)) +
  geom_point() + 
  geom_line()
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

loans %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  select(agency_name, year, quarter, starting) %>% 
  mutate(year_q = paste0(year, "_", quarter)) %>% 
  ggplot(aes(x = year_q, y = starting, color = agency_name)) +
  geom_point()
## Warning: Removed 9 rows containing missing values (geom_point).

Some cleaning of names

loans %>% 
  arrange(desc(agency_name)) %>% pull(agency_name) %>% unique() %>% 
  View()
loans %>% 
  filter(str_detect(agency_name, "Windham"))
## # A tibble: 13 x 10
##    agency_name  year quarter starting   added  total consolidation
##    <chr>       <dbl>   <dbl>    <dbl>   <dbl>  <dbl>         <dbl>
##  1 Windham Pr~    15       4  4.57e 9  1.04e9 1.21e8     19228981.
##  2 Windham Pr~    16       1  5.54e 9  9.87e8 1.49e8     24631930 
##  3 Windham Pr~    16       2  6.40e 9  5.69e8 2.03e8     31373865.
##  4 Windham Pr~    16       3  6.72e 9  2.24e9 2.55e8     33915336.
##  5 Windham Pr~    16       4  8.58e 9  1.65e9 2.54e8     39913680.
##  6 Windham Pr~    17       1  1.00e10  1.76e9 2.82e8     35346217.
##  7 Windham Pr~    17       2  8.47e 9 NA      2.33e8     37079215.
##  8 Windham Pr~    17       3  8.06e 9 NA      2.70e8     29380228.
##  9 Windham        17       4  7.79e 9 NA      2.78e8     22035921.
## 10 Windham Pr~    18       1  7.42e 9 NA      2.66e8     17395610.
## 11 Windham        18       2  7.35e 9 NA      2.00e8     17280439.
## 12 Windham        18       3  7.10e 9 NA      1.71e8     17347352.
## 13 Windham        18       4  6.90e 9 NA      1.52e8      7648379.
## # ... with 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>% 
  filter(agency_name %in% c("Account Control Technology, Inc.", "ACT"))
## # A tibble: 13 x 10
##    agency_name  year quarter starting   added  total consolidation
##    <chr>       <dbl>   <dbl>    <dbl>   <dbl>  <dbl>         <dbl>
##  1 Account Co~    15       4  5.81e 9  1.04e9 1.23e8     20081894.
##  2 Account Co~    16       1  6.80e 9  9.87e8 1.84e8     23239061.
##  3 Account Co~    16       2  7.64e 9  5.69e8 2.30e8     35431325.
##  4 Account Co~    16       3  7.95e 9  2.24e9 3.06e8     38180002.
##  5 Account Co~    16       4  9.75e 9  1.65e9 2.82e8     39679424.
##  6 Account Co~    17       1  1.12e10  1.76e9 3.25e8     37989717.
##  7 Account Co~    17       2  1.12e10 NA      3.32e8     52126784.
##  8 Account Co~    17       3  1.07e10 NA      3.54e8     45340705.
##  9 Account Co~    17       4  1.03e10 NA      3.58e8     29977209.
## 10 Account Co~    18       1  9.90e 9 NA      3.54e8     23214940.
## 11 ACT            18       2  9.80e 9 NA      3.95e8     22850758.
## 12 ACT            18       3  9.37e 9 NA      2.81e8     21002430.
## 13 ACT            18       4  9.09e 9 NA      2.71e8      9619977.
## # ... with 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>% 
  filter(agency_name %in% c("FMS", "FMS Investment Corp", "Financial Asset Management Systems, Inc."))
## # A tibble: 20 x 10
##    agency_name  year quarter starting   added  total consolidation
##    <chr>       <dbl>   <dbl>    <dbl>   <dbl>  <dbl>         <dbl>
##  1 Financial ~    15       4  1.00e 9 NA      5.19e7      2868015.
##  2 FMS Invest~    15       4  4.81e 9  1.04e9 1.48e8     20018885.
##  3 Financial ~    16       1  9.72e 8 NA      4.76e7      2298349.
##  4 FMS Invest~    16       1  5.77e 9  9.87e8 1.71e8     23948303.
##  5 Financial ~    16       2  9.24e 8 NA      4.04e7      2841371.
##  6 FMS Invest~    16       2  6.60e 9  5.69e8 2.20e8     31481056.
##  7 Financial ~    16       3  8.91e 8 NA      3.15e7      2699873.
##  8 FMS Invest~    16       3  6.92e 9  2.24e9 2.61e8     34826000.
##  9 Financial ~    16       4  2.13e 8 NA      1.75e7       271033.
## 10 FMS Invest~    16       4  8.76e 9  1.65e9 2.40e8     35030272.
## 11 Financial ~    17       1  1.22e 8 NA      1.78e7       269607.
## 12 FMS Invest~    17       1  1.02e10  1.76e9 2.94e8     36139648.
## 13 Financial ~    17       2  9.43e 7 NA      7.43e6       107265.
## 14 FMS Invest~    17       2  1.05e10 NA      2.42e8     52340470.
## 15 FMS Invest~    17       3  1.01e10 NA      2.99e8     43495127.
## 16 FMS            17       4  9.82e 9 NA      2.87e8     32768434.
## 17 FMS Invest~    18       1  9.53e 9 NA      2.76e8     24214027.
## 18 FMS            18       2  9.46e 9 NA      2.67e8     27904065.
## 19 FMS            18       3  9.17e 9 NA      2.26e8     22455269.
## 20 FMS            18       4  8.95e 9 NA      2.09e8      9724624.
## # ... with 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>% 
  filter(agency_name %in% c("Pioneer Credit Recovery, Inc", "Pioneer"))
## # A tibble: 11 x 10
##    agency_name  year quarter starting   added  total consolidation
##    <chr>       <dbl>   <dbl>    <dbl>   <dbl>  <dbl>         <dbl>
##  1 Pioneer Cr~    15       4   6.63e8 NA      7.06e7      3085110.
##  2 Pioneer Cr~    16       1   6.08e8 NA      5.58e7      1858437.
##  3 Pioneer Cr~    16       2   2.67e8 NA      1.98e7      1027051.
##  4 Pioneer Cr~    16       3   2.13e8 NA      1.63e7       861171.
##  5 Pioneer Cr~    16       4   1.93e8 NA      1.04e7       729499.
##  6 Pioneer Cr~    17       1   1.31e8 NA      1.01e7       432938.
##  7 Pioneer Cr~    17       2   1.12e8 NA      4.65e6       607714.
##  8 Pioneer Cr~    18       1  NA       3.56e9 1.94e6      1698413.
##  9 Pioneer        18       2   3.57e9  5.36e8 2.74e7     22558336.
## 10 Pioneer        18       3   3.92e9 NA      3.37e7     26016031.
## 11 Pioneer        18       4   3.90e9  9.46e9 1.05e8     27306635.
## # ... with 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans %>% 
  filter(str_detect(agency_name, "GC Services"))
## # A tibble: 13 x 10
##    agency_name  year quarter starting   added  total consolidation
##    <chr>       <dbl>   <dbl>    <dbl>   <dbl>  <dbl>         <dbl>
##  1 GC Service~    15       4  4.46e 9  1.04e9 1.10e8     16529578.
##  2 GC Service~    16       1  5.44e 9  9.87e8 1.30e8     21940761.
##  3 GC Service~    16       2  6.32e 9  5.69e8 1.68e8     26898015.
##  4 GC Service~    16       3  6.67e 9  2.24e9 2.32e8     34287491.
##  5 GC Service~    16       4  8.56e 9  1.65e9 2.10e8     35371922.
##  6 GC Service~    17       1  1.00e10  1.76e9 2.40e8     36136069.
##  7 GC Service~    17       2  1.01e10 NA      2.45e8     48161564.
##  8 GC Service~    17       3  9.64e 9 NA      2.71e8     38634012.
##  9 GC Services    17       4  9.37e 9 NA      2.76e8     25062946.
## 10 GC Service~    18       1  9.01e 9 NA      2.88e8     20977152.
## 11 GC Services    18       2  8.95e 9 NA      2.54e8     25229551.
## 12 GC Services    18       3  8.65e 9 NA      2.24e8     21285172.
## 13 GC Services    18       4  8.41e 9 NA      2.09e8      7520996.
## # ... with 3 more variables: rehabilitation <dbl>,
## #   voluntary_payments <dbl>, wage_garnishments <dbl>
loans_clean <- loans %>% 
  mutate(agency_name = case_when(
    agency_name == "ACT" ~ "Account Control Technology, Inc.", 
    agency_name == "Windham" ~ "Windham Professionals, Inc.",
    agency_name == "FMS" ~ "FMS Investment Corp",
    agency_name == "Pioneer" ~ "Pioneer Credit Recovery, Inc",
    agency_name == "GC Services" ~ "GC Services LP",
    TRUE ~ agency_name
  )) %>% 
  mutate(agency_name = str_remove_all(agency_name, "[[:punct:]]")) %>% 
  mutate(agency_name = str_remove_all(agency_name, regex("Inc", ignore_case = TRUE))) %>% 
  mutate(agency_name = str_trim(agency_name)) # %>% 
  # pull(agency_name) %>% unique() %>% View()

Then clean some NA’s

loans_clean %>% 
  arrange(agency_name, year, quarter) %>% 
  View()
loans_clean <- loans_clean %>% 
  mutate(starting  = if_else(is.na(starting) == TRUE, 0, starting)) # %>% View()

Data_visualization

Graph for overall student loans

loans_clean %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_tril) %>% 
  select(agency_name, year, quarter, starting) %>% 
  group_by(year, quarter) %>% 
  summarise(
    starting = sum(starting)
  ) %>% 
  mutate(year_q = as.numeric(paste0(year, quarter)))  %>% 
  mutate(year_q_fct = factor(year_q, 
                             levels = c(154, 161:164, 171:174, 181:184), 
                             labels = c("2015 Q4", 
                                        paste0("2016 Q", 1:4), 
                                        paste0("2017 Q", 1:4),
                                        paste0("2018 Q", 1:4)
                              ))
         ) %>% 
  ggplot(aes(x = year_q_fct, y = starting, group = 1)) +
  geom_point() + 
  geom_line() +
  geom_label(aes(label = round(starting, 0))
            # , hjust = 0, nudge_y = 0.05
            ) + 
  theme_bw() +
  # scale_y_continuous(limits = c(6E04, 1E05, 1.4E05)) +
  # scale_x_date("Year and Quarter", 
                # breaks = scales::date_breaks("3 months"),
                # labels = "1:17")
  # theme(axis.title.x = element_text(angle = -45, vjust = 0.5)) +
  labs(title = "Overall Student Loans over time for 2015 Quarter 4 to 2018 Quarter 4", 
       subtitle = "Made by Jason Tang with ggplot2", 
       x = "Year and Quarter", 
       y = "$ in trillions")

  # +

Graph for student loans by loaning agency

loans_clean %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  select(agency_name, year, quarter, starting) %>% 
  mutate(year_q = paste0(year, "_", quarter)) %>% 
  ggplot(aes(x = year_q, y = starting, color = agency_name, group = agency_name)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  labs(title = "Student Loans over time for 22 collection agencies", 
       subtitle = "@jasontang using ggplot2 #tidytuesday", 
       x = "Year and quarter", 
       y = "$ in millions")

New

ggp1 <- loans_clean %>% 
  mutate_at(.vars = vars(starting, added, total, consolidation, rehabilitation, voluntary_payments, wage_garnishments), 
            divide_mil) %>% 
  select(agency_name, year, quarter, starting, total) %>% 
  mutate(percent_total = round( total * 100 / starting, 1 )) %>% 
  mutate(year_q = paste0(year, "_", quarter)) %>% 
  ggplot(aes(x = year_q, y = percent_total, group = agency_name, color = agency_name)) +
  geom_point() +
  geom_line() # +
  # expand_limits(y = 0)
ggplotly(ggp1)
## This version of Shiny is designed to work with 'htmlwidgets' >= 1.5.
##     Please upgrade via install.packages('htmlwidgets').