Exploring Datasets

# reading in data from the National Student Loan Data System
# https://studentaid.gov/help-center/answers/article/what-is-nslds
library(readxl)

Portfolio.LoanStatus <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfoliobyLoanStatus.xls",
                                   trim_ws = TRUE)

Portfolio.LoanType <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfoliobyLoanType.xls",
                                 trim_ws = TRUE)

Portfolio.Summary <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfolioSummary.xls",
                                trim_ws = TRUE)

# cleaning portfolio data
Portfolio.LoanStatus <- Portfolio.LoanStatus[4:35,]
Portfolio.LoanType <- Portfolio.LoanType[5:33,]
Portfolio.Summary <- Portfolio.Summary[6:43,]

#further formatting data
# DL.DO - Direct Loans; dollars outstanding
# FFEL - Federal Family Education Loans
# PL - Perkins Loans
# DO in billions
# Recipients in millions

colnames(Portfolio.Summary) <- c("Year", "Quarter",
                                 "DL.DO", "DL.Recipients",
                                 "FFEL.DO", "FFEL.Recipients",
                                 "PL.DO", "PL.Recipients",
                                 "Total.DO", "Total.Recipients")
Portfolio.Summary[c(8,9,10),1] <- c("2013","2013","2013")
Portfolio.Summary[c(12,13,14),1] <- c("2014","2014","2014")
Portfolio.Summary[c(16,17,18),1] <- c("2015","2015","2015")
Portfolio.Summary[c(20,21,22),1] <- c("2016","2016","2016")
Portfolio.Summary[c(24,25,26),1] <- c("2017","2017","2017")
Portfolio.Summary[c(28,29,30),1] <- c("2018","2018","2018")
Portfolio.Summary[c(32,33,34),1] <- c("2019","2019","2019")
Portfolio.Summary[c(36,37,38),1] <- c("2020","2020","2020")
Portfolio.Summary[c(1:6),2] <- rep("Q4",6)
# formatting dates
library(zoo)
PS.dates <- as.Date(as.yearqtr(paste0(Portfolio.Summary$Quarter,"/",
                                     Portfolio.Summary$Year),
                               format = "Q%q/%Y"))

Simple time series plot of Federal Student Aid Portfolio Summary - Total

# only run the next two lines once. 
#Portfolio.Summary <- Portfolio.Summary[,c(-1,-2)]
#Portfolio.Summary <- cbind(PS.dates, Portfolio.Summary)

#The issue with some of the data is that the first few entries are #annual.
library(ggplot2)
# first zoo object includes pre-2013 data, which is not reported on a quarterly basis
PS.zoo <- as.zoo(Portfolio.Summary)
PS.zoo.qONLY <- as.zoo(Portfolio.Summary[-c(1:6),])
ggplot(data = PS.zoo.qONLY, aes(x = PS.dates[-c(1:6)],
                          y = as.numeric(Total.DO),
                          group = 1)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Federal Loans Outstanding WRONG",
       x = "Date",
       y = "Dollars Outstanding (Billions, USD)")

#autoplot()

Area chart - Direct Loan Portfolio by Loan Status

colnames(Portfolio.LoanStatus) <- c("Year", "Quarter",
                                 "InScool.DO", "InSchool.Recipients",
                                 "Grace.DO", "Grace.Recipients",
                                 "Repayment.DO", "Repayment.Recipients",
                                 "Deferment.DO", "Deferment.Recipients",
                                 "Forbearance.DO", "Forbearance.Recipients",
                                 "CumulDefault.DO", "CumulDefault.Recipients",
                                 "Other.DO", "Other.Recipients")

#only run once
Portfolio.LoanStatus <- Portfolio.LoanStatus[-c(1,2),]
Portfolio.LoanStatus[30,2] <- "Q4"
  
library(zoo)
PSLS.dates <- as.Date(as.yearqtr(paste0(Portfolio.LoanStatus$Quarter,"/",
                                     Portfolio.LoanStatus$Year),
                               format = "Q%q/%Y"))
# adding "zoo" formatted date to Portfolio.LoanStatus
Portfolio.LoanStatus$Date <- PSLS.dates
# only run this once
#Portfolio.LoanStatus <- Portfolio.LoanStatus[,-c(1,2)]
library(dplyr)
Portfolio.LoanStatus <- Portfolio.LoanStatus %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
PSLS.PIVOT <- Portfolio.LoanStatus[,-c(1,2)] %>%
  pivot_longer(!Date, names_to = "Category", values_to = "Value")

# extracting odd rows for dollars outstanding
odd <- seq(1,480,2)
Portfolio.LoanStatusDO <- PSLS.PIVOT[odd,]
# extracting even rows for recipients
even <- seq(2,480,2)
Portfolio.LoanStatusR <- PSLS.PIVOT[even,]
ggplot(Portfolio.LoanStatusR, aes(x = Date, y= Value, fill = Category)) +
  geom_area() +
  scale_fill_manual(name = "Loan Status", 
                    values = c("#333333", "#2B4279",
                               "#65428C", "#A1378B",
                               "#D42374", "#F62B4C",
                               "#FF5800"),
                    labels = c("Default",
                               "Deferment",
                               "Forbearance",
                               "Grace Period",
                               "In School",
                               "Other",
                               "Repayment Plan"))+
  labs(title = "Federal Student Loans: Recipients",
       x = "Year",
       y = "Millions of Borrowers")
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
ggplot(Portfolio.LoanStatusDO, aes(x = Date, y= Value, fill = Category)) +
  geom_area() + 
  geom_vline(xintercept = as.Date("2015-01-01"),
             linetype = "dashed", color= "#555555") +
  geom_vline(xintercept = as.Date("2020-04-01"),
             linetype = "dashed",
             color = "#555555") +
  scale_fill_manual(name = "Loan Status", 
                    values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94', '#bdcbcb', '#000060'),
                    labels = c("Default",
                               "Deferment",
                               "Forbearance",
                               "Grace Period",
                               "In School",
                               "Other",
                               "Repayment Plan"))+
  labs(title = "Federal Direct Loans: Dollars Outstanding",
       y = "Dollars Outstanding (Billions - USD)",
       x = "Year") +
  theme_classic(base_family = "Constantia")+
  theme(text = element_text(face = "bold"))
ggsave(
  "PowerpointData.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.9,
  width = 7,
  height = 4,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

Direct Loans by Forbearance Type

# Direct Loans by Forbearance Type
Portfolio.FT <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLbyForbearanceType.xls",
                                trim_ws = TRUE)
Portfolio.FT <- Portfolio.FT[-c(1:5,30:33),]
colnames(Portfolio.FT) <- c("Year", "Quarter",
                                 "Admin.DO", "Admin.Recipients",
                                 "Discret.DO", "Discret.Recipients",
                                 "MandatoryAdmin.DO", "MandatoryAdmin.Recipients",
                                 "Mandatory.DO", "Mandatory.Recipients",
                                 "NotReported.DO", "NotReported.Recipients")

library(zoo)
FT.dates <- as.Date(as.yearqtr(paste0(Portfolio.FT$Quarter,"/",
                                     Portfolio.FT$Year),
                               format = "Q%q/%Y"))

Portfolio.FT$Date <- FT.dates
library(dplyr)
Portfolio.FT <- Portfolio.FT[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
FT.PIVOT <- Portfolio.FT %>%
  pivot_longer(!Date, names_to = "Category", values_to = "Value")

# extracting odd rows for dollars outstanding
odd2 <- seq(1,240,2)
Portfolio.FT.DO <- FT.PIVOT[odd2,]
# extracting even rows for recipients
even2 <- seq(2,240,2)
Portfolio.FT.R <- FT.PIVOT[even2,]
ggplot(Portfolio.FT.R, aes(x = Date, y= Value, fill = Category)) +
  geom_area() +
  scale_fill_manual(name = "Forbearance Reason", 
                    values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94'),
                    labels = c("Administrative",
                               "Discretionary",
                               "Mandatory",
                               "Mandatory Administrative",
                               "Not Reported"))+
  labs(title = "Federal Student Loans: Reasons for Forbearance",
       x = "Year",
       y = "Millions of Borrowers")
# With dollars outstanding by Forbearance reason for inclusion in final report. 
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
library(dplyr)
library(fpp3)
filtered.P.FT.DO <- Portfolio.FT.DO[c(1:110),]
  

ggplot(filtered.P.FT.DO, aes(x = Date, y= Value, fill = Category)) +
  geom_area() + 
  scale_fill_manual(name = "Forbearance Type", 
                    values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94'),
                   labels = c("Administrative",
                               "Discretionary",
                              "Mandatory",
                              "Mandatory Admin",
                               "Not Reported"))+
  labs(title = "Federal Direct Loans: Dollars Outstanding",
       subtitle = "Forbearance",
       y = "Dollars Outstanding (Billions - USD)",
       x = "Year") +
  theme_classic(base_family = "Constantia")
ggsave(
  "Forbearance.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.9,
  width = 7,
  height = 4,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

Direct Loan Portfolio by Deferment Type

# Direct Loans by Forbearance Type
Portfolio.Defer <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLbyDefermentType.xls",
                                trim_ws = TRUE)
Portfolio.Defer <- Portfolio.Defer[-c(1:6,31:34),]
colnames(Portfolio.Defer) <- c("Year", "Quarter",
                                 "InSchool.DO", "InSchool.Recipients",
                                 "SixMonth.DO", "SixMonth.Recipients",
                                 "Unemployment.DO", "Unemployment.Recipients",
                                 "EconHardship.DO", "EconHardship.Recipients",
                                 "Military.DO", "Military.Recipients",
                                 "CancerTreatment.DO", "CancerTreatment.Recipients",
                                 "Other.DO", "Other.Recipients",
                                 "NotReported.DO", "NotReported.Recipients")

library(zoo)
Defer.dates <- as.Date(as.yearqtr(paste0(Portfolio.Defer$Quarter,"/",
                                     Portfolio.Defer$Year),
                               format = "Q%q/%Y"))

Portfolio.Defer$Date <- Defer.dates
library(dplyr)
Portfolio.Defer <- Portfolio.Defer[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
Defer.PIVOT <- Portfolio.Defer %>%
  pivot_longer(!Date, names_to = "Category", values_to = "Value")

# extracting odd rows for dollars outstanding
odd.d <- seq(1,384,2)
Portfolio.Defer.DO <- Defer.PIVOT[odd.d,]
# extracting even rows for recipients
even.d <- seq(2,384,2)
Portfolio.Defer.R <- Defer.PIVOT[even.d,]
ggplot(Portfolio.Defer.R, aes(x = Date, y= Value, fill = Category)) +
  geom_area() +
  scale_fill_manual(name = "Deferment Type", 
                    values = c("#333333", '#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94', '#660033', '#000060'),
                    labels = c("Cancer Treatment",
                               "Economic Hardship",
                               "In School",
                               "Military",
                               "Not Reported",
                               "Other",
                               "Six Months Post-Grad",
                               "Unemployment"))+
  labs(title = "Federal Student Loans: Reasons for Deferment",
       x = "Year",
       y = "Millions of Borrowers")
# deferment type
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))

filtered.P.Def.DO <- Portfolio.Defer.DO[c(1:176),]

ggplot(filtered.P.Def.DO, aes(x = Date, y= Value, fill = Category)) +
  geom_area() +
  scale_fill_manual(name = "Deferment Type", 
                    values = c('beige', '#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae','#574f94', '#000060', '#bdcbcb'),
                    labels = c("Cancer Treatment",
                               "Economic Hardship",
                               "In School",
                               "Military",
                               "Not Reported",
                               "Other",
                               "Six-Month", 
                               "Unemployment"))+
  labs(title = "Federal Direct Loans: Dollars Outstanding",
       y = "Dollars Outstanding (Billions - USD)",
       x = "Year",
       subtitle = "Deferment") +
  theme_classic(base_family = "Constantia")
ggsave(
  "Deferment.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.9,
  width = 7,
  height = 4,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

Direct Loans Entering Default

# "Default is defined as 361 days delinquent"
# Direct Loans by Forbearance Type
DL.Default <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLEnteringDefaults.xls",
                                trim_ws = TRUE)
DL.Default <- DL.Default[-c(1:5,30:38),]
colnames(DL.Default) <- c("Year", "Quarter",
                                 "DRFPQ.Billions", "RIRFPQ.Thous",
                                 "Entering.USD.Billions", "Entering.Borrowers.Thous",
                                 "PercentDOinRepPrevDef", "PercentBorrowersinRepPrevDef")

library(zoo)
DL.Default.dates <- as.Date(as.yearqtr(paste0(DL.Default$Quarter,"/",
                                    DL.Default$Year),
                               format = "Q%q/%Y"))

DL.Default$Date <- DL.Default.dates
library(dplyr)
DL.Default <- DL.Default[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
DL.Default.PIVOT <- DL.Default %>%
  pivot_longer(!Date, names_to = "Category", values_to = "Value")

# extracting odd rows for dollars outstanding
enter.index <- seq(3,144,6)
Portfolio.Default.USD <- DL.Default.PIVOT[c(enter.index),]
# extracting even rows for recipients
enter.thou.index <- seq(4,144,6)
Portfolio.Default.PEOPLE <- DL.Default.PIVOT[enter.thou.index,]
ggplot(Portfolio.Default.USD, aes(x = Date, y= Value, fill = Category)) +
  geom_line() +
  #scale_fill_manual(name = "Deferment Type", 
  #                  values = c("#333333", "#2B4279",
  #                             "#65428C", "#A1378B",
  #                             "#D42374", "#F62B4C"),
  #                  labels = c("one", "two",
  #                             "three", "four",
  #                             "five", "six"))+
  labs(title = "Federal Student Loans: Entering Default",
       x = "Year",
       y = "Dollars Outstanding (Billions - USD)")
ggplot(Portfolio.Default.PEOPLE, aes(x = Date, y= Value, fill = Category)) +
  geom_line() +
  #scale_fill_manual(name = "Deferment Type", 
  #                  values = c("#333333", "#2B4279",
  #                             "#65428C", "#A1378B",
  #                             "#D42374", "#F62B4C"),
  #                  labels = c("one", "two",
  #                             "three", "four",
  #                             "five", "six"))+
  labs(title = "Federal Student Loans: Borrowers Entering Default",
       x = "Year",
       y = "Number of Borrowers (Thousands)")
# creating CSV files from cleaned and formatted datasets.

# Direct Student Loan Status Portfolio - Pivoted
write.csv(PSLS.PIVOT,
          "PortfolioLS.Pivot.csv",
          row.names = FALSE)

# Direct Student Loans (Forbearance) - Pivoted
write.csv(FT.PIVOT,
          "PortfolioFOR.Pivot.csv",
          row.names = FALSE)

# Direct Student Loans (Deferment) - Pivoted
write.csv(Defer.PIVOT,
          "PortfolioDefer.Pivot.csv",
          row.names = FALSE)

# Direct Student Loans (Default) - Pivoted
write.csv(DL.Default.PIVOT,
          "PortfolioDefault.Pivot.csv",
          row.names = FALSE)