Exploring Datasets
# reading in data from the National Student Loan Data System
# https://studentaid.gov/help-center/answers/article/what-is-nslds
library(readxl)
Portfolio.LoanStatus <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfoliobyLoanStatus.xls",
trim_ws = TRUE)
Portfolio.LoanType <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfoliobyLoanType.xls",
trim_ws = TRUE)
Portfolio.Summary <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/PortfolioSummary.xls",
trim_ws = TRUE)
# cleaning portfolio data
Portfolio.LoanStatus <- Portfolio.LoanStatus[4:35,]
Portfolio.LoanType <- Portfolio.LoanType[5:33,]
Portfolio.Summary <- Portfolio.Summary[6:43,]
#further formatting data
# DL.DO - Direct Loans; dollars outstanding
# FFEL - Federal Family Education Loans
# PL - Perkins Loans
# DO in billions
# Recipients in millions
colnames(Portfolio.Summary) <- c("Year", "Quarter",
"DL.DO", "DL.Recipients",
"FFEL.DO", "FFEL.Recipients",
"PL.DO", "PL.Recipients",
"Total.DO", "Total.Recipients")
Portfolio.Summary[c(8,9,10),1] <- c("2013","2013","2013")
Portfolio.Summary[c(12,13,14),1] <- c("2014","2014","2014")
Portfolio.Summary[c(16,17,18),1] <- c("2015","2015","2015")
Portfolio.Summary[c(20,21,22),1] <- c("2016","2016","2016")
Portfolio.Summary[c(24,25,26),1] <- c("2017","2017","2017")
Portfolio.Summary[c(28,29,30),1] <- c("2018","2018","2018")
Portfolio.Summary[c(32,33,34),1] <- c("2019","2019","2019")
Portfolio.Summary[c(36,37,38),1] <- c("2020","2020","2020")
Portfolio.Summary[c(1:6),2] <- rep("Q4",6)
# formatting dates
library(zoo)
PS.dates <- as.Date(as.yearqtr(paste0(Portfolio.Summary$Quarter,"/",
Portfolio.Summary$Year),
format = "Q%q/%Y"))
Simple time series plot of Federal Student Aid Portfolio Summary - Total
# only run the next two lines once.
#Portfolio.Summary <- Portfolio.Summary[,c(-1,-2)]
#Portfolio.Summary <- cbind(PS.dates, Portfolio.Summary)
#The issue with some of the data is that the first few entries are #annual.
library(ggplot2)
# first zoo object includes pre-2013 data, which is not reported on a quarterly basis
PS.zoo <- as.zoo(Portfolio.Summary)
PS.zoo.qONLY <- as.zoo(Portfolio.Summary[-c(1:6),])
ggplot(data = PS.zoo.qONLY, aes(x = PS.dates[-c(1:6)],
y = as.numeric(Total.DO),
group = 1)) +
geom_line() +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Federal Loans Outstanding WRONG",
x = "Date",
y = "Dollars Outstanding (Billions, USD)")
#autoplot()
Area chart - Direct Loan Portfolio by Loan Status
colnames(Portfolio.LoanStatus) <- c("Year", "Quarter",
"InScool.DO", "InSchool.Recipients",
"Grace.DO", "Grace.Recipients",
"Repayment.DO", "Repayment.Recipients",
"Deferment.DO", "Deferment.Recipients",
"Forbearance.DO", "Forbearance.Recipients",
"CumulDefault.DO", "CumulDefault.Recipients",
"Other.DO", "Other.Recipients")
#only run once
Portfolio.LoanStatus <- Portfolio.LoanStatus[-c(1,2),]
Portfolio.LoanStatus[30,2] <- "Q4"
library(zoo)
PSLS.dates <- as.Date(as.yearqtr(paste0(Portfolio.LoanStatus$Quarter,"/",
Portfolio.LoanStatus$Year),
format = "Q%q/%Y"))
# adding "zoo" formatted date to Portfolio.LoanStatus
Portfolio.LoanStatus$Date <- PSLS.dates
# only run this once
#Portfolio.LoanStatus <- Portfolio.LoanStatus[,-c(1,2)]
library(dplyr)
Portfolio.LoanStatus <- Portfolio.LoanStatus %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
PSLS.PIVOT <- Portfolio.LoanStatus[,-c(1,2)] %>%
pivot_longer(!Date, names_to = "Category", values_to = "Value")
# extracting odd rows for dollars outstanding
odd <- seq(1,480,2)
Portfolio.LoanStatusDO <- PSLS.PIVOT[odd,]
# extracting even rows for recipients
even <- seq(2,480,2)
Portfolio.LoanStatusR <- PSLS.PIVOT[even,]
ggplot(Portfolio.LoanStatusR, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
scale_fill_manual(name = "Loan Status",
values = c("#333333", "#2B4279",
"#65428C", "#A1378B",
"#D42374", "#F62B4C",
"#FF5800"),
labels = c("Default",
"Deferment",
"Forbearance",
"Grace Period",
"In School",
"Other",
"Repayment Plan"))+
labs(title = "Federal Student Loans: Recipients",
x = "Year",
y = "Millions of Borrowers")
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
ggplot(Portfolio.LoanStatusDO, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
geom_vline(xintercept = as.Date("2015-01-01"),
linetype = "dashed", color= "#555555") +
geom_vline(xintercept = as.Date("2020-04-01"),
linetype = "dashed",
color = "#555555") +
scale_fill_manual(name = "Loan Status",
values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94', '#bdcbcb', '#000060'),
labels = c("Default",
"Deferment",
"Forbearance",
"Grace Period",
"In School",
"Other",
"Repayment Plan"))+
labs(title = "Federal Direct Loans: Dollars Outstanding",
y = "Dollars Outstanding (Billions - USD)",
x = "Year") +
theme_classic(base_family = "Constantia")+
theme(text = element_text(face = "bold"))
ggsave(
"PowerpointData.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.9,
width = 7,
height = 4,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
Direct Loans by Forbearance Type
# Direct Loans by Forbearance Type
Portfolio.FT <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLbyForbearanceType.xls",
trim_ws = TRUE)
Portfolio.FT <- Portfolio.FT[-c(1:5,30:33),]
colnames(Portfolio.FT) <- c("Year", "Quarter",
"Admin.DO", "Admin.Recipients",
"Discret.DO", "Discret.Recipients",
"MandatoryAdmin.DO", "MandatoryAdmin.Recipients",
"Mandatory.DO", "Mandatory.Recipients",
"NotReported.DO", "NotReported.Recipients")
library(zoo)
FT.dates <- as.Date(as.yearqtr(paste0(Portfolio.FT$Quarter,"/",
Portfolio.FT$Year),
format = "Q%q/%Y"))
Portfolio.FT$Date <- FT.dates
library(dplyr)
Portfolio.FT <- Portfolio.FT[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
FT.PIVOT <- Portfolio.FT %>%
pivot_longer(!Date, names_to = "Category", values_to = "Value")
# extracting odd rows for dollars outstanding
odd2 <- seq(1,240,2)
Portfolio.FT.DO <- FT.PIVOT[odd2,]
# extracting even rows for recipients
even2 <- seq(2,240,2)
Portfolio.FT.R <- FT.PIVOT[even2,]
ggplot(Portfolio.FT.R, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
scale_fill_manual(name = "Forbearance Reason",
values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94'),
labels = c("Administrative",
"Discretionary",
"Mandatory",
"Mandatory Administrative",
"Not Reported"))+
labs(title = "Federal Student Loans: Reasons for Forbearance",
x = "Year",
y = "Millions of Borrowers")
# With dollars outstanding by Forbearance reason for inclusion in final report.
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
library(dplyr)
library(fpp3)
filtered.P.FT.DO <- Portfolio.FT.DO[c(1:110),]
ggplot(filtered.P.FT.DO, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
scale_fill_manual(name = "Forbearance Type",
values = c('#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94'),
labels = c("Administrative",
"Discretionary",
"Mandatory",
"Mandatory Admin",
"Not Reported"))+
labs(title = "Federal Direct Loans: Dollars Outstanding",
subtitle = "Forbearance",
y = "Dollars Outstanding (Billions - USD)",
x = "Year") +
theme_classic(base_family = "Constantia")
ggsave(
"Forbearance.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.9,
width = 7,
height = 4,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
Direct Loan Portfolio by Deferment Type
# Direct Loans by Forbearance Type
Portfolio.Defer <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLbyDefermentType.xls",
trim_ws = TRUE)
Portfolio.Defer <- Portfolio.Defer[-c(1:6,31:34),]
colnames(Portfolio.Defer) <- c("Year", "Quarter",
"InSchool.DO", "InSchool.Recipients",
"SixMonth.DO", "SixMonth.Recipients",
"Unemployment.DO", "Unemployment.Recipients",
"EconHardship.DO", "EconHardship.Recipients",
"Military.DO", "Military.Recipients",
"CancerTreatment.DO", "CancerTreatment.Recipients",
"Other.DO", "Other.Recipients",
"NotReported.DO", "NotReported.Recipients")
library(zoo)
Defer.dates <- as.Date(as.yearqtr(paste0(Portfolio.Defer$Quarter,"/",
Portfolio.Defer$Year),
format = "Q%q/%Y"))
Portfolio.Defer$Date <- Defer.dates
library(dplyr)
Portfolio.Defer <- Portfolio.Defer[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
Defer.PIVOT <- Portfolio.Defer %>%
pivot_longer(!Date, names_to = "Category", values_to = "Value")
# extracting odd rows for dollars outstanding
odd.d <- seq(1,384,2)
Portfolio.Defer.DO <- Defer.PIVOT[odd.d,]
# extracting even rows for recipients
even.d <- seq(2,384,2)
Portfolio.Defer.R <- Defer.PIVOT[even.d,]
ggplot(Portfolio.Defer.R, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
scale_fill_manual(name = "Deferment Type",
values = c("#333333", '#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae', '#574f94', '#660033', '#000060'),
labels = c("Cancer Treatment",
"Economic Hardship",
"In School",
"Military",
"Not Reported",
"Other",
"Six Months Post-Grad",
"Unemployment"))+
labs(title = "Federal Student Loans: Reasons for Deferment",
x = "Year",
y = "Millions of Borrowers")
# deferment type
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
filtered.P.Def.DO <- Portfolio.Defer.DO[c(1:176),]
ggplot(filtered.P.Def.DO, aes(x = Date, y= Value, fill = Category)) +
geom_area() +
scale_fill_manual(name = "Deferment Type",
values = c('beige', '#d1ffff', '#b3d0e4', '#95a3c9', '#7678ae','#574f94', '#000060', '#bdcbcb'),
labels = c("Cancer Treatment",
"Economic Hardship",
"In School",
"Military",
"Not Reported",
"Other",
"Six-Month",
"Unemployment"))+
labs(title = "Federal Direct Loans: Dollars Outstanding",
y = "Dollars Outstanding (Billions - USD)",
x = "Year",
subtitle = "Deferment") +
theme_classic(base_family = "Constantia")
ggsave(
"Deferment.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.9,
width = 7,
height = 4,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
Direct Loans Entering Default
# "Default is defined as 361 days delinquent"
# Direct Loans by Forbearance Type
DL.Default <- read_excel("C:/Users/georg/OneDrive/Rice - Spring 2021/STAT 450/New Datasets/DLEnteringDefaults.xls",
trim_ws = TRUE)
DL.Default <- DL.Default[-c(1:5,30:38),]
colnames(DL.Default) <- c("Year", "Quarter",
"DRFPQ.Billions", "RIRFPQ.Thous",
"Entering.USD.Billions", "Entering.Borrowers.Thous",
"PercentDOinRepPrevDef", "PercentBorrowersinRepPrevDef")
library(zoo)
DL.Default.dates <- as.Date(as.yearqtr(paste0(DL.Default$Quarter,"/",
DL.Default$Year),
format = "Q%q/%Y"))
DL.Default$Date <- DL.Default.dates
library(dplyr)
DL.Default <- DL.Default[,-c(1,2)] %>% mutate_if(is.character,as.numeric)
# pivoting the dataset to be used with area chart
library(tidyr)
library(dplyr)
library(readr)
DL.Default.PIVOT <- DL.Default %>%
pivot_longer(!Date, names_to = "Category", values_to = "Value")
# extracting odd rows for dollars outstanding
enter.index <- seq(3,144,6)
Portfolio.Default.USD <- DL.Default.PIVOT[c(enter.index),]
# extracting even rows for recipients
enter.thou.index <- seq(4,144,6)
Portfolio.Default.PEOPLE <- DL.Default.PIVOT[enter.thou.index,]
ggplot(Portfolio.Default.USD, aes(x = Date, y= Value, fill = Category)) +
geom_line() +
#scale_fill_manual(name = "Deferment Type",
# values = c("#333333", "#2B4279",
# "#65428C", "#A1378B",
# "#D42374", "#F62B4C"),
# labels = c("one", "two",
# "three", "four",
# "five", "six"))+
labs(title = "Federal Student Loans: Entering Default",
x = "Year",
y = "Dollars Outstanding (Billions - USD)")
ggplot(Portfolio.Default.PEOPLE, aes(x = Date, y= Value, fill = Category)) +
geom_line() +
#scale_fill_manual(name = "Deferment Type",
# values = c("#333333", "#2B4279",
# "#65428C", "#A1378B",
# "#D42374", "#F62B4C"),
# labels = c("one", "two",
# "three", "four",
# "five", "six"))+
labs(title = "Federal Student Loans: Borrowers Entering Default",
x = "Year",
y = "Number of Borrowers (Thousands)")
# creating CSV files from cleaned and formatted datasets.
# Direct Student Loan Status Portfolio - Pivoted
write.csv(PSLS.PIVOT,
"PortfolioLS.Pivot.csv",
row.names = FALSE)
# Direct Student Loans (Forbearance) - Pivoted
write.csv(FT.PIVOT,
"PortfolioFOR.Pivot.csv",
row.names = FALSE)
# Direct Student Loans (Deferment) - Pivoted
write.csv(Defer.PIVOT,
"PortfolioDefer.Pivot.csv",
row.names = FALSE)
# Direct Student Loans (Default) - Pivoted
write.csv(DL.Default.PIVOT,
"PortfolioDefault.Pivot.csv",
row.names = FALSE)