Merging data sets based on date and loan status.
library(fpp3)
library(tidyverse)
df_list <- list(DL.LoanStatus.p,
DL.ForbearanceType.p,
DL.Deferment.p,
DL.Default.p)
# Correcting typo in data
DL.LoanStatus.p <- DL.LoanStatus.p %>%
mutate(Loan.Status = str_replace(Loan.Status,
"Forebearance.DO",
"Forbearance.DO"))
m1 <- merge(DL.LoanStatus.p,
DL.ForbearanceType.p,
by = c("Date","Loan.Status"),
all.x = TRUE)
final.merge <- merge(m1,
DL.Deferment.p,
by = c("Date","Loan.Status"),
all.x = TRUE)
#final.merge <- merge(m2,
# DL.Default.p,
# by = c("Date","Loan.Status"),
# all.x = TRUE)
# combining columns that were separated by "merge" function
# in order to match hierarchical structure required
subcat.index <- seq(4, ncol(final.merge), 2)
DO.sub.index <- seq(5, ncol(final.merge), 2)
unite1 <- final.merge %>%
unite("Sub.Cat",
all_of(subcat.index), na.rm = TRUE, remove = FALSE)
unite2 <- final.merge %>%
unite("DO.Sub.Cat",
all_of(DO.sub.index), na.rm = TRUE, remove = FALSE)
unite1$Do.Sub.Cat <- unite2$DO.Sub.Cat
full_DL.data <- unite1[,c(1:4,ncol(unite1))]
library(zoo)
new.dates <- as.yearqtr(full_DL.data$Date,
format = "%Y-%m-%d")
full_DL.data$Date <- yearquarter(new.dates)
# Filling in missing hierarchies with upper-level data.
for (i in 1:nrow(full_DL.data)){
Date.i <- full_DL.data[i,1]
LS.i <- full_DL.data[i,2]
DO.i <- full_DL.data[i,3]
Sub.Cat.i <- full_DL.data[i,4]
DO.Sub.Cat.i <- full_DL.data[i,5]
if (Sub.Cat.i == "") {
full_DL.data[i,4] <- LS.i
full_DL.data[i,5] <- as.numeric(DO.i)
}
}
# Zero values represented with empty string.
# Replacing empty string with 0
full_DL.data[,2:ncol(full_DL.data)] <- full_DL.data[,-1] %>%
mutate_all(funs(replace(.,.=="",0)))
# Removing data so that only "Dollars Outstanding" are included
index <- 0
remove.index <- c()
for (string.i in full_DL.data$Loan.Status){
index <- index + 1
if(grepl(".Recipients", string.i) == TRUE) {
remove.index <- append(remove.index, index)
}
}
indexSUB <- 0
remove.indexSUB <- c()
for (string.i in full_DL.data$Sub.Cat){
indexSUB <- indexSUB + 1
if(grepl(".Recipients", string.i) == TRUE) {
remove.indexSUB <- append(remove.indexSUB, indexSUB)
}
}
# Removing recipient data
# Dollars outstanding data is all that is left
library(lubridate)
remove.Recipients <- union(remove.index, remove.indexSUB)
full_DL.data <- full_DL.data[-c(remove.Recipients),]
# formatting data
full_hierarchy <- as_tsibble(full_DL.data,
index = Date,
key = c("Loan.Status",
"Sub.Cat")) %>%
aggregate_key(Loan.Status/Sub.Cat,
Dollars.Outstanding = sum(as.numeric(Do.Sub.Cat))) %>%
filter_index("2015 Q1" ~ "2020 Q1")
# Filtering data for 2015 onwards
hierarchy.Covid <- as_tsibble(full_DL.data,
index = Date,
key = c("Loan.Status",
"Sub.Cat")) %>%
aggregate_key(Loan.Status/Sub.Cat,
Dollars.Outstanding = sum(as.numeric(Do.Sub.Cat))) %>%
filter_index("2020 Q2" ~.)
—————-FORECASTING—————–
# Generating bottom-up forecast
bu_forecast <- full_hierarchy %>%
model(base = ETS(Dollars.Outstanding)) %>%
reconcile(bu = bottom_up(base)) %>%
forecast(lambda = 0)
# "to generate bootstrapped prediction intervals in this way, we simply set bootstrap = TRUE in the forecast() function"
# Generating top-down forecast
# Using forecast proportions, rather than historical averages
td_forecast <- full_hierarchy %>%
model(base = ETS(Dollars.Outstanding)) %>%
reconcile(td = top_down(models = base,
method = "forecast_proportions")) %>%
forecast(lambda = 0)
# Generating middle-out forecast
#mo_forecast <- full_hierarchy %>%
# model(ets = ETS(Dollars.Outstanding)) %>%
# reconcile(td = middle_out(models = ets,
# level = choose_level, method = "forecast_proportions")) %>%
# forecast()
# Minimum Trace (MinT) optimal reconciliation approach
mint_fit <- full_hierarchy %>%
model(base = ETS(Dollars.Outstanding)) %>%
reconcile(
bu = bottom_up(base),
td = top_down(base, method = "forecast_proportions"),
ols = min_trace(base, method = "ols"),
mint = min_trace(base, method = "wls_struct")
) %>%
forecast(lambda = 0)
#mint_forecast <- mint_fit %>%
# forecast (h = "2 years")
# Second Attempt: Minimum Trace Forecasts
# Minimum Trace OLS
#MinTOLS_forecast <- full_hierarchy %>%
# model(base = ETS(Dollars.Outstanding)) %>%
# reconcile(ols = min_trace(base,
# method = "ols")) %>%
# forecast(lambda = 0)
# Minimum Trace: Shrink
#MinTShrink_forecast <- full_hierarchy %>%
# model(base = ETS(Dollars.Outstanding)) %>%
# reconcile(ols = min_trace(base,
# method = "wls_struct")) #%>%
# forecast(lambda = 0)
# plotting td/bu forecasts
bu_forecast %>%
filter(is_aggregated(Sub.Cat)) %>%
autoplot(
full_hierarchy,
level = NULL
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Botttom Up") +
facet_wrap(vars(Loan.Status),scales = "free_y",ncol=3) +
theme(legend.position = "none")
# Plotting every model in one graphic
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))
agg.labels <- c("Total",
"Default",
"Deferment",
"Forbearance",
"Grace",
"In School",
"Other",
"Repayment")
names(agg.labels) <- c("<aggregated>",
"CumulDefault.DO",
"Deferment.DO",
"Forbearance.DO",
"Grace.DO",
"InScool.DO",
"Other.DO",
"Repayment.DO")
library(scales)
mint_fit %>%
filter(is_aggregated(Sub.Cat)) %>%
autoplot(
full_hierarchy,
level = NULL
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Federal Direct Loans: Dollars Outstanding",
subtitle = "Hierarchical Forecasting",
color = "Forecasting Model") +
scale_color_manual(labels = c("Base",
"Bottom Up",
"MinT, OLS",
"MinT, Structural",
"Top Down"),
values = c("#8931EF", "#F2CA19",
"#FF00BD", "#0057E9",
"#E11845"))+
facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
labeller = labeller(Loan.Status = agg.labels)) +
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"))
ggsave(
"DOhierarchy.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.8,
width = 15,
height = 7,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
library(dplyr)
library(fpp3)
# Comparing accuracy measures of the aggregate series
# Next steps involve ensuring all available hierarchies have been
# included in the model
# and that CV is used to assess model accuracy where
# it is appropriate
training_fit <- full_hierarchy %>%
filter_index(.~ "2018 Q1")%>%
model(base = ETS(Dollars.Outstanding)) %>%
reconcile(
bu = bottom_up(base),
td = top_down(base, method = "forecast_proportions"),
ols = min_trace(base, method = "ols"),
mintStruct = min_trace(base,
method = "wls_struct"),
#mintwlsvar = min_trace(base,
# method = "wls_var"),
#mint = min_trace(base)
) %>%
forecast(lambda = 0)
#training.fit.MINT <- full_hierarchy %>%
# filter_index(.~ "2018 Q1") %>%
# model(base = ETS(Dollars.Outstanding)) %>%
# reconcile(ols = min_trace(base,
# method = "wls_struct")) %>%
# forecast()
accuracy.table <- training_fit %>%
filter(is_aggregated(Sub.Cat)) %>%
accuracy(
data = full_hierarchy,
measures = list(rmse = RMSE,
mase = MASE)) %>%
group_by(.model) %>%
summarise(rmse = round(mean(rmse), digits = 2),
mase = round(mean(mase), digits = 2))
accuracy.table$.model = c("Base",
"Bottom Up",
"Minimum Trace - Structural",
"Minimum Trace - OLS",
"Top Down")
colnames(accuracy.table) = c("Model",
"RMSE",
"MASE")
library(gridExtra)
library(grid)
library(ggplot2)
png("accuracy1.png", height=100, width=200)
p<-accuracy.table
grid.table(p)
dev.off()
accuracy.table2 <- training_fit %>%
filter(!is_aggregated(Loan.Status)) %>%
accuracy(
data = full_hierarchy,
measures = list(rmse = RMSE,
mase = MASE)) %>%
group_by(.model) %>%
summarise(rmse = rmse,
mase = mase,
sub = format(Sub.Cat))
colnames(accuracy.table2) = c("Model",
"RMSE",
"MASE",
"Subcategory")
library(gridExtra)
library(grid)
library(ggplot2)
png("accuracy2.png", height=2790, width=370)
p2<-accuracy.table2
grid.table(p2)
dev.off()
# Creating table to compare accuracies across reconciliation methods
ac.table <- training_fit %>%
accuracy(data = full_hierarchy,
measures = list(rmse = RMSE,
mase = MASE))
new.ac.ls <- as.character(ac.table$Loan.Status)
new.ac.sc <- as.character(ac.table$Sub.Cat)
ac.table$Loan.Status <- new.ac.ls
ac.table$Sub.Cat <- new.ac.sc
write.csv(ac.table, "regular_accuracy.csv")
# Numerical Summary
library(fpp3)
DirectLoan.tsibble <- as_tsibble(full_DL.data,
index = Date,
key = c("Loan.Status",
"Sub.Cat"))
DirectLoan.tsibble$Do.Sub.Cat <- as.numeric(DirectLoan.tsibble$Do.Sub.Cat)
DirectLoan.tsibble %>%
features(Do.Sub.Cat, list(Mean = mean)) %>%
arrange(Mean)
# forecasting with prediction intervals.
windowsFonts("Selawik Semibold" = windowsFont("Selawik Semibold"))
mint_fit[which(mint_fit$.model == "mint"),] %>%
filter(is_aggregated(Sub.Cat)) %>%
autoplot(
full_hierarchy,
level = c(80,95),
alpha = 0.7
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Forecasts for Federal Direct Loans",
subtitle = "MinT with Structural Scaling",
color = "Forecasting Model") +
scale_color_manual(labels = c("MinT, Structural Scaling"),
values = c("#0057E9"))+
facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
labeller = labeller(Loan.Status = agg.labels))+
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "none")
# saving plot
ggsave(
"OVERALLforecast.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.7,
width = 15.1,
height = 8.444,
units = c("in", "cm", "mm"),
dpi = 700,
limitsize = TRUE,
)
# generating plots for third level data [Forbearance]
for.labels = c("Forbearance: Total",
"Administrative",
"Discretionary",
"Mandatory",
"Mandatory Admin",
"Not Reported")
names(for.labels) = c("<aggregated>",
"Admin.DO",
"Discret.DO",
"Mandatory.DO",
"MandatoryAdmin.DO",
"NotReported.DO")
mint_fit[which(mint_fit$.model == "mint" & mint_fit$Loan.Status == "Forbearance.DO"),] %>%
filter(!is_aggregated(Loan.Status)) %>%
autoplot(
full_hierarchy,
level = c(80,95),
alpha = 0.7
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Forecasts for Federal Direct Loans in Forbearance",
subtitle = "MinT with Structural Scaling",
color = "Forecasting Model") +
scale_color_manual(labels = FALSE)+
facet_wrap(vars(Sub.Cat),
scales = "free_y",
ncol =3,
labeller = labeller(Sub.Cat = for.labels))+
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "none")
ggsave(
"ForbearanceForecast.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.8,
width = 15,
height = 7,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
# Third-level plots (Deferment)
def.labels <- c("Deferment: Total",
"Cancer Treatment",
"Economic Hardship",
"In School",
"Military",
"Not Reported",
"Other",
"Six Month",
"Unemployment")
names(def.labels) <- c("<aggregated>",
"CancerTreatment.DO",
"EconHardship.DO",
"InSchool.DO",
"Military.DO",
"NotReported.DO",
"Other.DO",
"SixMonth.DO",
"Unemployment.DO")
mint_fit[which(mint_fit$.model == "mint" & mint_fit$Loan.Status == "Deferment.DO") ,] %>%
filter(!is_aggregated(Loan.Status)) %>%
autoplot(
full_hierarchy,
level = c(80,95),
alpha = 0.7
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Forecasts for Federal Direct Loans in Deferment",
subtitle = "MinT with Structural Scaling",
color = "Forecasting Model") +
scale_color_manual(labels = FALSE)+
facet_wrap(vars(Sub.Cat),scales = "free_y",ncol=3,
labeller = labeller(Sub.Cat = def.labels))+
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "none")
ggsave(
"DefermentForecast.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.8,
width = 15,
height = 7,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
Cross Validation
i <- NULL
vect <- c()
for (d in as.character(full_hierarchy$Date)){
#print(d)
if (yearquarter(d) <= yearquarter("2015 Q3")){
i <- 1
}
if (d == "2015 Q4"){
i <- 2
}
if (d == "2016 Q1"){
i <- 3
}
if (d == "2016 Q2"){
i <- 4
}
if (d == "2016 Q3"){
i <- 5
}
if (d == "2016 Q4"){
i <- 6
}
if (d == "2017 Q1"){
i <- 7
}
if (d == "2017 Q2"){
i <- 8
}
if (d == "2017 Q3"){
i <- 9
}
if (d == "2017 Q4"){
i <- 10
}
if (d == "2018 Q1"){
i <- 11
}
if (d == "2018 Q2"){
i <- 12
}
if (d == "2018 Q3"){
i <- 13
}
if (d == "2018 Q4"){
i <- 14
}
if (d == "2019 Q1"){
i <- 15
}
if (d == "2019 Q2"){
i <- 16
}
if (d == "2019 Q3"){
i <- 17
}
if (d == "2019 Q4"){
i <- 18
}
if (d == "2020 Q1"){
i <- 19
}
if (d == "2020 Q2"){
i <- 20
}
vect <- append(vect, i)
}
full_hierarchy$cv_number <- vect
full.cv.accuracy <- list()
rmse.full <- c()
mase.full <- c()
labs.ls.full <- c()
labs.sc.full <- c()
id.full <- c()
model.full <- c()
for (id in 2:max(full_hierarchy$cv_number)) {
id.full <- append(id.full, id)
cv.seti <- full_hierarchy[which(full_hierarchy$cv_number <= id),-5]
#print(cv.seti)
accuracy.i <- cv.seti %>%
model(base = ETS(Dollars.Outstanding)) %>%
reconcile(
bu = bottom_up(base),
ols = min_trace(base, method = "ols"),
mint = min_trace(base, method = "wls_struct"),
td = top_down(base, method = "forecast_proportions")
) %>%
forecast(lambda = 0,h=1) %>%
accuracy(full_hierarchy)
full.cv.accuracy <- append(full.cv.accuracy, accuracy.i)
lab.ls <- as.character(accuracy.i$Loan.Status)
lab.sc <- as.character(accuracy.i$Sub.Cat)
labs.ls.full <- append(labs.ls.full, lab.ls)
labs.sc.full <- append(labs.sc.full, lab.sc)
rmse.full <- append(rmse.full, accuracy.i$RMSE)
mase.full <- append(mase.full, accuracy.i$RMSE)
model.full <- append(model.full, accuracy.i$.model)
}
# HERE -- run it and figure out a good way to average the RMSE and MASE
cv.object <- as.data.frame(cbind(model.full,
labs.ls.full,
labs.sc.full,
rmse.full,
mase.full))
cv.object <- cv.object %>%
drop_na() %>%
filter(rmse.full != "NaN") %>%
filter(mase.full != "NaN")
cv.base <- as.data.frame(cv.object[which(cv.object[,1] == "base"),])
cv.ols <- as.data.frame(cv.object[which(cv.object[,1] == "ols"),])
cv.mint <- as.data.frame(cv.object[which(cv.object[,1] == "mint"),])
cv.bu <- as.data.frame(cv.object[which(cv.object[,1] == "bu"),])
cv.td <- as.data.frame(cv.object[which(cv.object[,1] == "td"),])
# averaging out cv accuracy measures
base.rmse <- aggregate(as.numeric(cv.base$rmse.full),
by = list(cv.base$labs.ls.full,
cv.base$labs.sc.full),
mean)
bu.rmse <- aggregate(as.numeric(cv.bu$rmse.full),
by = list(cv.bu$labs.ls.full,
cv.bu$labs.sc.full),
mean)
ols.rmse <- aggregate(as.numeric(cv.ols$rmse.full),
by = list(cv.ols$labs.ls.full,
cv.ols$labs.sc.full),
mean)
mint.rmse <- aggregate(as.numeric(cv.mint$rmse.full),
by = list(cv.mint$labs.ls.full,
cv.mint$labs.sc.full),
mean)
td.rmse <- aggregate(as.numeric(cv.td$rmse.full),
by = list(cv.td$labs.ls.full,
cv.td$labs.sc.full),
mean)
cv.object2 <- cv.object[which(cv.object$mase.full < 1.1),]
base.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "base"),])
bu.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "bu"),])
ols.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "ols"),])
mint.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "mint"),])
td.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "td"),])
base.mase <- aggregate(as.numeric(base.m$mase.full),
by = list(base.m$labs.ls.full,
base.m$labs.sc.full),
mean)
bu.mase <- aggregate(as.numeric(bu.m$mase.full),
by = list(bu.m$labs.ls.full,
bu.m$labs.sc.full),
mean)
ols.mase <- aggregate(as.numeric(ols.m$mase.full),
by = list(ols.m$labs.ls.full,
ols.m$labs.sc.full),
mean)
mint.mase <- aggregate(as.numeric(mint.m$mase.full),
by = list(mint.m$labs.ls.full,
mint.m$labs.sc.full),
mean)
td.mase <- aggregate(as.numeric(td.m$mase.full),
by = list(td.m$labs.ls.full,
td.m$labs.sc.full),
mean)
# saving CV dataframes as a separate CSV
write.csv(base.rmse, "baseRmse.csv")
write.csv(bu.rmse, "buRmse.csv")
write.csv(ols.rmse, "olsRmse.csv")
write.csv(mint.rmse, "mintRmse.csv")
write.csv(td.rmse, "tdRmse.csv")
write.csv(base.mase, "baseMase.csv")
write.csv(bu.mase, "buMase.csv")
write.csv(ols.mase, "olsMase.csv")
write.csv(mint.mase, "mintMase.csv")
write.csv(td.mase, "tdMase.csv")
Covid - 19 Adjustments
# hard coding point estimates
# step 1: Calculating average proportions for data impacted by COVID-19 policy changes.
COVID.proportions <- hierarchy.Covid[,-1] %>%
group_by(as.character(Loan.Status)) %>%
summarize(n=n(),
do=sum(Dollars.Outstanding)) %>%
mutate(freq = n/sum(n),
DO.prop = do/sum(do))
# step 2: Calculating average proportions for data not impacted by COVID-19 policy changes. i.e. data up through the first quarter of 2020.
Pre.COVID.proportions <- full_hierarchy[,-1] %>%
group_by(as.character(Loan.Status)) %>%
summarize(n=n(),
do=sum(Dollars.Outstanding)) %>%
mutate(freq = n/sum(n),
DO.prop = do/sum(do))
# step 3: finding multiplication proportions for forecasts
forecast.labs <- COVID.proportions$`as.character(Loan.Status)`
forecast.multiplyer <- COVID.proportions$DO.prop/Pre.COVID.proportions$DO.prop
minTnoAgg <- mint_fit[which(mint_fit$.model == "mint"),]
mint.v.adjMint <- as.data.frame(cbind(as.character(minTnoAgg$Loan.Status),
as.numeric(minTnoAgg$.mean)))
colnames(mint.v.adjMint) <- c("LS.1", "Non.adjusted")
npf <- c()
i <- 0
for (l in mint.v.adjMint$LS.1) {
i <- i + 1
index <- match(l, forecast.labs)
m <- forecast.multiplyer[index]
new.point.forecast <- m*as.numeric(mint.v.adjMint$Non.adjusted[i])
npf <- append(npf, new.point.forecast)
}
library("distributional")
sig <- unlist(minTnoAgg$Dollars.Outstanding)[-seq(1,416,2)]
dist.new <- dist_normal(mu = npf,sigma = sig)
# Formatting resulting forecasts
enough.space <- mint_fit[which(mint_fit$.model == "mint" | mint_fit$.model == "ols"),]
i2 <- 0
i1 <- 0
for (mod in enough.space$.model) {
i1 <- i1 + 1
if (mod == "ols") {
i2 <- i2 + 1
enough.space$.mean[i1] <- npf[i2]
enough.space$Dollars.Outstanding[i1] <- dist.new[i2]
}
}
enough.space1 <- enough.space
library(fpp3)
enough.space %>%
filter(is_aggregated(Sub.Cat)) %>%
autoplot(
full_hierarchy,
level = c(80,95),
alpha = 0.7
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Federal Direct Loans: Dollars Outstanding",
subtitle = "MinT with Structural Scaling",
color = "Forecasting Model")+
facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
labeller = labeller(Loan.Status = agg.labels)) +
scale_color_manual(labels = c("MinT", "MinT With Adjustments"),
values = c('deeppink','cyan')) +
guides(".model"=FALSE, level=FALSE) +
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "none")
ggsave(
"AdjustedForecast.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.7,
width = 15.1,
height = 8.444,
units = c("in", "cm", "mm"),
dpi = 700,
limitsize = TRUE,
)
# hard coding point estimates
# step 1: Calculating average proportions for data impacted by COVID-19 policy changes.
COVID.proportionsL3 <- hierarchy.Covid[,-1] %>%
group_by(as.character(Sub.Cat)) %>%
summarize(n=n(),
do=sum(Dollars.Outstanding)) %>%
mutate(freq = n/sum(n),
DO.prop = do/sum(do))
# step 2: Calculating average proportions for data not impacted by COVID-19 policy changes. i.e. data up through the first quarter of 2020.
Pre.COVID.proportionsL3 <- full_hierarchy[,-1] %>%
group_by(as.character(Sub.Cat)) %>%
summarize(n=n(),
do=sum(Dollars.Outstanding)) %>%
mutate(freq = n/sum(n),
DO.prop = do/sum(do))
# step 3: finding multiplication proportions for forecasts
forecast.labsL3 <- COVID.proportionsL3$`as.character(Sub.Cat)`
forecast.multiplyerL3 <- COVID.proportionsL3$DO.prop/Pre.COVID.proportionsL3$DO.prop
minTnoAgg <- mint_fit[which(mint_fit$.model == "mint"),] %>%
filter(Sub.Cat != Loan.Status)
mint.v.adjMint <- as.data.frame(cbind(as.character(minTnoAgg$Sub.Cat),
as.numeric(minTnoAgg$.mean)))
colnames(mint.v.adjMint) <- c("SC.1", "Non.adjusted")
npf2 <- c()
i2 <- 0
for (l in mint.v.adjMint$SC.1) {
i2 <- i2 + 1
index <- match(l, forecast.labsL3)
m2 <- forecast.multiplyerL3[index]
new.point.forecast2 <- m2*as.numeric(mint.v.adjMint$Non.adjusted[i2])
npf2 <- append(npf2, new.point.forecast2)
}
library("distributional")
sig <- unlist(minTnoAgg$Dollars.Outstanding)[-seq(1,416,2)]
dist.new <- dist_normal(mu = npf2,sigma = sig)
# Formatting resulting forecasts
enough.space <- mint_fit[which(mint_fit$.model == "mint" | mint_fit$.model == "ols"),] %>%
filter(Sub.Cat != Loan.Status)
i2 <- 0
i1 <- 0
for (mod in enough.space$.model) {
i1 <- i1 + 1
if (mod == "ols") {
i2 <- i2 + 1
enough.space$.mean[i1] <- npf2[i2]
enough.space$Dollars.Outstanding[i1] <- dist.new[i2]
}
}
third.level.labs <- c(def.labels, for.labels)
library(fpp3)
enough.space %>%
filter(!is_aggregated(Sub.Cat)) %>%
autoplot(
full_hierarchy,
level = c(80,95),
alpha = 0.7
) +
labs(y = "Dollars Outstanding - (Billions, USD)",
title = "Federal Direct Loans: Dollars Outstanding",
subtitle = "MinT with Structural Scaling",
color = "Forecasting Model")+
facet_wrap(vars(Sub.Cat),scales = "free_y",ncol=4,
labeller = labeller(Sub.Cat = third.level.labs)) +
scale_color_manual(labels = c("MinT", "MinT With Adjustments"),
values = c('deeppink','cyan')) +
guides(".model"=FALSE, level=FALSE) +
theme_classic(base_family = "Constantia",
base_size = 12.5) +
theme(plot.title = element_text(face="bold"),
strip.text = element_text(face="bold", size=12.5),
axis.text.x.bottom = element_text(size = 8.5),
axis.text = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
legend.position = "none")
ggsave(
"AppendixForecasts.png",
plot = last_plot(),
device = NULL,
path = NULL,
scale = 0.8,
width = 15,
height = 7,
units = c("in", "cm", "mm"),
dpi = 600,
limitsize = TRUE,
)
# making a table of point estimates
# inclusive of Covid-19 adjustments
point.estimates_lowest <- enough.space %>%
filter(!is_aggregated(Sub.Cat))
point.estimates_lowest$Loan.Status <- as.character(point.estimates_lowest$Loan.Status)
point.estimates_lowest$Sub.Cat <- as.character(point.estimates_lowest$Sub.Cat)
point.estimates_second <- enough.space1 %>%
filter(!is_aggregated(Loan.Status))
point.estimates_second$Loan.Status <- as.character(point.estimates_second$Loan.Status)
point.estimates_second$Sub.Cat <- as.character(point.estimates_second$Sub.Cat)
point.estimates_top <- enough.space1 %>%
filter(is_aggregated(Loan.Status))
point.estimates_top$Loan.Status <- as.character(point.estimates_top$Loan.Status)
point.estimates_top$Sub.Cat <- as.character(point.estimates_top$Sub.Cat)
# saving as CSVs.
write.csv(point.estimates_top, "one.csv")
write.csv(point.estimates_second, "two.csv")
write.csv(point.estimates_lowest, "three.csv")