Data Formatting

library(dplyr)
# Direct Loan Portfolio by Loan Status
DL.LoanStatus.p <- read.csv("PortfolioLS.Pivot.csv")
colnames(DL.LoanStatus.p) = c("Date",
                              "Loan.Status",
                              "DO")

# Direct Loan Portfolio by Forbearance Type
DL.ForbearanceType.p <- read.csv("PortfolioFOR.Pivot.csv")
DL.ForbearanceType.p$Loan.Status <- rep("Forbearance.DO",
                                        nrow(DL.ForbearanceType.p))
colnames(DL.ForbearanceType.p) <- c("Date",
                              "Subcategory",
                              "DO.Sub",
                              "Loan.Status")

# Direct Loan Portfolio by Deferment Type:
DL.Deferment.p <- read.csv("PortfolioDefer.Pivot.csv")
DL.Deferment.p$Loan.Status <- rep("Deferment.DO",
                                        nrow(DL.Deferment.p))
colnames(DL.Deferment.p) <- c("Date",
                              "Subcategory",
                              "DO.Sub",
                              "Loan.Status")

# Direct Loan Portfolio by Default:
DL.Default.p <- read.csv("PortfolioDefault.Pivot.csv")

# Filtering to include loans entering default 
# And loans staying in default from previous quarter
DL.Default.p <- DL.Default.p %>%
  filter(Category == "DRFPQ.Billions" |
         Category == "Entering.USD.Billions")

DL.Default.p$Loan.Status <- rep("CumulDefault.DO",
                                        nrow(DL.Default.p))
colnames(DL.Default.p) <- c("Date",
                              "Subcategory",
                              "DO.Sub",
                              "Loan.Status")

Merging data sets based on date and loan status.

library(fpp3)
library(tidyverse)
df_list <- list(DL.LoanStatus.p,
                DL.ForbearanceType.p,
                DL.Deferment.p,
                DL.Default.p)

# Correcting typo in data
DL.LoanStatus.p <- DL.LoanStatus.p %>%
  mutate(Loan.Status = str_replace(Loan.Status, 
                                   "Forebearance.DO",
                                   "Forbearance.DO"))

m1 <- merge(DL.LoanStatus.p,
                DL.ForbearanceType.p,
                by = c("Date","Loan.Status"),
                all.x = TRUE)
final.merge <- merge(m1,
                DL.Deferment.p,
                by = c("Date","Loan.Status"),
            all.x = TRUE)

#final.merge <- merge(m2,
#                DL.Default.p,
#                by = c("Date","Loan.Status"),
#                all.x = TRUE)

# combining columns that were separated by "merge" function
# in order to match hierarchical structure required
subcat.index <- seq(4, ncol(final.merge), 2)
DO.sub.index <- seq(5, ncol(final.merge), 2)

unite1 <- final.merge %>%
  unite("Sub.Cat",
        all_of(subcat.index), na.rm = TRUE, remove = FALSE)

unite2 <- final.merge %>%
  unite("DO.Sub.Cat",
        all_of(DO.sub.index), na.rm = TRUE, remove = FALSE)

unite1$Do.Sub.Cat <- unite2$DO.Sub.Cat

full_DL.data <- unite1[,c(1:4,ncol(unite1))]
library(zoo)
new.dates <- as.yearqtr(full_DL.data$Date,
                           format = "%Y-%m-%d")

full_DL.data$Date <- yearquarter(new.dates)

# Filling in missing hierarchies with upper-level data.
for (i in 1:nrow(full_DL.data)){
  Date.i <- full_DL.data[i,1]
  LS.i <- full_DL.data[i,2]
  DO.i <- full_DL.data[i,3]
  Sub.Cat.i <- full_DL.data[i,4]
  DO.Sub.Cat.i <- full_DL.data[i,5]
  if (Sub.Cat.i == "") {
    full_DL.data[i,4] <- LS.i
    full_DL.data[i,5] <- as.numeric(DO.i)
  }
}

# Zero values represented with empty string. 
# Replacing empty string with 0
full_DL.data[,2:ncol(full_DL.data)] <- full_DL.data[,-1] %>%
                                          mutate_all(funs(replace(.,.=="",0)))

# Removing data so that only "Dollars Outstanding" are included
index <- 0
remove.index <- c()
for (string.i in full_DL.data$Loan.Status){
  index <- index + 1
  if(grepl(".Recipients", string.i) == TRUE) {
    remove.index <- append(remove.index, index)
  }
}

indexSUB <- 0
remove.indexSUB <- c()
for (string.i in full_DL.data$Sub.Cat){
  indexSUB <- indexSUB + 1
  if(grepl(".Recipients", string.i) == TRUE) {
    remove.indexSUB <- append(remove.indexSUB, indexSUB)
  }
}

# Removing recipient data 
# Dollars outstanding data is all that is left
library(lubridate)
remove.Recipients <- union(remove.index, remove.indexSUB)
full_DL.data <- full_DL.data[-c(remove.Recipients),]

# formatting data
full_hierarchy <- as_tsibble(full_DL.data,
                             index = Date,
                             key = c("Loan.Status",
                                     "Sub.Cat")) %>%
                  aggregate_key(Loan.Status/Sub.Cat,
                                Dollars.Outstanding = sum(as.numeric(Do.Sub.Cat))) %>%
                  filter_index("2015 Q1" ~ "2020 Q1")

# Filtering data for 2015 onwards
hierarchy.Covid <- as_tsibble(full_DL.data,
                             index = Date,
                             key = c("Loan.Status",
                                     "Sub.Cat")) %>%
                  aggregate_key(Loan.Status/Sub.Cat,
                                Dollars.Outstanding = sum(as.numeric(Do.Sub.Cat))) %>%
                  filter_index("2020 Q2" ~.)

—————-FORECASTING—————–

# Generating bottom-up forecast
bu_forecast <- full_hierarchy %>%
  model(base = ETS(Dollars.Outstanding)) %>%
  reconcile(bu = bottom_up(base)) %>%
  forecast(lambda = 0)

# "to generate bootstrapped prediction intervals in this way, we simply set bootstrap = TRUE in the forecast() function"

# Generating top-down forecast
# Using forecast proportions, rather than historical averages
td_forecast <- full_hierarchy %>%
  model(base = ETS(Dollars.Outstanding)) %>%
  reconcile(td = top_down(models = base,
                          method = "forecast_proportions")) %>%
  forecast(lambda = 0)

# Generating middle-out forecast
#mo_forecast <- full_hierarchy %>%
#  model(ets = ETS(Dollars.Outstanding)) %>%
#  reconcile(td = middle_out(models = ets,
#                            level = choose_level, method = "forecast_proportions")) %>%
#  forecast()

# Minimum Trace (MinT) optimal reconciliation approach
mint_fit <- full_hierarchy %>%
  model(base = ETS(Dollars.Outstanding)) %>%
  reconcile(
    bu = bottom_up(base),
    td = top_down(base, method = "forecast_proportions"),
    ols = min_trace(base, method = "ols"),
    mint = min_trace(base, method = "wls_struct")
  ) %>%
  forecast(lambda = 0)

#mint_forecast <- mint_fit %>%
#  forecast (h = "2 years")

# Second Attempt: Minimum Trace Forecasts
# Minimum Trace OLS
#MinTOLS_forecast <- full_hierarchy %>%
#            model(base = ETS(Dollars.Outstanding)) %>%
#            reconcile(ols = min_trace(base,
#                                      method = "ols")) %>%
#            forecast(lambda = 0)
# Minimum Trace: Shrink
#MinTShrink_forecast <- full_hierarchy %>%
#            model(base = ETS(Dollars.Outstanding)) %>%
#            reconcile(ols = min_trace(base,
#                                      method = "wls_struct")) #%>%
#            forecast(lambda = 0)

# plotting td/bu forecasts
bu_forecast %>%
  filter(is_aggregated(Sub.Cat)) %>%
  autoplot(
    full_hierarchy,
    level = NULL
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Botttom Up") +
  facet_wrap(vars(Loan.Status),scales = "free_y",ncol=3) +
  theme(legend.position = "none")

# Plotting every model in one graphic
library(ggthemes)
library(extrafont)
windowsFonts("Constantia" = windowsFont("Constantia"))

agg.labels <- c("Total",
                 "Default",
                 "Deferment",
                 "Forbearance",
                 "Grace",
                 "In School",
                 "Other",
                 "Repayment")
names(agg.labels) <- c("<aggregated>",
                        "CumulDefault.DO",
                        "Deferment.DO",
                        "Forbearance.DO",
                        "Grace.DO",
                        "InScool.DO",
                        "Other.DO",
                        "Repayment.DO")

library(scales)

mint_fit %>%
  filter(is_aggregated(Sub.Cat)) %>%
  autoplot(
    full_hierarchy,
    level = NULL
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Federal Direct Loans: Dollars Outstanding",
       subtitle = "Hierarchical Forecasting",
       color = "Forecasting Model") + 
  scale_color_manual(labels = c("Base",
                                "Bottom Up",
                                "MinT, OLS",
                                "MinT, Structural",
                                "Top Down"), 
                     values = c("#8931EF", "#F2CA19",
                               "#FF00BD", "#0057E9",
                               "#E11845"))+
  facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
             labeller = labeller(Loan.Status = agg.labels)) +
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"))

ggsave(
  "DOhierarchy.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.8,
  width = 15,
  height = 7,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

library(dplyr)
library(fpp3)
# Comparing accuracy measures of the aggregate series
# Next steps involve ensuring all available hierarchies have been 
# included in the model
# and that CV is used to assess model accuracy where 
# it is appropriate

training_fit <- full_hierarchy  %>%
  filter_index(.~ "2018 Q1")%>%
  model(base = ETS(Dollars.Outstanding)) %>%
  reconcile(
    bu = bottom_up(base),
    td = top_down(base, method = "forecast_proportions"),
    ols = min_trace(base, method = "ols"),
    mintStruct = min_trace(base,
                          method = "wls_struct"),
    #mintwlsvar = min_trace(base,
    #                       method = "wls_var"),
    #mint = min_trace(base)
  ) %>%
  forecast(lambda = 0)

#training.fit.MINT <- full_hierarchy %>%
#  filter_index(.~ "2018 Q1") %>%
#            model(base = ETS(Dollars.Outstanding)) %>%
#            reconcile(ols = min_trace(base,
#                                      method = "wls_struct")) %>%
#            forecast()

accuracy.table <- training_fit %>%
        filter(is_aggregated(Sub.Cat)) %>%
        accuracy(
            data = full_hierarchy,
            measures = list(rmse = RMSE,
                            mase = MASE)) %>%
        group_by(.model) %>%
        summarise(rmse = round(mean(rmse), digits = 2),
                  mase = round(mean(mase), digits = 2))

accuracy.table$.model = c("Base",
                          "Bottom Up",
                          "Minimum Trace - Structural",
                          "Minimum Trace - OLS",
                          "Top Down")
colnames(accuracy.table) = c("Model",
                             "RMSE",
                             "MASE")

library(gridExtra)
library(grid)
library(ggplot2)
png("accuracy1.png", height=100, width=200)
p<-accuracy.table
grid.table(p)
dev.off()

accuracy.table2 <- training_fit %>%
        filter(!is_aggregated(Loan.Status)) %>%
        accuracy(
            data = full_hierarchy,
            measures = list(rmse = RMSE,
                            mase = MASE)) %>%
        group_by(.model) %>%
        summarise(rmse = rmse,
                  mase = mase, 
                  sub = format(Sub.Cat))


colnames(accuracy.table2) = c("Model",
                             "RMSE",
                             "MASE", 
                             "Subcategory")
 
library(gridExtra)
library(grid)
library(ggplot2)
png("accuracy2.png", height=2790, width=370)
p2<-accuracy.table2
grid.table(p2)
dev.off()

# Creating table to compare accuracies across reconciliation methods
ac.table <- training_fit %>% 
            accuracy(data = full_hierarchy,
            measures = list(rmse = RMSE,
                            mase = MASE)) 
new.ac.ls <- as.character(ac.table$Loan.Status)
new.ac.sc <- as.character(ac.table$Sub.Cat)

ac.table$Loan.Status <- new.ac.ls
ac.table$Sub.Cat <- new.ac.sc

write.csv(ac.table, "regular_accuracy.csv")

# Numerical Summary
library(fpp3)
DirectLoan.tsibble <- as_tsibble(full_DL.data,
                             index = Date,
                             key = c("Loan.Status",
                                     "Sub.Cat"))
DirectLoan.tsibble$Do.Sub.Cat <- as.numeric(DirectLoan.tsibble$Do.Sub.Cat)
DirectLoan.tsibble %>%
  features(Do.Sub.Cat, list(Mean = mean)) %>%
  arrange(Mean)

# forecasting with prediction intervals.
windowsFonts("Selawik Semibold" = windowsFont("Selawik Semibold"))
mint_fit[which(mint_fit$.model == "mint"),] %>%
  filter(is_aggregated(Sub.Cat)) %>%
  autoplot(
    full_hierarchy,
    level = c(80,95),
    alpha = 0.7
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Forecasts for Federal Direct Loans",
       subtitle = "MinT with Structural Scaling",
       color = "Forecasting Model") + 
  scale_color_manual(labels = c("MinT, Structural Scaling"), 
                     values = c("#0057E9"))+
  facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
             labeller = labeller(Loan.Status = agg.labels))+
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.position = "none")

# saving plot
ggsave(
  "OVERALLforecast.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.7,
  width = 15.1,
  height = 8.444,
  units = c("in", "cm", "mm"),
  dpi = 700,
  limitsize = TRUE,
)

# generating plots for third level data [Forbearance]
for.labels = c("Forbearance: Total",
               "Administrative",
               "Discretionary",
               "Mandatory",
               "Mandatory Admin",
               "Not Reported")
names(for.labels) = c("<aggregated>",
                        "Admin.DO",
                      "Discret.DO",
                      "Mandatory.DO",
                      "MandatoryAdmin.DO",
                      "NotReported.DO")
mint_fit[which(mint_fit$.model == "mint" & mint_fit$Loan.Status == "Forbearance.DO"),] %>%
  filter(!is_aggregated(Loan.Status)) %>%
  autoplot(
    full_hierarchy,
    level = c(80,95),
    alpha = 0.7
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Forecasts for Federal Direct Loans in Forbearance",
       subtitle = "MinT with Structural Scaling",
       color = "Forecasting Model") + 
  scale_color_manual(labels = FALSE)+
  facet_wrap(vars(Sub.Cat),
             scales = "free_y", 
             ncol =3,
             labeller = labeller(Sub.Cat = for.labels))+
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.position = "none")

ggsave(
  "ForbearanceForecast.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.8,
  width = 15,
  height = 7,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

# Third-level plots (Deferment)
def.labels <- c("Deferment: Total",
                "Cancer Treatment",
                "Economic Hardship",
                "In School",
                "Military",
                "Not Reported",
                "Other",
                "Six Month",
                "Unemployment")
names(def.labels) <- c("<aggregated>",
                        "CancerTreatment.DO",
                        "EconHardship.DO",
                        "InSchool.DO",
                        "Military.DO",
                        "NotReported.DO",
                        "Other.DO",
                        "SixMonth.DO",
                       "Unemployment.DO")
mint_fit[which(mint_fit$.model == "mint" & mint_fit$Loan.Status == "Deferment.DO") ,] %>%
  filter(!is_aggregated(Loan.Status)) %>%
  autoplot(
    full_hierarchy,
    level = c(80,95),
    alpha = 0.7
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Forecasts for Federal Direct Loans in Deferment",
       subtitle = "MinT with Structural Scaling",
       color = "Forecasting Model") + 
  scale_color_manual(labels = FALSE)+
  facet_wrap(vars(Sub.Cat),scales = "free_y",ncol=3,
             labeller = labeller(Sub.Cat = def.labels))+
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.position = "none")

ggsave(
  "DefermentForecast.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.8,
  width = 15,
  height = 7,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

Cross Validation

i <- NULL
vect <- c()
for (d in as.character(full_hierarchy$Date)){
  #print(d)
  if (yearquarter(d) <= yearquarter("2015 Q3")){
    i <- 1
  }
  if (d == "2015 Q4"){
    i <- 2
  }
  if (d == "2016 Q1"){
    i <- 3
  }
  if (d == "2016 Q2"){
    i <- 4
  }
  if (d == "2016 Q3"){
    i <- 5
  }
  if (d == "2016 Q4"){
    i <- 6
  }
  if (d == "2017 Q1"){
    i <- 7
  }
  if (d == "2017 Q2"){
    i <- 8
  }
  if (d == "2017 Q3"){
    i <- 9
  }
  if (d == "2017 Q4"){
    i <- 10
  }
  if (d == "2018 Q1"){
    i <- 11
  }
  if (d == "2018 Q2"){
    i <- 12
  }
  if (d == "2018 Q3"){
    i <- 13
  }
  if (d == "2018 Q4"){
    i <- 14
  }
  if (d == "2019 Q1"){
    i <- 15
  }
  if (d == "2019 Q2"){
    i <- 16
  }
  if (d == "2019 Q3"){
    i <- 17
  }
  if (d == "2019 Q4"){
    i <- 18
  }
  if (d == "2020 Q1"){
    i <- 19
  }
  if (d == "2020 Q2"){
    i <- 20
  }
  vect <- append(vect, i)
}
full_hierarchy$cv_number <- vect

full.cv.accuracy <- list()
rmse.full <- c()
mase.full <- c()
labs.ls.full <- c()
labs.sc.full <- c()
id.full <- c()
model.full <- c()
for (id in 2:max(full_hierarchy$cv_number)) {
  id.full <- append(id.full, id)
  cv.seti <- full_hierarchy[which(full_hierarchy$cv_number <= id),-5]
  #print(cv.seti)
  accuracy.i <- cv.seti %>% 
  model(base = ETS(Dollars.Outstanding)) %>%
  reconcile(
    bu = bottom_up(base),
    ols = min_trace(base, method = "ols"),
    mint = min_trace(base, method = "wls_struct"),
    td = top_down(base, method = "forecast_proportions")
  ) %>%
  forecast(lambda = 0,h=1) %>%
  accuracy(full_hierarchy)
  full.cv.accuracy <- append(full.cv.accuracy, accuracy.i)
  lab.ls <- as.character(accuracy.i$Loan.Status)
  lab.sc <- as.character(accuracy.i$Sub.Cat)
  labs.ls.full <- append(labs.ls.full, lab.ls)
  labs.sc.full <- append(labs.sc.full, lab.sc)
  rmse.full <- append(rmse.full, accuracy.i$RMSE)
  mase.full <- append(mase.full, accuracy.i$RMSE)
  model.full <- append(model.full, accuracy.i$.model)
}

# HERE -- run it and figure out a good way to average the RMSE and MASE 
cv.object <- as.data.frame(cbind(model.full,
                   labs.ls.full,
                   labs.sc.full,
                   rmse.full,
                   mase.full))
cv.object <- cv.object %>%
  drop_na() %>%
  filter(rmse.full != "NaN") %>%
  filter(mase.full != "NaN")
cv.base <- as.data.frame(cv.object[which(cv.object[,1] == "base"),])
cv.ols <- as.data.frame(cv.object[which(cv.object[,1] == "ols"),])
cv.mint <- as.data.frame(cv.object[which(cv.object[,1] == "mint"),])
cv.bu <- as.data.frame(cv.object[which(cv.object[,1] == "bu"),])
cv.td <- as.data.frame(cv.object[which(cv.object[,1] == "td"),])

# averaging out cv accuracy measures
base.rmse <- aggregate(as.numeric(cv.base$rmse.full),
                       by = list(cv.base$labs.ls.full,
                                 cv.base$labs.sc.full),
                       mean)
bu.rmse <- aggregate(as.numeric(cv.bu$rmse.full),
                       by = list(cv.bu$labs.ls.full,
                                 cv.bu$labs.sc.full),
                       mean)
ols.rmse <- aggregate(as.numeric(cv.ols$rmse.full),
                       by = list(cv.ols$labs.ls.full,
                                 cv.ols$labs.sc.full),
                       mean)
mint.rmse <- aggregate(as.numeric(cv.mint$rmse.full),
                       by = list(cv.mint$labs.ls.full,
                                 cv.mint$labs.sc.full),
                       mean)
td.rmse <- aggregate(as.numeric(cv.td$rmse.full),
                     by = list(cv.td$labs.ls.full,
                               cv.td$labs.sc.full),
                     mean)

cv.object2 <- cv.object[which(cv.object$mase.full < 1.1),]
base.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "base"),])
bu.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "bu"),])
ols.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "ols"),])
mint.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "mint"),])
td.m <- as.data.frame(cv.object2[which(cv.object2[,1] == "td"),])
  
base.mase <- aggregate(as.numeric(base.m$mase.full),
                       by = list(base.m$labs.ls.full,
                                 base.m$labs.sc.full),
                       mean)
bu.mase <- aggregate(as.numeric(bu.m$mase.full),
                       by = list(bu.m$labs.ls.full,
                                 bu.m$labs.sc.full),
                       mean)
ols.mase <- aggregate(as.numeric(ols.m$mase.full),
                       by = list(ols.m$labs.ls.full,
                                 ols.m$labs.sc.full),
                       mean)
mint.mase <- aggregate(as.numeric(mint.m$mase.full),
                       by = list(mint.m$labs.ls.full,
                                 mint.m$labs.sc.full),
                       mean)
td.mase <- aggregate(as.numeric(td.m$mase.full),
                     by = list(td.m$labs.ls.full,
                               td.m$labs.sc.full),
                     mean)

# saving CV dataframes as a separate CSV
write.csv(base.rmse, "baseRmse.csv")
write.csv(bu.rmse, "buRmse.csv")
write.csv(ols.rmse, "olsRmse.csv")
write.csv(mint.rmse, "mintRmse.csv")
write.csv(td.rmse, "tdRmse.csv")

write.csv(base.mase, "baseMase.csv")
write.csv(bu.mase, "buMase.csv")
write.csv(ols.mase, "olsMase.csv")
write.csv(mint.mase, "mintMase.csv")
write.csv(td.mase, "tdMase.csv")

Covid - 19 Adjustments

# hard coding point estimates

# step 1: Calculating average proportions for data impacted by COVID-19 policy changes.
COVID.proportions <- hierarchy.Covid[,-1] %>% 
  group_by(as.character(Loan.Status)) %>%
  summarize(n=n(),
            do=sum(Dollars.Outstanding)) %>%
  mutate(freq = n/sum(n),
         DO.prop = do/sum(do))
  

# step 2: Calculating average proportions for data not impacted by COVID-19 policy changes. i.e. data up through the first quarter of 2020.
Pre.COVID.proportions <- full_hierarchy[,-1] %>% 
  group_by(as.character(Loan.Status)) %>%
  summarize(n=n(),
            do=sum(Dollars.Outstanding)) %>%
  mutate(freq = n/sum(n),
         DO.prop = do/sum(do))

# step 3: finding multiplication proportions for forecasts

forecast.labs <- COVID.proportions$`as.character(Loan.Status)`
forecast.multiplyer <- COVID.proportions$DO.prop/Pre.COVID.proportions$DO.prop

minTnoAgg <- mint_fit[which(mint_fit$.model == "mint"),]

mint.v.adjMint <- as.data.frame(cbind(as.character(minTnoAgg$Loan.Status),
                        as.numeric(minTnoAgg$.mean)))
colnames(mint.v.adjMint) <- c("LS.1", "Non.adjusted")
npf <- c()
i <- 0
for (l in mint.v.adjMint$LS.1) {
  i <- i + 1
  index <- match(l, forecast.labs)
  m <- forecast.multiplyer[index]
  new.point.forecast <- m*as.numeric(mint.v.adjMint$Non.adjusted[i])
  npf <- append(npf, new.point.forecast)
}

library("distributional")
sig <- unlist(minTnoAgg$Dollars.Outstanding)[-seq(1,416,2)]
dist.new <- dist_normal(mu = npf,sigma = sig)

# Formatting resulting forecasts

enough.space <- mint_fit[which(mint_fit$.model == "mint" | mint_fit$.model == "ols"),]

i2 <- 0
i1 <- 0
for (mod in enough.space$.model) {
  i1 <- i1 + 1
  if (mod == "ols") {
    i2 <- i2 + 1
    enough.space$.mean[i1] <- npf[i2]
    enough.space$Dollars.Outstanding[i1] <- dist.new[i2]
  }
}

enough.space1 <- enough.space

library(fpp3)
enough.space %>%
  filter(is_aggregated(Sub.Cat)) %>%
  autoplot(
    full_hierarchy,
    level = c(80,95),
    alpha = 0.7
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Federal Direct Loans: Dollars Outstanding",
       subtitle = "MinT with Structural Scaling",
       color = "Forecasting Model")+
  facet_wrap(vars(Loan.Status),scales = "free_y",ncol=4,
             labeller = labeller(Loan.Status = agg.labels)) +
  scale_color_manual(labels = c("MinT", "MinT With Adjustments"), 
                     values = c('deeppink','cyan')) +
  guides(".model"=FALSE, level=FALSE) +
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.position = "none")

ggsave(
  "AdjustedForecast.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.7,
  width = 15.1,
  height = 8.444,
  units = c("in", "cm", "mm"),
  dpi = 700,
  limitsize = TRUE,
)

# hard coding point estimates

# step 1: Calculating average proportions for data impacted by COVID-19 policy changes.
COVID.proportionsL3 <- hierarchy.Covid[,-1] %>% 
  group_by(as.character(Sub.Cat)) %>%
  summarize(n=n(),
            do=sum(Dollars.Outstanding)) %>%
  mutate(freq = n/sum(n),
         DO.prop = do/sum(do))
  

# step 2: Calculating average proportions for data not impacted by COVID-19 policy changes. i.e. data up through the first quarter of 2020.
Pre.COVID.proportionsL3 <- full_hierarchy[,-1] %>% 
  group_by(as.character(Sub.Cat)) %>%
  summarize(n=n(),
            do=sum(Dollars.Outstanding)) %>%
  mutate(freq = n/sum(n),
         DO.prop = do/sum(do))

# step 3: finding multiplication proportions for forecasts

forecast.labsL3 <- COVID.proportionsL3$`as.character(Sub.Cat)`
forecast.multiplyerL3 <- COVID.proportionsL3$DO.prop/Pre.COVID.proportionsL3$DO.prop

minTnoAgg <- mint_fit[which(mint_fit$.model == "mint"),] %>%
  filter(Sub.Cat != Loan.Status)

mint.v.adjMint <- as.data.frame(cbind(as.character(minTnoAgg$Sub.Cat),
                                      as.numeric(minTnoAgg$.mean)))

colnames(mint.v.adjMint) <- c("SC.1", "Non.adjusted")
npf2 <- c()
i2 <- 0
for (l in mint.v.adjMint$SC.1) {
  i2 <- i2 + 1
  index <- match(l, forecast.labsL3)
  m2 <- forecast.multiplyerL3[index]
  new.point.forecast2 <- m2*as.numeric(mint.v.adjMint$Non.adjusted[i2])
  npf2 <- append(npf2, new.point.forecast2)
}

library("distributional")
sig <- unlist(minTnoAgg$Dollars.Outstanding)[-seq(1,416,2)]
dist.new <- dist_normal(mu = npf2,sigma = sig)

# Formatting resulting forecasts

enough.space <- mint_fit[which(mint_fit$.model == "mint" | mint_fit$.model == "ols"),] %>%
  filter(Sub.Cat != Loan.Status)

i2 <- 0
i1 <- 0
for (mod in enough.space$.model) {
  i1 <- i1 + 1
  if (mod == "ols") {
    i2 <- i2 + 1
    enough.space$.mean[i1] <- npf2[i2]
    enough.space$Dollars.Outstanding[i1] <- dist.new[i2]
  }
}

third.level.labs <- c(def.labels, for.labels)

library(fpp3)
enough.space %>%
  filter(!is_aggregated(Sub.Cat)) %>%
  autoplot(
    full_hierarchy,
    level = c(80,95),
    alpha = 0.7
  ) +
  labs(y = "Dollars Outstanding - (Billions, USD)",
       title = "Federal Direct Loans: Dollars Outstanding",
       subtitle = "MinT with Structural Scaling",
       color = "Forecasting Model")+
  facet_wrap(vars(Sub.Cat),scales = "free_y",ncol=4,
             labeller = labeller(Sub.Cat = third.level.labs)) +
  scale_color_manual(labels = c("MinT", "MinT With Adjustments"), 
                     values = c('deeppink','cyan')) +
  guides(".model"=FALSE, level=FALSE) +
  theme_classic(base_family = "Constantia",
                base_size = 12.5) +
  theme(plot.title = element_text(face="bold"),
        strip.text = element_text(face="bold", size=12.5),
        axis.text.x.bottom = element_text(size = 8.5),
        axis.text = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.position = "none")
ggsave(
  "AppendixForecasts.png",
  plot = last_plot(),
  device = NULL,
  path = NULL,
  scale = 0.8,
  width = 15,
  height = 7,
  units = c("in", "cm", "mm"),
  dpi = 600,
  limitsize = TRUE,
)

# making a table of point estimates
# inclusive of Covid-19 adjustments
point.estimates_lowest <- enough.space %>%
  filter(!is_aggregated(Sub.Cat))

point.estimates_lowest$Loan.Status <- as.character(point.estimates_lowest$Loan.Status)
point.estimates_lowest$Sub.Cat <- as.character(point.estimates_lowest$Sub.Cat)

point.estimates_second <- enough.space1 %>%
  filter(!is_aggregated(Loan.Status))

point.estimates_second$Loan.Status <- as.character(point.estimates_second$Loan.Status)
point.estimates_second$Sub.Cat <- as.character(point.estimates_second$Sub.Cat)

point.estimates_top <- enough.space1 %>%
  filter(is_aggregated(Loan.Status))

point.estimates_top$Loan.Status <- as.character(point.estimates_top$Loan.Status)
point.estimates_top$Sub.Cat <- as.character(point.estimates_top$Sub.Cat)

# saving as CSVs.
write.csv(point.estimates_top, "one.csv")
write.csv(point.estimates_second, "two.csv")
write.csv(point.estimates_lowest, "three.csv")

Stat 450: Hierarchical Forecasting

George Elsesser

3/14/2021

Data Formatting

Merging data sets based on date and loan status.

—————-FORECASTING—————–

Cross Validation

Covid - 19 Adjustments