Alexander Bactat
Introduction: We often hear that Americans live in different realities-that our experiences are worlds apart. Here, we see if the forecasts for these different worlds are different as well.
income_data_share_of_income <- read_excel("income data share of income.xlsx")
View(income_data_share_of_income)
library(ggplot2)
Use suppressPackageStartupMessages() to eliminate package startup messages
Attaching package: 㤼㸱ggplot2㤼㸲
The following object is masked _by_ 㤼㸱.GlobalEnv㤼㸲:
diamonds
library(forecast)
This is forecast 8.12
Crossvalidated is a great place to get help on forecasting issues:
http://stats.stackexchange.com/tags/forecasting.
library(corrplot)
lowest.share <- income_data_share_of_income$`Lowest Quintile`
second.share <- income_data_share_of_income$`Second Quintile`
middle.share <- income_data_share_of_income$`Middle Quintile`
fourth.share <- income_data_share_of_income$`Fourth Quintile`
highest.share <- income_data_share_of_income$`Highest Quintile`
top.share <- income_data_share_of_income$`Top 1%`
lowest.share.ts <- ts(lowest.share, start=1979, frequency=1)
second.share.ts <- ts(second.share, start=1979, frequency=1)
middle.share.ts <- ts(middle.share, start=1979, frequency=1)
fourth.share.ts <- ts(fourth.share, start=1979, frequency=1)
highest.share.ts <- ts(highest.share, start=1979, frequency=1)
top.share.ts <- ts(top.share, start=1979, frequency=1)
Share over time
autoplot(lowest.share.ts/100, main = "Income Share Over Time") + autolayer(second.share.ts/100) + autolayer(middle.share.ts/100) + autolayer(fourth.share.ts/100) + autolayer(highest.share.ts/100)
Correlation
corr.share <- cor(income_data_share_of_income)
corrplot(corr.share, method=c("number"))
Historic Share of Income
historic.share <- cbind(lowest.share.ts, second.share.ts, middle.share.ts, fourth.share.ts, highest.share.ts)
autoplot(historic.share)
income_data <- read_excel("income_data.xlsx")
Error in read_excel("income_data.xlsx") :
could not find function "read_excel"
lowest.income <- income_data$`Lowest Quintile`
second.income <- income_data$`Second Quintile`
middle.income <- income_data$`Middle Quintile`
fourth.income <- income_data$`Fourth Quintile`
highest.income <- income_data$`Highest Quintile`
top.income <- income_data$`Top 1%`
lowest.income.ts <- ts(lowest.income, start=1979, frequency=1)
second.income.ts <- ts(second.income, start=1979, frequency=1)
middle.income.ts <- ts(middle.income, start=1979, frequency=1)
fourth.income.ts <- ts(fourth.income, start=1979, frequency=1)
highest.income.ts <- ts(highest.income, start=1979, frequency=1)
top.income.ts <- ts(top.income, start=1979, frequency=1)
tni <- lowest.income.ts + second.income.ts + middle.income.ts + fourth.income.ts + highest.income.ts
lowest.tni.share <- lowest.income.ts/tni
second.tni.share <- second.income.ts/tni
middle.tni.share <- middle.income.ts/tni
fourth.tni.share <- fourth.income.ts/tni
highest.tni.share <- highest.income.ts/tni
autoplot(lowest.tni.share, main = "TNI Share") + autolayer(second.tni.share) + autolayer(middle.tni.share) + autolayer(fourth.tni.share) + autolayer(highest.tni.share)
Visualization
corr.income <- cor(income_data)
corrplot(corr.income, main = "Figure 1", method=c("number"))
income.ts <- ts(income_data, start=1979, frequency=1)
autoplot(income.ts, main = "Income Data Over Time, 1%")
income.quintile.ts <- ts(income_data_quintile, start=1979, end=2017)
income.ts.quintile <- ts(income_data_quintile, start=1979, end=2017)
autoplot(income.ts.quintile, main = "Quintile Income Over Time")
Separate into training and test data
tr.lowest.income <- ts(c(lowest.income.ts), start=1979, end=2013)
tr.second.income <- ts(c(second.income.ts), start=1979, end=2013)
tr.middle.income <- ts(c(middle.income.ts), start=1979, end=2013)
tr.fourth.income <- ts(c(fourth.income.ts), start=1979, end=2013)
tr.highest.income <- ts(c(highest.income.ts), start=1979, end=2013)
tr.top.income <- ts(c(top.income.ts), start=1979, end=2013)
test.lowest.income <- tail(lowest.income.ts, n=4L)
test.second.income <- tail(second.income.ts, n=4L)
test.middle.income <- tail(middle.income.ts, n=4L)
test.fourth.income <- tail(fourth.income.ts, n=4L)
test.highest.income <- tail(highest.income.ts, n=4L)
test.top.income <- tail(top.income.ts, n=4L)
BoxCox transformation
lambda.lowest<-BoxCox.lambda(lowest.income.ts)
lowest.ts.bc<-BoxCox(lowest.income.ts, lambda.lowest)
lambda.second<-BoxCox.lambda(second.income.ts)
second.ts.bc<-BoxCox(second.income.ts, lambda.second)
lambda.middle<-BoxCox.lambda(middle.income.ts)
middle.ts.bc<-BoxCox(middle.income.ts, lambda.middle)
lambda.fourth<-BoxCox.lambda(fourth.income.ts)
fourth.ts.bc<-BoxCox(fourth.income.ts, lambda.fourth)
lambda.highest<-BoxCox.lambda(highest.income.ts)
highest.ts.bc<-BoxCox(highest.income.ts, lambda.highest)
lambda.top<-BoxCox.lambda(top.income.ts)
top.ts.bc<-BoxCox(top.income.ts, lambda.top)
lambda.income<-BoxCox.lambda(income.ts)
income.ts.bc<-BoxCox(income.ts, lambda.income)
lambda.income.quintile<-BoxCox.lambda(income.quintile.ts)
income.quintile.ts.bc<-BoxCox(income.quintile.ts, lambda.income.quintile)
autoplot(lowest.ts.bc) + autolayer(second.ts.bc) + autolayer(middle.ts.bc) + autolayer(fourth.ts.bc) + autolayer(highest.ts.bc) + autolayer(top.ts.bc)
tr.lowest.income.bc <- ts(c(lowest.ts.bc), start=1979, end=2013)
tr.second.income.bc <- ts(c(second.ts.bc), start=1979, end=2013)
tr.middle.income.bc <- ts(c(middle.ts.bc), start=1979, end=2013)
tr.fourth.income.bc <- ts(c(fourth.ts.bc), start=1979, end=2013)
tr.highest.income.bc <- ts(c(highest.ts.bc), start=1979, end=2013)
tr.top.income.bc <- ts(c(top.ts.bc), start=1979, end=2013)
test.lowest.income.bc <- tail(lowest.ts.bc, n=4L)
test.second.income.bc <- tail(second.ts.bc, n=4L)
test.middle.income.bc <- tail(middle.ts.bc, n=4L)
test.fourth.income.bc <- tail(fourth.ts.bc, n=4L)
test.highest.income.bc <- tail(highest.ts.bc, n=4L)
test.top.income.bc <- tail(top.ts.bc, n=4L)
autoplot(lowest.ts.bc) + autolayer(second.ts.bc) + autolayer(middle.ts.bc) + autolayer(fourth.ts.bc) + autolayer(highest.ts.bc)
ARIMA Prediction
arima.lowest.tr <- auto.arima(tr.lowest.income, seasonal = FALSE)
arima.second.tr <- auto.arima(tr.second.income, seasonal = FALSE)
arima.middle.tr <- auto.arima(tr.middle.income, seasonal = FALSE)
arima.fourth.tr <- auto.arima(tr.fourth.income, seasonal = FALSE)
arima.highest.tr <- auto.arima(tr.highest.income, seasonal = FALSE)
arima.top.tr <- auto.arima(tr.top.income, seasonal = FALSE)
arima.lowest.pred <- forecast(arima.lowest.tr, h=4)
arima.second.pred <- forecast(arima.second.tr, h=4)
arima.middle.pred <- forecast(arima.middle.tr, h=4)
arima.fourth.pred <- forecast(arima.fourth.tr, h=4)
arima.highest.pred <- forecast(arima.highest.tr, h=4)
arima.top.pred <- forecast(arima.top.tr, h=4)
accuracy(arima.lowest.pred, test.lowest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 7.272219 462.4844 383.0312 -0.03105167 1.551573 0.7399466 0.004875264 NA
Test set 1498.156179 1597.2478 1498.1562 4.22661959 4.226620 2.8941653 -0.274204665 1.582799
accuracy(arima.second.pred, test.second.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.9326046 757.8808 598.5797 -0.06006521 1.618504 0.9085584 0.18145044 NA
Test set 2002.9411765 2181.6909 2002.9412 4.12030216 4.120302 3.0401786 0.08682013 1.917735
accuracy(arima.middle.pred, test.middle.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 1.338739 926.5299 712.095 -0.02885476 1.322278 0.9136314 0.21214719 NA
Test set 2764.705882 2969.9046 2764.706 4.13520057 4.135201 3.5471698 0.04459294 2.057399
accuracy(arima.fourth.pred, test.fourth.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 28.08586 1099.501 871.381 0.02343413 1.188251 0.7879509 -0.05602955 NA
Test set 2441.57350 2641.248 2441.573 2.61275906 2.612759 2.2078058 -0.18847282 1.528457
accuracy(arima.highest.pred, test.highest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 3.029746 9319.586 7467.231 -0.1120896 4.450155 0.8877128 0.1024052 NA
Test set 11102.941177 11646.891 11102.941 4.9978237 4.997824 1.3199301 -0.5640736 1.600023
accuracy(arima.top.pred, test.top.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 20176.34 140768.7 110627.8 1.933046 12.46039 0.9715219 -0.03039196 NA
Test set 183550.00 191028.1 183550.0 14.405772 14.40577 1.6119176 -0.43643777 2.340302
arima.lowest <- auto.arima(lowest.income.ts, seasonal = FALSE)
arima.second <- auto.arima(second.income.ts, seasonal = FALSE)
arima.middle <- auto.arima(middle.income.ts, seasonal = FALSE)
arima.fourth <- auto.arima(fourth.income.ts, seasonal = FALSE)
arima.highest <- auto.arima(highest.income.ts, seasonal = FALSE)
arima.top <- auto.arima(top.income.ts, seasonal = FALSE)
arima.lowest.forecast <- forecast(arima.lowest, h=6)
arima.second.forecast <- forecast(arima.second, h=6)
arima.middle.forecast <- forecast(arima.middle, h=6)
arima.fourth.forecast <- forecast(arima.fourth, h=6)
arima.highest.forecast <- forecast(arima.highest, h=6)
arima.top.forecast <- forecast(arima.top, h=6)
autoplot(income.ts) + autolayer(arima.lowest.forecast) + autolayer(arima.second.forecast) + autolayer(arima.middle.forecast) + autolayer(arima.fourth.forecast) + autolayer(arima.highest.forecast) + autolayer(arima.top.forecast)
autoplot(income.quintile.ts) + autolayer(arima.lowest.forecast) + autolayer(arima.second.forecast) + autolayer(arima.middle.forecast) + autolayer(arima.fourth.forecast) + autolayer(arima.highest.forecast)
arima.lowest$var.coef
ar1 drift
ar1 0.02359604 -1.009667
drift -1.00966732 15031.631266
arima.second$var.coef
drift
drift 15996
arima.middle$var.coef
ar1 drift
ar1 0.0276232 -2.181023
drift -2.1810228 41457.525051
arima.fourth$var.coef
drift
drift 36123.44
arima.highest$var.coef
drift
drift 2225887
arima.top$var.coef
numeric(0)
ARIMA BoxCox
arima.lowest.tr.bc <- auto.arima(tr.lowest.income.bc, seasonal = FALSE)
arima.second.tr.bc <- auto.arima(tr.second.income.bc, seasonal = FALSE)
arima.middle.tr.bc <- auto.arima(tr.middle.income.bc, seasonal = FALSE)
arima.fourth.tr.bc <- auto.arima(tr.fourth.income.bc, seasonal = FALSE)
arima.highest.tr.bc <- auto.arima(tr.highest.income.bc, seasonal = FALSE)
arima.top.tr.bc <- auto.arima(tr.top.income.bc, seasonal = FALSE)
arima.lowest.pred.bc <- forecast(arima.lowest.tr.bc, h=4)
arima.second.pred.bc <- forecast(arima.second.tr.bc, h=4)
arima.middle.pred.bc <- forecast(arima.middle.tr.bc, h=4)
arima.fourth.pred.bc <- forecast(arima.fourth.tr.bc, h=4)
arima.highest.pred.bc <- forecast(arima.highest.tr.bc, h=4)
arima.top.pred.bc <- forecast(arima.top.tr.bc, h=4)
accuracy(arima.lowest.pred.bc, test.lowest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0002558375 0.006689416 0.005673231 0.003956481 0.0894611 0.7564037 -0.02981167 NA
Test set 0.0101396767 0.011289693 0.010139677 0.156423254 0.1564233 1.3519085 -0.37363327 1.146522
accuracy(arima.second.pred.bc, test.second.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 3.290109e-05 1.946594e-04 3.468072e-05 0.0028571386 0.0030116812 17.763066 -0.005726159 NA
Test set 3.026644e-06 3.290209e-06 3.026644e-06 0.0002628256 0.0002628256 1.550212 0.003398595 1.617547
accuracy(arima.middle.pred.bc, test.middle.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 2.857297e-05 1.690403e-04 2.882196e-05 2.857141e-03 2.882039e-03 105.3707 -0.001761018 NA
Test set 5.528845e-07 5.897093e-07 5.528845e-07 5.528509e-05 5.528509e-05 2.0213 -0.040482579 1.786731
accuracy(arima.fourth.pred.bc, test.fourth.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 2.857311e-05 1.690410e-04 2.874214e-05 2.857141e-03 2.874043e-03 140.0288313 -0.001491237 NA
Test set 1.443101e-07 1.633884e-07 1.443101e-07 1.443008e-05 1.443008e-05 0.7030645 -0.527666553 0.8164317
accuracy(arima.highest.pred.bc, test.highest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 6.295487e-05 0.0004407647 0.0002575512 0.002856977 0.01168085 1.1552661 -0.07259585 NA
Test set 1.215947e-04 0.0001306268 0.0001215947 0.005510140 0.00551014 0.5454227 -0.68093091 0.9760904
accuracy(arima.top.pred.bc, test.top.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0001039776 0.0003969834 0.0002227265 0.005079834 0.010879522 1.317200 -0.07330695 NA
Test set 0.0001715479 0.0001770203 0.0001715479 0.008375898 0.008375898 1.014531 -0.45198422 2.585902
arima.lowest.bc <- auto.arima(lowest.ts.bc, seasonal = FALSE)
arima.second.bc <- auto.arima(second.ts.bc, seasonal = FALSE)
arima.middle.bc <- auto.arima(middle.ts.bc, seasonal = FALSE)
arima.fourth.bc <- auto.arima(fourth.ts.bc, seasonal = FALSE)
arima.highest.bc <- auto.arima(highest.ts.bc, seasonal = FALSE)
arima.top.bc <- auto.arima(top.ts.bc, seasonal = FALSE)
arima.lowest.forecast.bc <- forecast(arima.lowest.bc, h=6)
arima.second.forecast.bc <- forecast(arima.second.bc, h=6)
arima.middle.forecast.bc <- forecast(arima.middle.bc, h=6)
arima.fourth.forecast.bc <- forecast(arima.fourth.bc, h=6)
arima.highest.forecast.bc <- forecast(arima.highest.bc, h=6)
arima.top.forecast.bc <- forecast(arima.top.bc, h=6)
arima.lowest.bc$var.coef
ma1 drift
ma1 1.999394e-02 -1.113626e-06
drift -1.113626e-06 2.261588e-06
arima.second.bc$var.coef
drift
drift 2.631593e-08
arima.middle.bc$var.coef
drift
drift 2.631579e-08
arima.fourth.bc$var.coef
drift
drift 2.631579e-08
arima.highest.bc$var.coef
drift
drift 2.76922e-08
arima.top.bc$var.coef
drift
drift 2.719784e-08
ARIMA Distribution Forecast
arima.total.forecast <- arima.lowest.forecast$mean + arima.second.forecast$mean + arima.middle.forecast$mean + arima.fourth.forecast$mean + arima.highest.forecast$mean
arima.lowest.forecast.share <- arima.lowest.forecast$mean/arima.total.forecast
arima.second.forecast.share <- arima.second.forecast$mean/arima.total.forecast
arima.middle.forecast.share <- arima.middle.forecast$mean/arima.total.forecast
arima.fourth.forecast.share <- arima.fourth.forecast$mean/arima.total.forecast
arima.highest.forecast.share <- arima.highest.forecast$mean/arima.total.forecast
autoplot(arima.lowest.forecast.share) + autolayer(arima.second.forecast.share) + autolayer(arima.middle.forecast.share) + autolayer(arima.fourth.forecast.share) + autolayer(arima.highest.forecast.share)
Holt Trend
holt.lowest.pred <- holt(tr.lowest.income, h=4)
holt.second.pred <- holt(tr.second.income, h=4)
holt.middle.pred <- holt(tr.middle.income, h=4)
holt.fourth.pred <- holt(tr.fourth.income, h=4)
holt.highest.pred <- holt(tr.highest.income, h=4)
holt.top.pred <- holt(tr.top.income, h=4)
accuracy(holt.lowest.pred, test.lowest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -12.32408 501.8952 406.0495 -0.02202577 1.643612 0.7844137 0.1795881 NA
Test set 2013.10446 2123.7603 2013.1045 5.68107557 5.681076 3.8889518 -0.1555581 2.088794
accuracy(holt.second.pred, test.second.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 26.01141 772.0958 622.5127 0.01548649 1.690866 0.9448854 0.11402772 NA
Test set 2005.22172 2184.2038 2005.2217 4.12498532 4.124985 3.0436401 0.08714682 1.919919
accuracy(holt.middle.pred, test.middle.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 23.0981 977.0469 766.3832 0.02440584 1.437048 0.9832841 0.07965205 NA
Test set 2689.8084 2889.0496 2689.8084 4.02327361 4.023274 3.4510750 0.03608074 2.001999
accuracy(holt.fourth.pred, test.fourth.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 7.510388 1220.524 999.1519 0.001063871 1.364005 0.9034885 0.1221030 NA
Test set 2049.719871 2238.851 2049.7199 2.192779432 2.192779 1.8534701 -0.2573818 1.300647
accuracy(holt.highest.pred, test.highest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -1248.955 9447.836 7401.222 -0.8913465 4.484089 0.8798655 0.09106555 NA
Test set 7593.992 8100.949 7593.992 3.4229277 3.422928 0.9027822 -0.75824994 1.088374
accuracy(holt.top.pred, test.top.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -17824.56 140435.66 102952.43 -3.273117 11.821547 0.9041179 -0.008082532 NA
Test set 75479.75 84812.76 75479.75 5.920543 5.920543 0.6628555 -0.538230986 0.9612686
Holt Damped trend
holt.lowest.damp.pred <- holt(tr.lowest.income, damped=TRUE, h=4)
holt.second.damp.pred <- holt(tr.second.income, damped=TRUE, h=4)
holt.middle.damp.pred <- holt(tr.middle.income, damped=TRUE, h=4)
holt.fourth.damp.pred <- holt(tr.fourth.income, damped=TRUE, h=4)
holt.highest.damp.pred <- holt(tr.highest.income, damped=TRUE, h=4)
holt.top.damp.pred <- holt(tr.top.income, damped=TRUE, h=4)
accuracy(holt.lowest.damp.pred, test.lowest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 83.51819 493.0921 399.1551 0.3405908 1.599664 0.771095 0.03053898 NA
Test set 2300.82791 2431.7834 2300.8279 6.4909182 6.490918 4.444781 -0.07100652 2.389076
accuracy(holt.second.damp.pred, test.second.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 163.3489 771.4854 589.1744 0.4644494 1.585256 0.8942825 -0.0003128137 NA
Test set 2912.8488 3163.4926 2912.8488 5.9927338 5.992734 4.4212884 0.1580220240 2.766222
accuracy(holt.middle.damp.pred, test.middle.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 128.214 988.2337 764.2017 0.2049618 1.435785 0.9804852 0.09440196 NA
Test set 3258.861 3506.4420 3258.8611 4.8734970 4.873497 4.1811803 0.09036667 2.42528
accuracy(holt.fourth.damp.pred, test.fourth.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 167.2705 1235.066 993.0851 0.1936469 1.361649 0.8980024 0.13997329 NA
Test set 3022.2565 3279.815 3022.2565 3.2328751 3.232875 2.7328915 -0.06228371 1.894919
accuracy(holt.highest.damp.pred, test.highest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 29.49519 9380.064 7722.461 -0.1233526 4.665533 0.9180547 0.08719802 NA
Test set 13073.75856 13720.491 13073.759 5.8818537 5.881854 1.5542230 -0.40221205 1.898821
accuracy(holt.top.damp.pred, test.top.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -292.2801 139060.1 106702.4 -1.163883 12.18184 0.9370495 0.001575588 NA
Test set 142149.6391 149177.4 142149.6 11.145223 11.14522 1.2483438 -0.587766313 1.817446
holt.lowest <- holt(lowest.income.ts, damped=TRUE, h=6)
holt.second <- holt(second.income.ts, damped=TRUE, h=6)
holt.middle <- holt(middle.income.ts, damped=TRUE, h=6)
holt.fourth <- holt(fourth.income.ts, damped=TRUE, h=6)
holt.highest <- holt(highest.income.ts, damped=TRUE, h=6)
holt.top <- holt(top.income.ts, damped=TRUE, h=6)
autoplot(income.ts) + autolayer(holt.lowest) + autolayer(holt.second) + autolayer(holt.middle) + autolayer(holt.fourth) + autolayer(holt.highest) + autolayer(holt.top)
autoplot(income.quintile.ts) + autolayer(holt.lowest) + autolayer(holt.second) + autolayer(holt.middle) + autolayer(holt.fourth) + autolayer(holt.highest)
holt.lowest$model
Damped Holt's method
Call:
holt(y = lowest.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 0.6115
phi = 0.8
Initial states:
l = 18919.9259
b = 370.4366
sigma: 594.5683
AIC AICc BIC
647.7792 650.4042 657.7606
holt.second$model
Damped Holt's method
Call:
holt(y = second.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 0.0901
phi = 0.98
Initial states:
l = 31813.2089
b = 300.3689
sigma: 856.6333
AIC AICc BIC
676.2628 678.8878 686.2442
holt.middle$model
Damped Holt's method
Call:
holt(y = middle.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 0.0414
phi = 0.98
Initial states:
l = 44999.873
b = 517.323
sigma: 1094.925
AIC AICc BIC
695.4065 698.0315 705.3879
holt.fourth$model
Damped Holt's method
Call:
holt(y = fourth.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 0.0282
phi = 0.98
Initial states:
l = 58580.0094
b = 971.4121
sigma: 1343.881
AIC AICc BIC
711.3868 714.0118 721.3681
holt.highest$model
Damped Holt's method
Call:
holt(y = highest.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 1e-04
phi = 0.98
Initial states:
l = 98159.7306
b = 4165.0621
sigma: 9834.965
AIC AICc BIC
866.6366 869.2616 876.6180
holt.top$model
Damped Holt's method
Call:
holt(y = top.income.ts, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.8913
beta = 1e-04
phi = 0.98
Initial states:
l = 300891.4257
b = 39277.1803
sigma: 144041.6
AIC AICc BIC
1076.001 1078.626 1085.982
Holt BoxCox
holt.lowest.pred.bc <- holt(tr.lowest.income.bc, h=4)
holt.second.pred.bc <- holt(tr.lowest.income.bc, h=4)
holt.middle.pred.bc <- holt(tr.lowest.income.bc, h=4)
holt.fourth.pred.bc <- holt(tr.lowest.income.bc, h=4)
holt.highest.pred.bc <- holt(tr.lowest.income.bc, h=4)
holt.top.pred.bc <- holt(tr.lowest.income.bc, h=4)
accuracy(holt.lowest.pred.bc, test.lowest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 0.005482289 0.08578308 0.7264745 0.0853722 NA
Test set 0.0218306926 0.022968176 0.021830693 0.336805453 0.33680545 2.9106548 -0.1340803 2.298368
accuracy(holt.second.pred.bc, test.second.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 5.482289e-03 0.08578308 0.7264745 0.0853722 NA
Test set -5.3053409207 5.305341503 5.305340921 -4.607018e+02 460.70178256 707.3534727 0.2500358 2276525
accuracy(holt.middle.pred.bc, test.middle.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 5.482289e-03 0.08578308 0.7264745 0.0853722 NA
Test set -5.4568583918 5.456858959 5.456858392 -5.456527e+02 545.65274707 727.5550791 0.2500111 14534716
accuracy(holt.fourth.pred.bc, test.fourth.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 5.482289e-03 0.08578308 0.7264745 0.0853722 NA
Test set -5.4568540737 5.456854640 5.456854074 -5.456500e+02 545.64995928 727.5545034 0.2500103 23730445
accuracy(holt.highest.pred.bc, test.highest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 5.482289e-03 0.08578308 0.7264745 0.0853722 NA
Test set -4.2501911122 4.250191787 4.250191112 -1.926015e+02 192.60149708 566.6718667 0.2591839 30828.02
accuracy(holt.top.pred.bc, test.top.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.0003309153 0.006978332 0.005448753 5.482289e-03 0.08578308 0.7264745 0.0853722 NA
Test set -4.4088162627 4.408816948 4.408816263 -2.152634e+02 215.26342726 587.8211298 0.2556618 60127.98
holt.lowest.bc <- holt(lowest.ts.bc, damped=TRUE, h=6)
holt.second.bc <- holt(second.ts.bc, damped=TRUE, h=6)
holt.middle.bc <- holt(middle.ts.bc, damped=TRUE, h=6)
holt.fourth.bc <- holt(fourth.ts.bc, damped=TRUE, h=6)
holt.highest.bc <- holt(highest.ts.bc, damped=TRUE, h=6)
holt.top.bc <- holt(top.ts.bc, damped=TRUE, h=6)
holt.lowest.bc$model
Damped Holt's method
Call:
holt(y = lowest.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9999
beta = 0.3546
phi = 0.936
Initial states:
l = 6.2669
b = -0.0041
sigma: 0.0077
AIC AICc BIC
-229.8235 -227.1985 -219.8421
holt.second.bc$model
Damped Holt's method
Call:
holt(y = second.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.8119
beta = 1e-04
phi = 0.9779
Initial states:
l = 1.1515
b = 0
sigma: 0
AIC AICc BIC
-853.9775 -851.3525 -843.9962
holt.middle.bc$model
Damped Holt's method
Call:
holt(y = middle.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9965
beta = 0.0778
phi = 0.9781
Initial states:
l = 1.0001
b = 0
sigma: 0
AIC AICc BIC
-1006.9280 -1004.3030 -996.9467
holt.fourth.bc$model
Damped Holt's method
Call:
holt(y = fourth.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9923
beta = 7e-04
phi = 0.9782
Initial states:
l = 1.0001
b = 0
sigma: 0
AIC AICc BIC
-1040.381 -1037.756 -1030.399
holt.highest.bc$model
Damped Holt's method
Call:
holt(y = highest.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.9954
beta = 1e-04
phi = 0.9769
Initial states:
l = 2.2034
b = 1e-04
sigma: 2e-04
AIC AICc BIC
-500.2277 -497.6027 -490.2463
holt.top.bc$model
Damped Holt's method
Call:
holt(y = top.ts.bc, h = 6, damped = TRUE)
Smoothing parameters:
alpha = 0.8066
beta = 1e-04
phi = 0.9771
Initial states:
l = 2.0462
b = 1e-04
sigma: 2e-04
AIC AICc BIC
-519.5355 -516.9105 -509.5542
Holt Distribution Forecast
holt.total <- holt.lowest$mean + holt.second$mean + holt.middle$mean + holt.fourth$mean + holt.highest$mean
holt.lowest.share <- holt.lowest$mean/holt.total
holt.second.share <- holt.second$mean/holt.total
holt.middle.share <- holt.middle$mean/holt.total
holt.fourth.share <- holt.fourth$mean/holt.total
holt.highest.share <- holt.highest$mean/holt.total
autoplot(holt.lowest.share) + autolayer(holt.second.share) + autolayer(holt.middle.share) + autolayer(holt.fourth.share) + autolayer(holt.highest.share)
Neural Net Prediction
nnar.lowest.tr <- nnetar(tr.lowest.income, lambda=0, boostrap=TRUE)
nnar.second.tr <- nnetar(tr.second.income, lambda=0, boostrap=TRUE)
nnar.middle.tr <- nnetar(tr.middle.income, lambda=0, boostrap=TRUE)
nnar.fourth.tr <- nnetar(tr.fourth.income, lambda=0, boostrap=TRUE)
nnar.highest.tr <- nnetar(tr.highest.income, lambda=0, boostrap=TRUE)
nnar.top.tr <- nnetar(tr.top.income, lambda=0, boostrap=TRUE)
nnar.lowest.pred <- forecast(nnar.lowest.tr, h=4)
nnar.second.pred <- forecast(nnar.second.tr, h=4)
nnar.middle.pred <- forecast(nnar.middle.tr, h=4)
nnar.fourth.pred <- forecast(nnar.fourth.tr, h=4)
nnar.highest.pred <- forecast(nnar.highest.tr, h=4)
nnar.top.pred <- forecast(nnar.top.tr, h=4)
accuracy(nnar.lowest.pred$mean, test.lowest.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 2462.223 2603.78 2462.223 6.945758 6.945758 -0.03887182 2.556189
accuracy(nnar.second.pred$mean, test.second.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 2783.702 3031.126 2783.702 5.725936 5.725936 0.1557105 2.652865
accuracy(nnar.middle.pred$mean, test.middle.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 3502.677 3791.374 3502.677 5.235919 5.235919 0.1179643 2.623366
accuracy(nnar.fourth.pred$mean, test.fourth.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 4023.068 4366.748 4023.068 4.303146 4.303146 0.04015834 2.515797
accuracy(nnar.highest.pred$mean, test.highest.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 15890.22 16793.13 15890.22 7.142739 7.142739 -0.2020195 2.345393
accuracy(nnar.top.pred$mean, test.top.income)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 161698.9 168829.4 161698.9 12.68541 12.68541 -0.5196279 2.058787
nnar.lowest <- nnetar(lowest.income.ts, lambda=0)
nnar.second <- nnetar(second.income.ts, lambda=0)
nnar.middle <- nnetar(middle.income.ts, lambda=0)
nnar.fourth <- nnetar(fourth.income.ts, lambda=0)
nnar.highest <- nnetar(highest.income.ts, lambda=0)
nnar.top <- nnetar(top.income.ts, lambda=0)
nnar.lowest.forecast <- forecast(nnar.lowest, h=6)
nnar.second.forecast <- forecast(nnar.second, h=6)
nnar.middle.forecast <- forecast(nnar.middle, h=6)
nnar.fourth.forecast <- forecast(nnar.fourth, h=6)
nnar.highest.forecast <- forecast(nnar.highest, h=6)
nnar.top.forecast <- forecast(nnar.top, h=6)
autoplot(income.ts) + autolayer(nnar.lowest.forecast) + autolayer(nnar.second.forecast) + autolayer(nnar.middle.forecast) + autolayer(nnar.fourth.forecast) + autolayer(nnar.highest.forecast) + autolayer(nnar.top.forecast)
autoplot(income.quintile.ts) + autolayer(nnar.lowest.forecast) + autolayer(nnar.second.forecast) + autolayer(nnar.middle.forecast) + autolayer(nnar.fourth.forecast) + autolayer(nnar.highest.forecast)
nnar.lowest$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
nnar.second$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
nnar.middle$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
nnar.fourth$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
nnar.highest$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
nnar.top$model
Average of 20 networks, each of which is
a 1-1-1 network with 4 weights
options were - linear output units
ETS Model Prediction
ets.lowest.tr <- ets(tr.lowest.income, model = "ZZZ", damped = TRUE)
ets.second.tr <- ets(tr.second.income, model = "ZZZ", damped = TRUE)
ets.middle.tr <- ets(tr.middle.income, model = "ZZZ", damped = TRUE)
ets.fourth.tr <- ets(tr.fourth.income, model = "ZZZ", damped = TRUE)
ets.highest.tr <- ets(tr.highest.income, model = "ZZZ", damped = TRUE)
ets.top.tr <- ets(tr.top.income, model = "ZZZ", damped = TRUE)
ets.lowest.pred <- forecast(ets.lowest.tr, h=4)
ets.second.pred <- forecast(ets.second.tr, h=4)
ets.middle.pred <- forecast(ets.middle.tr, h=4)
ets.fourth.pred <- forecast(ets.fourth.tr, h=4)
ets.highest.pred <- forecast(ets.highest.tr, h=4)
ets.top.pred <- forecast(ets.top.tr, h=4)
accuracy(ets.lowest.pred, test.lowest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 67.69353 493.0479 395.2248 0.2834461 1.584492 0.7635025 0.02005544 NA
Test set 2273.62567 2402.3208 2273.6257 6.4143967 6.414397 4.3922314 -0.07855079 2.360285
accuracy(ets.second.pred, test.second.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 74.66658 783.9019 630.9533 0.1992818 1.716390 0.9576969 0.01805541 NA
Test set 2561.35885 2786.2351 2561.3589 5.2689875 5.268987 3.8877768 0.13997401 2.440644
accuracy(ets.middle.pred, test.middle.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 128.2127 988.2339 764.2019 0.204960 1.435785 0.9804855 0.09440160 NA
Test set 3258.8539 3506.4342 3258.8539 4.873486 4.873486 4.1811711 0.09036617 2.425274
accuracy(ets.fourth.pred, test.fourth.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 167.1984 1235.070 993.1037 0.193567 1.361671 0.8980193 0.13995767 NA
Test set 3021.8692 3279.399 3021.8692 3.232461 3.232461 2.7325413 -0.06233516 1.894682
accuracy(ets.highest.pred, test.highest.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 11.99119 9369.749 7706.573 -0.1387739 4.651156 0.916166 0.08861028 NA
Test set 13066.12594 13712.441 13066.126 5.8784281 5.878428 1.553316 -0.40275691 1.897666
accuracy(ets.top.pred, test.top.income)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -5172.071 139396.3 105822.4 -1.786354 12.101286 0.9293220 0.04859916 NA
Test set 107175.498 114933.6 107175.5 8.385228 8.385228 0.9412044 -0.66724907 1.399752
ets.lowest <- ets(lowest.income.ts, model = "ZZZ", damped = TRUE)
ets.second <- ets(second.income.ts, model = "ZZZ", damped = TRUE)
ets.middle <- ets(middle.income.ts, model = "ZZZ", damped = TRUE)
ets.fourth <- ets(fourth.income.ts, model = "ZZZ", damped = TRUE)
ets.highest <- ets(highest.income.ts, model = "ZZZ", damped = TRUE)
ets.top <- ets(top.income.ts, model = "ZZZ", damped = TRUE)
ets.lowest.forecast <- forecast(ets.lowest, h=6)
ets.second.forecast <- forecast(ets.second, h=6)
ets.middle.forecast <- forecast(ets.middle, h=6)
ets.fourth.forecast <- forecast(ets.fourth, h=6)
ets.highest.forecast <- forecast(ets.highest, h=6)
ets.top.forecast <- forecast(ets.top, h=6)
autoplot(income.ts) + autolayer(ets.lowest.forecast) + autolayer(ets.second.forecast) + autolayer(ets.middle.forecast) + autolayer(ets.fourth.forecast) + autolayer(ets.highest.forecast) + autolayer(ets.top.forecast)
autoplot(income.quintile.ts) + autolayer(ets.lowest.forecast) + autolayer(ets.second.forecast) + autolayer(ets.middle.forecast) + autolayer(ets.fourth.forecast) + autolayer(ets.highest.forecast)
ETS BoxCox Model Prediction
ets.lowest.tr.bc <- ets(tr.lowest.income.bc, model = "ZZZ", damped = TRUE)
ets.second.tr.bc <- ets(tr.second.income.bc, model = "ZZZ", damped = TRUE)
ets.middle.tr.bc <- ets(tr.middle.income.bc, model = "ZZZ", damped = TRUE)
ets.fourth.tr.bc <- ets(tr.fourth.income.bc, model = "ZZZ", damped = TRUE)
ets.highest.tr.bc <- ets(tr.highest.income.bc, model = "ZZZ", damped = TRUE)
ets.top.tr.bc <- ets(tr.top.income.bc, model = "ZZZ", damped = TRUE)
ets.lowest.pred.bc <- forecast(ets.lowest.tr.bc, h=4)
ets.second.pred.bc <- forecast(ets.second.tr.bc, h=4)
ets.middle.pred.bc <- forecast(ets.middle.tr.bc, h=4)
ets.fourth.pred.bc <- forecast(ets.fourth.tr.bc, h=4)
ets.highest.pred.bc <- forecast(ets.highest.tr.bc, h=4)
ets.top.pred.bc <- forecast(ets.top.tr.bc, h=4)
accuracy(ets.lowest.pred.bc, test.lowest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.001434809 0.006837343 0.005397466 0.02275123 0.08497752 0.7196364 0.01515251 NA
Test set 0.023360297 0.024619885 0.023360297 0.36039952 0.36039952 3.1145948 -0.08478199 2.466026
accuracy(ets.second.pred.bc, test.second.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 2.675884e-07 2.311370e-06 1.729327e-06 0.0000232377 0.0001501737 0.8857415 -0.01774253 NA
Test set 5.104369e-06 5.523659e-06 5.104369e-06 0.0004432496 0.0004432496 2.6143990 0.13950998 2.706167
accuracy(ets.middle.pred.bc, test.middle.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 2.833987e-08 3.439899e-07 2.296451e-07 2.833839e-06 2.296321e-05 0.8395635 0.01333967 NA
Test set 8.958193e-07 9.600636e-07 8.958193e-07 8.957648e-05 8.957648e-05 3.2750405 0.11187922 2.906027
accuracy(ets.fourth.pred.bc, test.fourth.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 5.917907e-09 2.277304e-07 1.697673e-07 5.917476e-07 1.697568e-05 0.8270893 0.1269625 NA
Test set 2.809025e-07 3.044669e-07 2.809025e-07 2.808842e-05 2.808842e-05 1.3685286 -0.1954513 1.513859
accuracy(ets.highest.pred.bc, test.highest.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -5.764992e-07 0.0002342864 0.0001977486 -2.669651e-05 0.008966746 0.8870168 0.0007019257 NA
Test set 2.188484e-04 0.0002279673 0.0002188484 9.917201e-03 0.009917201 0.9816620 -0.5262543553 1.794629
accuracy(ets.top.pred.bc, test.top.income.bc)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set -5.659374e-06 1.845866e-04 1.513110e-04 -0.0002766285 0.007389997 0.8948504 -0.01561312 NA
Test set 3.964055e-05 5.132096e-05 4.757256e-05 0.0019354579 0.002322751 0.2813432 -0.51602528 0.7041449
ets.lowest.bc <- ets(lowest.ts.bc, model = "ZZZ", damped = TRUE)
ets.second.bc <- ets(second.ts.bc, model = "ZZZ", damped = TRUE)
ets.middle.bc <- ets(middle.ts.bc, model = "ZZZ", damped = TRUE)
ets.fourth.bc <- ets(fourth.ts.bc, model = "ZZZ", damped = TRUE)
ets.highest.bc <- ets(highest.ts.bc, model = "ZZZ", damped = TRUE)
ets.top.bc <- ets(top.ts.bc, model = "ZZZ", damped = TRUE)
ets.lowest.forecast.bc <- forecast(ets.lowest.bc, h=6)
ets.second.forecast.bc <- forecast(ets.second.bc, h=6)
ets.middle.forecast.bc <- forecast(ets.middle.bc, h=6)
ets.fourth.forecast.bc <- forecast(ets.fourth.bc, h=6)
ets.highest.forecast.bc <- forecast(ets.highest.bc, h=6)
ets.top.forecast.bc <- forecast(ets.top.bc, h=6)
accuracy(ets.lowest.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.00156607 0.00720561 0.005766345 0.02475689 0.09050821 0.755832 0.07646825
accuracy(ets.second.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 7.232454e-07 2.22369e-06 1.631265e-06 6.280604e-05 0.0001416578 0.837977 -0.1018189
accuracy(ets.middle.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 4.893851e-08 3.395959e-07 2.394733e-07 4.893567e-06 2.394596e-05 0.856803 0.1096092
accuracy(ets.fourth.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 8.941874e-09 2.208425e-07 1.652104e-07 8.941233e-07 1.652001e-05 0.813465 0.1152623
accuracy(ets.highest.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.994842e-06 0.000225105 0.0001878442 8.980116e-05 0.008517329 0.8756187 -0.001696444
accuracy(ets.top.bc)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -4.643829e-06 0.000175768 0.0001410983 -0.0002270292 0.006891139 0.8808617 -0.01285064
After looking at the accuracy of all the models, we determine the ARIMA model to be the best at forecasting the bottom two distributions and the linear holt model to bet the best at forecasting the rest. While the ETS model was better at capturing the existing data, the ARIMA model had lower RMSE scores regarding the test set. As the purpose of this project is to forecast distribution rather than understand previous income distributions, we will use the ARIMA model. Going off of this, the Holt model had better test set RMSE than the ARIMA model from the middle quintile onward. It should be noted that the non-damped linear Holt method has the tendency to over-cast. That said, it should be no surprise to find that the increase in trend for the highest quintile or the top 1% overperforms any model. It is also interesting to note that the ARIMA model works best on the two quintiles with the most obvious and steady trend, where the forecast for time T is usually highly correlated with the observation at time T-1 as the ARIMA model does, while the higher income quintiles are more accurately forecast using an underlying trend shown by the Linear Holt model. (ARIMA was used across the board in the first draft, so downloaded data names reflect this. I will attempt to fix this going forward)
Full Data Set
arima_income_data <- read_excel("arima_income_data.xlsx")
Error in read_excel("arima_income_data.xlsx") :
could not find function "read_excel"
final.lowest.income <- arima_income_data$`Lowest Quantile`
final.second.income <- arima_income_data$`Second Quantile`
final.middle.income <- arima_income_data$`Middle Quantile`
final.fourth.income <- arima_income_data$`Fourth Quantile`
final.highest.income <- arima_income_data$`Highest Quantile`
final.80.income <- arima_income_data$`0.8`
final.90.income <- arima_income_data$`0.9`
final.96.income <- arima_income_data$`0.96`
final.top.income <- arima_income_data$`Top Quantile`
final.lowest.income.ts <- ts(final.lowest.income, start=1979, frequency=1)
final.second.income.ts <- ts(final.second.income, start=1979, frequency=1)
final.middle.income.ts <- ts(final.middle.income, start=1979, frequency=1)
final.fourth.income.ts <- ts(final.fourth.income, start=1979, frequency=1)
final.highest.income.ts <- ts(final.highest.income, start=1979, frequency=1)
final.80.income.ts <- ts(final.80.income, start=1979, frequency=1)
final.90.income.ts <- ts(final.90.income, start=1979, frequency=1)
final.96.income.ts <- ts(final.96.income, start=1979, frequency=1)
final.top.income.ts <- ts(final.top.income, start=1979, frequency=1)
corr.arima.income <- cor(arima_income_data)
corrplot(corr.arima.income, method=c("number"))
income.final.ts <- ts(arima_income_data, start=1979, frequency=1)
autoplot(income.final.ts)
Best Model
final.lowest <- auto.arima(final.lowest.income.ts, seasonal = FALSE)
final.second <- auto.arima(final.second.income.ts, seasonal = FALSE)
final.middle <- auto.arima(final.middle.income.ts, seasonal = FALSE)
final.fourth <- auto.arima(final.fourth.income.ts, seasonal = FALSE)
final.highest <- auto.arima(final.highest.income.ts, seasonal = FALSE)
final.80 <- auto.arima(final.80.income.ts, seasonal = FALSE)
final.90 <- auto.arima(final.90.income.ts, seasonal = FALSE)
final.96 <- auto.arima(final.96.income.ts, seasonal = FALSE)
final.top <- auto.arima(final.top.income.ts, seasonal = FALSE)
final.lowest.forecast <- forecast(final.lowest, h=6)
final.second.forecast <- forecast(final.second, h=6)
final.middle.forecast <- forecast(final.middle, h=6)
final.fourth.forecast <- forecast(final.fourth, h=6)
final.highest.forecast <- forecast(final.highest, h=6)
final.80.forecast <- forecast(final.80, h=6)
final.90.forecast <- forecast(final.90, h=6)
final.96.forecast <- forecast(final.96, h=6)
final.top.forecast <- forecast(final.top, h=6)
autoplot(income.final.ts, main = "Best Quantile Forecast") + autolayer(final.lowest.forecast) + autolayer(final.second.forecast) + autolayer(final.middle.forecast) + autolayer(final.fourth.forecast) + autolayer(final.80.forecast) + autolayer(final.90.forecast) + autolayer(final.96.forecast) + autolayer(final.top.forecast)
autoplot(income.quintile.ts, main = "Best Quintile Forecast") + autolayer(final.lowest.forecast) + autolayer(final.second.forecast) + autolayer(final.middle.forecast) + autolayer(final.fourth.forecast) + autolayer(final.highest.forecast)
NA
NA
ARIMA Distribution Forecast
final.total.forecast <- final.lowest.forecast$mean + final.second.forecast$mean + final.middle.forecast$mean + final.fourth.forecast$mean + final.80.forecast$mean + final.90.forecast$mean + final.96.forecast$mean + final.top.forecast$mean
final.lowest.forecast.share <- final.lowest.forecast$mean/final.total.forecast
final.second.forecast.share <- final.second.forecast$mean/final.total.forecast
final.middle.forecast.share <- final.middle.forecast$mean/final.total.forecast
final.fourth.forecast.share <- final.fourth.forecast$mean/final.total.forecast
final.80.forecast.share <- final.80.forecast$mean/final.total.forecast
final.90.forecast.share <- final.90.forecast$mean/final.total.forecast
final.96.forecast.share <- final.96.forecast$mean/final.total.forecast
final.top.forecast.share <- final.top.forecast$mean/final.total.forecast
autoplot(final.lowest.forecast.share) + autolayer(final.second.forecast.share) + autolayer(final.middle.forecast.share) + autolayer(final.fourth.forecast.share) + autolayer(final.80.forecast.share) + autolayer(final.90.forecast.share) + autolayer(final.96.forecast.share) + autolayer(final.top.forecast.share)
final.total.forecast.quintile <- final.lowest.forecast$mean + final.second.forecast$mean + final.middle.forecast$mean + final.fourth.forecast$mean + final.highest.forecast$mean
final.lowest.forecast.share.quintile <- final.lowest.forecast$mean/final.total.forecast.quintile
final.second.forecast.share.quintile <- final.second.forecast$mean/final.total.forecast.quintile
final.middle.forecast.share.quintile <- final.middle.forecast$mean/final.total.forecast.quintile
final.fourth.forecast.share.quintile <- final.fourth.forecast$mean/final.total.forecast.quintile
final.highest.forecast.share.quintile <- final.highest.forecast$mean/final.total.forecast.quintile
autoplot(final.lowest.forecast.share.quintile, main = "Quintile Share Forecast") + autolayer(final.second.forecast.share.quintile) + autolayer(final.middle.forecast.share.quintile) + autolayer(final.fourth.forecast.share.quintile) + autolayer(final.highest.forecast.share.quintile)
With the most accurate forecasts of income in the future, we see that the highest quintile will only capture a larger and larger share of total GDP in the US, with all other quintiles decreasing over time. This trend is further worrying given that the decomposed highest quintile forecast shows that all of this positive relative trend is associated with the top 1% of incomes. We even see the top 1% outclassing the gains in every other quantile including that which makes up the rest of the highest quantile such that even those will decrease as a share of total income over time.
holt.80 <- forecast(final.80.income.ts, h=6)
holt.90 <- forecast(final.90.income.ts, h=6)
holt.96 <- forecast(final.96.income.ts, h=6)
holt.total <- final.lowest.forecast$mean + final.second.forecast$mean + final.middle.forecast$mean + holt.fourth$mean + holt.80$mean + holt.90$mean + holt.96$mean + holt.top$mean
holt.lowest.share <- final.lowest.forecast$mean/holt.total
holt.second.share <- final.second.forecast$mean/holt.total
holt.middle.share <- final.middle.forecast$mean/holt.total
holt.fourth.share <- holt.fourth$mean/holt.total
holt.80.share <- holt.80$mean/holt.total
holt.90.share <- holt.90$mean/holt.total
holt.96.share <- holt.96$mean/holt.total
holt.top.share <- holt.top$mean/holt.total
autoplot(holt.lowest.share) + autolayer(holt.second.share) + autolayer(holt.middle.share) + autolayer(holt.fourth.share) + autolayer(holt.80.share) + autolayer(holt.90.share) + autolayer(holt.96.share) + autolayer(holt.top.share)
Using the linear Holt model, which was the best at forecasting over the short-term horizon for the fourth quantile and above, we see that while the increase in the share of total income is not felt by the top 1%, the middle quintiles and lower are all losing total GDP share. We see that decomposing the top 1% alters the share chart wildly.
final.total <- final.lowest.forecast$mean + final.second.forecast$mean + final.middle.forecast$mean + holt.fourth$mean + holt.highest$mean
holt.lowest.share <- final.lowest.forecast$mean/final.total
holt.second.share <- final.second.forecast$mean/final.total
holt.middle.share <- final.middle.forecast$mean/final.total
holt.fourth.share <- holt.fourth$mean/final.total
holt.highest.share <- holt.highest$mean/final.total
autoplot(holt.lowest.share) + autolayer(holt.second.share) + autolayer(holt.middle.share) + autolayer(holt.fourth.share) + autolayer(holt.highest.share)
UBI Analysis
Data Preparation
ubi.add <- c(3554.18, 3554.18, 4450.07, 4450.07, 4875.98, 4875.98, 5267.62, 5267.62, 5561.36, 5561.36, 6070.50, 6070.50, 6667.75, 6667.75, 7074.09, 7074.09, 7460.84, 7460.84, 7857.38, 7857.38, 8156.01, 8156.01, 8665.14, 8665.14, 9007.83, 9007.83, 9561.03, 9561.03, 10150.55, 10150.55, 10502.79, 10502.79, 11012.03, 11012.03, 11404.55, 11404.55, 11603.31, 11603.31, 12000)
ubi.income <- income_data + ubi.add
This is based off of Democratic 2020 Candidate Andrew Yang’s proposed $1,000 a month proposal for a Universal Basic Income.
ubi.lowest.income <- ubi.income$`Lowest Quintile`
ubi.second.income <- ubi.income$`Second Quintile`
ubi.middle.income <- ubi.income$`Middle Quintile`
ubi.fourth.income <- ubi.income$`Fourth Quintile`
ubi.highest.income <- ubi.income$`Highest Quintile`
ubi.top.income <- ubi.income$`Top 1%`
ubi.income.ts <- ts(ubi.income, start=1979, end=2017, frequency = 1)
ubi.income.quintile.ts <- ts(ubi.income[1:5], start=1979, end=2017, frequency = 1)
lowest.ts.ubi <- ts(ubi.lowest.income, start=1979, frequency=1)
second.ts.ubi <- ts(ubi.second.income, start=1979, frequency=1)
middle.ts.ubi <- ts(ubi.middle.income, start=1979, frequency=1)
fourth.ts.ubi <- ts(ubi.fourth.income, start=1979, frequency=1)
highest.ts.ubi <- ts(ubi.highest.income, start=1979, frequency=1)
top.ts.ubi <- ts(ubi.top.income, start=1979, frequency=1)
ubi.lowest <- auto.arima(lowest.ts.ubi, seasonal = FALSE)
ubi.second <- auto.arima(second.ts.ubi, seasonal = FALSE)
ubi.middle <- auto.arima(middle.ts.ubi, seasonal = FALSE)
ubi.fourth <- auto.arima(fourth.ts.ubi, seasonal = FALSE)
ubi.highest <- auto.arima(highest.ts.ubi, seasonal = FALSE)
ubi.top <- auto.arima(top.ts.ubi, seasonal = FALSE)
ubi.lowest.forecast <- forecast(ubi.lowest, h=6)
ubi.second.forecast <- forecast(ubi.second, h=6)
ubi.middle.forecast <- forecast(ubi.middle, h=6)
ubi.fourth.forecast <- forecast(ubi.fourth, h=6)
ubi.highest.forecast <- forecast(ubi.highest, h=6)
ubi.top.forecast <- forecast(ubi.top, h=6)
autoplot(ubi.income.quintile.ts) + autolayer(ubi.lowest.forecast) + autolayer(ubi.second.forecast) + autolayer(ubi.middle.forecast) + autolayer(ubi.fourth.forecast) + autolayer(ubi.highest.forecast)
total.forecast.ubi <- ubi.lowest.forecast$mean + ubi.second.forecast$mean + ubi.middle.forecast$mean + ubi.fourth.forecast$mean + ubi.highest.forecast$mean
lowest.share <- ubi.lowest.forecast$mean/total.forecast.ubi
second.share <- ubi.second.forecast$mean/total.forecast.ubi
middle.share <- ubi.middle.forecast$mean/total.forecast.ubi
fourth.share <- ubi.fourth.forecast$mean/total.forecast.ubi
highest.share <- ubi.highest.forecast$mean/total.forecast.ubi
autoplot(lowest.share, main = "UBI Share Forecast") + autolayer(second.share) + autolayer(middle.share) + autolayer(fourth.share) + autolayer(highest.share)
We see that the gains in the highest quintile still outclass those of the other four combined. Thus, by quantitative standards, we can assume that while the UBI may address immediate shortcomings in the minimum required income to survive, it will not address income inequality over the long term, given the gains by the highest quintile.
arima.lowest.forecast.ubi.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.08906718 0.08905152 0.08903628 0.08902145 0.08900700 0.08899292
arima.second.forecast.ubi.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1136310 0.1132679 0.1129144 0.1125703 0.1122352 0.1119087
middle.forecast.ubi.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1480268 0.1472922 0.1465772 0.1458811 0.1452032 0.1445427
fourth.forecast.ubi.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1978855 0.1971670 0.1964678 0.1957870 0.1951240 0.1944781
highest.forecast.ubi.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.4513895 0.4532214 0.4550043 0.4567401 0.4584306 0.4600776
Targeted UBI Analysis
ubi.lowest.income <- ubi.income$`Lowest Quintile`
ubi.second.income <- ubi.income$`Second Quintile`
ubi.middle.income <- ubi.income$`Middle Quintile`
ubi.income.ts <- ts(ubi.income, start=1979, end=2017, frequency = 1)
ubi.income.quintile.ts <- ts(ubi.income, start=1979, end=2017, frequency = 1)
lowest.ts.ubi <- ts(ubi.lowest.income, start=1979, frequency=1)
second.ts.ubi <- ts(ubi.second.income, start=1979, frequency=1)
middle.ts.ubi <- ts(ubi.middle.income, start=1979, frequency=1)
ubi.income.target.ts <- cbind(ubi.income.quintile.ts, fourth.income.ts, highest.income.ts)
ubi.lowest <- auto.arima(lowest.ts.ubi, seasonal = FALSE)
ubi.second <- auto.arima(second.ts.ubi, seasonal = FALSE)
ubi.middle <- auto.arima(middle.ts.ubi, seasonal = FALSE)
ubi.lowest.forecast <- forecast(ubi.lowest, h=6)
ubi.second.forecast <- forecast(ubi.second, h=6)
ubi.middle.forecast <- forecast(ubi.middle, h=6)
autoplot(ubi.income.quintile.ts) + autolayer(ubi.lowest.forecast) + autolayer(ubi.second.forecast) + autolayer(ubi.middle.forecast) + autolayer(holt.fourth) + autolayer(holt.highest) + autolayer(holt.top)
total.forecast.ubi <- ubi.lowest.forecast$mean + ubi.second.forecast$mean + ubi.middle.forecast$mean + holt.fourth$mean + holt.highest$mean
lowest.share <- ubi.lowest.forecast$mean/total.forecast.ubi
second.share <- ubi.second.forecast$mean/total.forecast.ubi
middle.share <- ubi.middle.forecast$mean/total.forecast.ubi
fourth.share <- holt.fourth$mean/total.forecast.ubi
highest.share <- holt.highest$mean/total.forecast.ubi
autoplot(lowest.share, main = "UBI Share Forecast") + autolayer(second.share) + autolayer(middle.share) + autolayer(fourth.share) + autolayer(highest.share)
lowest.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.09369943 0.09415455 0.09461059 0.09506748 0.09552515 0.09598353
second.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1195408 0.1197586 0.1199837 0.1202157 0.1204544 0.1206994
middle.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1558476 0.1559747 0.1561140 0.1562649 0.1564268 0.1565993
fourth.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1841021 0.1836621 0.1832237 0.1827869 0.1823517 0.1819181
highest.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.4468101 0.4464501 0.4460680 0.4456650 0.4452419 0.4447997
A targeted UBI does in fact assist income inequality, but still at a reduced speed. The gains made by the highest quantile are still statistically significant.
We can also see what would have happened between 1979-2017 had a UBI been implemented.
total.time.ubi <- ubi.lowest.income + ubi.second.income + ubi.middle.income + fourth.income.ts + highest.income.ts
lowest.share.time <- ubi.lowest.income/total.time.ubi
second.share.time <- ubi.second.income/total.time.ubi
middle.share.time <- ubi.middle.income/total.time.ubi
fourth.share.time <- fourth.income.ts/total.time.ubi
highest.share.time <- highest.income.ts/total.time.ubi
autoplot(lowest.share.time, main = "UBI Share Forecast") + autolayer(second.share.time) + autolayer(middle.share.time) + autolayer(fourth.share.time) + autolayer(highest.share.time)
Wall Street Hypothesis
We often also hear that higher-income individuals are better able to utilize savings and capital to earn income thanks to their ability to tap into the growth in the stock market. Here, we check if the S&P 500 data can be used as an indicator for the higher income quantiles’ time series.
S_P_income <- read_excel("S&P_income.xlsx")
Error in read_excel("S&P_income.xlsx") :
could not find function "read_excel"
Data Preparation
wall.lowest.income <- S_P_income$`Lowest Quantile`
wall.second.income <- S_P_income$`Second Quantile`
wall.middle.income <- S_P_income$`Middle Quantile`
wall.fourth.income <- S_P_income$`Fourth Quantile`
wall.highest.income <- S_P_income$`Highest Quantile`
wall.top.income <- S_P_income$`Top Quantile`
wall.data <- S_P_income$`S&P`
lowest.ts.wall <- ts(wall.lowest.income, start=1979, frequency=1)
second.ts.wall <- ts(wall.second.income, start=1979, frequency=1)
middle.ts.wall <- ts(wall.middle.income, start=1979, frequency=1)
fourth.ts.wall <- ts(wall.fourth.income, start=1979, frequency=1)
highest.ts.wall <- ts(wall.highest.income, start=1979, frequency=1)
top.ts.wall <- ts(wall.top.income, start=1979, frequency=1)
wall.ts <- ts(wall.data, start=1979, frequency=1)
corr.fin.income <- cor(S_P_income)
corrplot(corr.fin.income, method=c("number"))
arima.wall.lowest <- auto.arima(lowest.ts.wall, xreg = wall.data, seasonal = FALSE)
arima.wall.second <- auto.arima(second.ts.wall, xreg = wall.data, seasonal = FALSE)
arima.wall.middle <- auto.arima(middle.ts.wall, xreg = wall.data, seasonal = FALSE)
arima.wall.fourth <- auto.arima(fourth.ts.wall, xreg = wall.data, seasonal = FALSE)
arima.wall.highest <- auto.arima(highest.ts.wall, xreg = wall.data, seasonal = FALSE)
arima.wall.top <- auto.arima(top.ts.wall, xreg = wall.data, seasonal = FALSE)
accuracy(arima.wall.lowest)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 175.0316 560.7516 461.7201 0.6081286 1.758627 0.8354935 -0.07548672
accuracy(arima.wall.second)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 217.9078 804.8531 659.767 0.4838463 1.752184 0.946081 -0.03433632
accuracy(arima.wall.middle)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 219.9965 964.7754 701.5357 0.3490868 1.280456 0.8304785 -0.101025
accuracy(arima.wall.fourth)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 302.6678 1086.335 868.7847 0.3721168 1.153087 0.7537401 0.08575985
accuracy(arima.wall.highest)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1293.82 8685.012 6684.117 0.479861 3.982206 0.7994852 -0.1074442
accuracy(arima.wall.top)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 4856.026 122223.1 89124.74 -1.900682 10.38019 0.7962992 -0.09829954
accuracy(arima.lowest)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 6.63326 507.5138 409.2322 -0.059196 1.582477 0.7405154 0.02734329
accuracy(arima.second)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.8351548 769.6023 607.8527 -0.09022095 1.607966 0.8716378 0.2094412
accuracy(arima.middle)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 17.50649 919.0583 709.7766 -0.01022993 1.291981 0.840234 -0.05041771
accuracy(arima.fourth)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.562347 1156.54 938.4045 -0.04101257 1.237995 0.8141408 0.2263137
accuracy(arima.highest)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 2.708163 9078.275 7290.158 -0.1733283 4.262883 0.8719735 0.04509386
accuracy(arima.top)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 24909.54 137315.7 109063.4 2.26348 11.95632 0.9744442 -0.09189933
Interestingly, using historical S&P 500 data as contributing factor only helps the accuracy of the model for the fourth distribution and higher.
wall.data.arima <- auto.arima(wall.data, seasonal = FALSE)
wall.data.forecast <- forecast(wall.data.arima, h=6)
wall.data.pred <- wall.data.forecast$mean
arima.wall.fourth.forecast <- forecast(arima.wall.fourth, xreg = wall.data.pred, h=6)
arima.wall.highest.forecast <- forecast(arima.wall.highest, xreg = wall.data.pred, h=6)
arima.wall.top.forecast <- forecast(arima.wall.top, xreg = wall.data.pred, h=6)
autoplot(income.ts) + autolayer(arima.lowest.forecast) + autolayer(arima.second.forecast) + autolayer(arima.middle.forecast) + autolayer(arima.wall.fourth.forecast) + autolayer(arima.wall.highest.forecast) + autolayer(arima.wall.top.forecast)
autoplot(income.quintile.ts) + autolayer(arima.lowest.forecast) + autolayer(arima.second.forecast) + autolayer(arima.middle.forecast) + autolayer(arima.wall.fourth.forecast) + autolayer(arima.wall.highest.forecast)
Share of Total GDP after S&P Regression
arima.total.forecast.wall <- arima.lowest.forecast$mean + arima.second.forecast$mean + arima.middle.forecast$mean + arima.wall.fourth.forecast$mean + arima.wall.highest.forecast$mean
arima.lowest.forecast.wall.share <- arima.lowest.forecast$mean/arima.total.forecast.wall
arima.second.forecast.wall.share <- arima.second.forecast$mean/arima.total.forecast.wall
arima.middle.forecast.wall.share <- arima.middle.forecast$mean/arima.total.forecast.wall
arima.fourth.forecast.wall.share <- arima.wall.fourth.forecast$mean/arima.total.forecast.wall
arima.highest.forecast.wall.share <- arima.wall.highest.forecast$mean/arima.total.forecast.wall
autoplot(arima.lowest.forecast.wall.share) + autolayer(arima.second.forecast.wall.share) + autolayer(arima.middle.forecast.wall.share) + autolayer(arima.fourth.forecast.wall.share) + autolayer(arima.highest.forecast.wall.share)
UBO Hypothesis
earnings <- S_P_income$`S&P Earnings`
n.shares <- 3554.18/wall.ts[1]
n.shares
[1] 32.92737
Using Andrew Yang’s $12,000/year initial offer, we assume the equivalent amount of buying power for the 1979 and calculate earnings per year based on the number of shares bought, assuming the entire portfolio is purely in the S&P 500.
ubo.ts <- earnings*32.92737
snp.difference <- tail(ubo.ts, n=38L)
ubo <- c(3554.18, snp.difference)
Data Preparation
lowest.ts.ubo <- lowest.income.ts + ubo
second.ts.ubo <- second.income.ts + ubo
middle.ts.ubo <- middle.income.ts + ubo
ubo.income <- cbind(lowest.ts.ubo, second.ts.ubo, middle.ts.ubo, fourth.income.ts, final.80.income.ts, final.90.income.ts, final.96.income.ts, final.top.income.ts)
ubo.income.quintile <- cbind(lowest.ts.ubo, second.ts.ubo, middle.ts.ubo, fourth.income.ts, highest.income.ts)
arima.ubo.lowest <- auto.arima(lowest.ts.ubo, xreg = wall.data, seasonal = FALSE)
arima.ubo.second <- auto.arima(second.ts.ubo, xreg = wall.data, seasonal = FALSE)
arima.ubo.middle <- auto.arima(middle.ts.ubo, xreg = wall.data, seasonal = FALSE)
arima.ubo.lowest.forecast <- forecast(arima.ubo.lowest, xreg = wall.data.pred, h=6)
arima.ubo.second.forecast <- forecast(arima.ubo.second, xreg = wall.data.pred, h=6)
arima.ubo.middle.forecast <- forecast(arima.ubo.middle, xreg = wall.data.pred, h=6)
autoplot(ubo.income) + autolayer(arima.ubo.lowest.forecast) + autolayer(arima.ubo.second.forecast) + autolayer(arima.ubo.middle.forecast) + autolayer(final.fourth.forecast) + autolayer(final.80.forecast) + autolayer(final.90.forecast) + autolayer(final.96.forecast) + autolayer(final.top.forecast)
autoplot(ubo.income.quintile, main = "UBO Quantile Forecast") + autolayer(arima.ubo.lowest.forecast) + autolayer(arima.ubo.second.forecast) + autolayer(arima.ubo.middle.forecast) + autolayer(final.fourth.forecast) + autolayer(final.highest.forecast)
Summary of UBO Models
summary(arima.ubo.lowest)
Series: lowest.ts.ubo
Regression with ARIMA(0,0,0) errors
Coefficients:
intercept xreg
18072.522 11.1674
s.e. 1278.944 1.1246
sigma^2 estimated as 23103318: log likelihood=-384.94
AIC=775.89 AICc=776.57 BIC=780.88
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 2.47765e-09 4681.723 3149.849 -3.416752 12.76711 0.6796663 0.1692944
summary(arima.ubo.second)
Series: second.ts.ubo
Regression with ARIMA(0,0,0) errors
Coefficients:
intercept xreg
30315.487 11.7775
s.e. 1197.411 1.0529
sigma^2 estimated as 20251536: log likelihood=-382.38
AIC=770.75 AICc=771.44 BIC=775.74
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.036796e-08 4383.263 2966.502 -1.188202 7.45201 0.6295592 0.08259339
summary(arima.ubo.middle)
Series: middle.ts.ubo
Regression with ARIMA(0,0,0) errors
Coefficients:
intercept xreg
45306.898 13.4287
s.e. 1171.823 1.0304
sigma^2 estimated as 19395276: log likelihood=-381.53
AIC=769.07 AICc=769.75 BIC=774.06
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.015201e-08 4289.597 3017.454 -0.5519901 5.259434 0.6429931 0.1093753
What we can unfortunately see from the models, which has positives and negatives, is that the best model to capture the distributions exposed to the proposed UBO is an ARIMA(0, 0, 0), a white noise model with the only meaningful variable being the stock market data. The obvious negative is that any individual’s income is more closely tied to the stock market(S&P 500, for our purposes) than their own earned income. On the other hand, this was always true for the higher-income quintiles, as we see that the standard ARIMA model increases accuracy when S&P 500 data is included as a independent variable for the fourth income distribution and higher. While the means by which the higher income distributions utilize the stock market to increase wealth is beyond the scope of this paper, the correlation between market gains and the highest income distributions is noticed.
UBO Share Forecast
total.forecast.ubo <- arima.ubo.lowest.forecast$mean + arima.ubo.second.forecast$mean + arima.ubo.middle.forecast$mean + final.fourth.forecast$mean + final.highest.forecast$mean
arima.ubo.lowest.forecast.wall.share <- arima.ubo.lowest.forecast$mean/total.forecast.ubo
arima.ubo.second.forecast.wall.share <- arima.ubo.second.forecast$mean/total.forecast.ubo
arima.ubo.middle.share <- arima.ubo.middle.forecast$mean/total.forecast.ubo
final.ubo.fourth.share <- final.fourth.forecast$mean/total.forecast.ubo
final.ubo.highest.share <- final.highest.forecast$mean/total.forecast.ubo
autoplot(arima.ubo.lowest.forecast.wall.share) + autolayer(arima.ubo.second.forecast.wall.share) + autolayer(arima.ubo.middle.share) + autolayer(final.ubo.fourth.share) + autolayer(final.ubo.highest.share)
This may indicate that the gains made by the highest quintile still outperform the lower quintile’s exposure to the market. That said, the assumption is purely based on the return of the S&P, and the model may change after taking into account the increased taxes proposed by progressive aimed at the highest income distributions.
total.forecast.ubo <- arima.ubo.lowest.forecast$mean + arima.ubo.second.forecast$mean + arima.ubo.middle.forecast$mean + holt.fourth$mean + holt.highest$mean
lowest.ubo.share <- arima.ubo.lowest.forecast$mean/total.forecast.ubo
second.ubo.share <- arima.ubo.second.forecast$mean/total.forecast.ubo
middle.ubo.share <- arima.ubo.middle.forecast$mean/total.forecast.ubo
fourth.ubo.share <- holt.fourth$mean/total.forecast.ubo
highest.ubo.share <- holt.highest$mean/total.forecast.ubo
autoplot(lowest.ubo.share, main = "UBO Share Forecast") + autolayer(second.ubo.share) + autolayer(middle.ubo.share) + autolayer(fourth.ubo.share) + autolayer(highest.ubo.share)
It should be noted that depending on the model used for the highest quintile, the effect of the UBO on income mobility changes. Further, reating the top 1% as its own quantile separate from the others including the highest quintile shows that every quantile would steadily move towards hire shares of total income over time despite the large gains made by the top 1%, though the change is minimal. And again, the UBO model under consideration can be considered a minimum value, as this is the value used for immediate income substitution and is far cheaper than Andrew Yang’s proposed UBI.
lowest.ubo.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.09355706 0.09412448 0.09469011 0.09525397 0.09581610 0.09637651
second.ubo.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1202985 0.1206962 0.1210977 0.1215027 0.1219110 0.1223224
middle.ubo.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1578057 0.1580676 0.1583392 0.1586199 0.1589093 0.1592070
fourth.ubo.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1833511 0.1827875 0.1822283 0.1816734 0.1811227 0.1805759
highest.ubo.share
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.4449876 0.4443242 0.4436447 0.4429501 0.4422409 0.4415181
The targeted UBO does a comparable job of shifting income inequality as the targeted UBO does, but I show that it costs a fraction of the price of the UBI.
We can also view the change in total share over the time period of 1979-2017.
ubo.time.total <- lowest.ts.ubo + second.ts.ubo + middle.ts.ubo + fourth.income.ts + highest.income.ts
lowest.ubo.time.share <- lowest.ts.ubo/ubo.time.total
second.ubo.time.share <- second.ts.ubo/ubo.time.total
middle.ubo.time.share <- middle.ts.ubo/ubo.time.total
fourth.ubo.time.share <- fourth.income.ts/ubo.time.total
highest.ubo.time.share <- highest.income.ts/ubo.time.total
autoplot(lowest.ubo.time.share, main = "UBO Share Forecast Over 1979-2017") + autolayer(second.ubo.time.share) + autolayer(middle.ubo.time.share) + autolayer(fourth.ubo.time.share) + autolayer(fourth.ubo.time.share) + autolayer(highest.ubo.time.share)
Individual UBO Proposal
ubo.ts.2 <- earnings*32.92737*1.9
snp.difference.2 <- tail(ubo.ts.2, n=38L)
ubo.2 <- c(2961.81, snp.difference.2)
Based off the average number of children per household in the US. The previous example was that of a single child born to certain income quantiles–the assumption being that every household has a single child. This proposal will be based off of the average number of children born per household (1.9) in 1979. Thus, we will be calculating that each benefitting household will receive 1.9 times the benefits of a UBO.
lowest.ts.ubo.2 <- lowest.income.ts + ubo.2
second.ts.ubo.2 <- second.income.ts + ubo.2
middle.ts.ubo.2 <- middle.income.ts + ubo.2
ubo.income.quintile.2 <- cbind(lowest.ts.ubo.2, second.ts.ubo.2, middle.ts.ubo.2, fourth.income.ts, highest.income.ts)
arima.ubo.lowest.2 <- auto.arima(lowest.ts.ubo.2, xreg = wall.data, seasonal = FALSE)
arima.ubo.second.2 <- auto.arima(second.ts.ubo.2, xreg = wall.data, seasonal = FALSE)
arima.ubo.middle.2 <- auto.arima(middle.ts.ubo.2, xreg = wall.data, seasonal = FALSE)
arima.ubo.lowest.forecast.2 <- forecast(arima.ubo.lowest.2, xreg = wall.data.pred, h=6)
arima.ubo.second.forecast.2 <- forecast(arima.ubo.second.2, xreg = wall.data.pred, h=6)
arima.ubo.middle.forecast.2 <- forecast(arima.ubo.middle.2, xreg = wall.data.pred, h=6)
autoplot(ubo.income.quintile.2, main = "UBO Quantile Forecast Times 2") + autolayer(arima.ubo.lowest.forecast.2) + autolayer(arima.ubo.second.forecast.2) + autolayer(arima.ubo.middle.forecast.2) + autolayer(final.fourth.forecast) + autolayer(final.highest.forecast)
Individual UBO Proposal Share Projection
total.forecast.ubo.2 <- arima.ubo.lowest.forecast.2$mean + arima.ubo.second.forecast.2$mean + arima.ubo.middle.forecast.2$mean + holt.fourth$mean + holt.highest$mean
arima.ubo.lowest.forecast.wall.share.2 <- arima.ubo.lowest.forecast$mean/total.forecast.ubo.2
arima.ubo.second.forecast.wall.share.2 <- arima.ubo.second.forecast$mean/total.forecast.ubo.2
arima.ubo.middle.share.2 <- arima.ubo.middle.forecast.2$mean/total.forecast.ubo.2
final.ubo.fourth.share.2 <- holt.fourth$mean/total.forecast.ubo.2
final.ubo.highest.share.2 <- holt.highest$mean/total.forecast.ubo.2
autoplot(arima.ubo.lowest.forecast.wall.share.2) + autolayer(arima.ubo.second.forecast.wall.share.2) + autolayer(arima.ubo.middle.share.2) + autolayer(final.ubo.fourth.share.2) + autolayer(final.ubo.highest.share.2)
arima.ubo.lowest.forecast.wall.share.2
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.08953717 0.09000925 0.09047980 0.09094886 0.09141641 0.09188248
arima.ubo.second.forecast.wall.share.2
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1151296 0.1154192 0.1157132 0.1160112 0.1163130 0.1166185
arima.ubo.middle.share.2
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1653476 0.1657304 0.1661201 0.1665163 0.1669185 0.1673265
final.ubo.fourth.share.2
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.1754730 0.1747958 0.1741257 0.1734625 0.1728059 0.1721557
final.ubo.highest.share.2
Time Series:
Start = 2018
End = 2023
Frequency = 1
[1] 0.4258677 0.4248978 0.4239185 0.4229304 0.4219341 0.4209302
How to pay for it
Distribution_population <- read_excel("Distribution population.xlsx")
Error in read_excel("Distribution population.xlsx") :
could not find function "read_excel"
dist.pop.ts <- ts(Distribution_population, start=1979, end=2017, frequency=1)
lowest.pop <- Distribution_population$`Lowest Quantile`
second.pop <- Distribution_population$`Second Quantile`
middle.pop <- Distribution_population$`Middle Quantile`
lowest.pop.ts <- ts(lowest.pop, start=1979, end=2017, frequency = 1)
second.pop.ts <- ts(second.pop, start=1979, end=2017, frequency = 1)
middle.pop.ts <- ts(middle.pop, start=1979, end=2017, frequency = 1)
lowest.pop.pred <- rwf(lowest.pop.ts, h = 6, drift=TRUE)
second.pop.pred <- rwf(second.pop.ts, h = 6, drift=TRUE)
middle.pop.pred <- rwf(middle.pop.ts, h = 6, drift=TRUE)
lowest.pop.pred$model
second.pop.pred$model
middle.pop.pred$model
autoplot(dist.pop.ts) + autolayer(lowest.pop.pred) + autolayer(second.pop.pred) + autolayer(middle.pop.pred)
ubo.cost <- c(12000, 12000, 12000, 12000, 12000, 12000)
inflation <- c(1.00, 1.014, 1.028, 1.042, 1.056, 1.070)
ubo.inf.cost <- ubo.cost*inflation
lowest.pop.inc <- c(192100, 192100, 192100, 192100, 192100, 192100)
second.pop.inc <- c(240600, 240600, 240600, 240600, 240600, 240600)
middle.pop.inc <- c(271100, 271100, 271100, 271100, 271100, 271100)
lowest.cost <- lowest.pop.inc*ubo.inf.cost
second.cost <- second.pop.inc*ubo.inf.cost
middle.cost <- middle.pop.inc*ubo.inf.cost
lowest.cost <- ts(lowest.cost, start=2018, end=2023, frequency=1)
second.cost <- ts(second.cost, start=2018, end=2023, frequency=1)
middle.cost <- ts(middle.cost, start=2018, end=2023, frequency=1)
total.cost <- lowest.cost + second.cost + middle.cost
autoplot(total.cost) + autolayer(lowest.cost) + autolayer(second.cost) + autolayer(middle.cost)
total.cost
summary(final.highest)
This estimates a roughly $3,181,579,000 cost a year. This is no trifling amount, but is minimal compared to the $2.8 trillion a year for Andrew Yang’s UBI proposal. This is not a proposal for an immediate difference between the minimum required income for an individual living in America. But for attempting to move a family toward a higher income quintile, I believe that the cost is worth it.
holt.top$model
3181579000/39277180300
As we can see, even with only the top 1% paying for the entirety of this program without any federal assistance and only out of their gains per year according to the most accurate model, the cost for the UBI would only be roughly 8.10% of the gains in the top 1% in a year.