library(WDI)
library(ggplot2)
wdi_data = WDI(indicator = c('inf_mort' = "SP.DYN.IMRT.IN",
'gdpPercap'="NY.GDP.PCAP.KD",
'yrs_schooling'='BAR.SCHL.15UP'), # interest rate spread
start = 1960, end = 2020,
extra=TRUE) %>%
as_tibble()
united_states_data = wdi_data %>%
filter(country == 'United States')
united_states_data
## # A tibble: 61 × 13
## iso2c country year inf_mort gdpPercap yrs_schooling iso3c region capital
## <chr> <chr> <int> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 US United S… 1960 25.9 19135. NA USA North … Washing…
## 2 US United S… 1963 24.4 20701. NA USA North … Washing…
## 3 US United S… 1964 23.8 21600. NA USA North … Washing…
## 4 US United S… 1961 25.4 19254. NA USA North … Washing…
## 5 US United S… 1962 24.9 20116. NA USA North … Washing…
## 6 US United S… 1967 22 24227. NA USA North … Washing…
## 7 US United S… 1968 21.3 25137. NA USA North … Washing…
## 8 US United S… 1969 20.6 25664. NA USA North … Washing…
## 9 US United S… 1970 19.9 25279. 10.8 USA North … Washing…
## 10 US United S… 1971 19.1 25784. NA USA North … Washing…
## # … with 51 more rows, and 4 more variables: longitude <chr>, latitude <chr>,
## # income <chr>, lending <chr>
Trends & Seasonality
ggplot(united_states_data, aes(x = year, y = inf_mort)) +geom_line() + labs(y = 'Infant Mortality Rate',x = 'Year') + ggtitle("Infant mortality rate across Years")
Since the mortality rate is continuously reducing overtime, we can be sure that variance is non-stationary.
From the plot of infant mortality rate of United States, we can observe that the mortality rate timeseries is neither mean stationary nor variance stationary as the mortality rate is constantly decreasing over time. This is evident from the mortality rate plot.
From the mortality rate plots above, we can safely say that there is no seasonlaity in the mortality rate as it is pretty much a decreasing trend overtime.
for_prophet_data <- subset(united_states_data, select = c(year, inf_mort)) %>%
mutate(year = as.Date(ISOdate(year, 1, 1)))
#(paste('01/01/', trimws(year))))
for_prophet_data
## # A tibble: 61 × 2
## year inf_mort
## <date> <dbl>
## 1 1960-01-01 25.9
## 2 1963-01-01 24.4
## 3 1964-01-01 23.8
## 4 1961-01-01 25.4
## 5 1962-01-01 24.9
## 6 1967-01-01 22
## 7 1968-01-01 21.3
## 8 1969-01-01 20.6
## 9 1970-01-01 19.9
## 10 1971-01-01 19.1
## # … with 51 more rows
library(prophet)
prophet_data = subset(for_prophet_data, select = c(year, inf_mort)) %>%
rename(ds = year, # Have to name our date variable "ds"
y = inf_mort) # Have to name our time series "y"
train = prophet_data %>%
filter(ds<ymd("2018-01-01"))
test = prophet_data %>%
filter(ds>=ymd("2018-01-01"))
model = prophet(train, yearly.seasonality = FALSE, weekly.seasonality = FALSE, daily.seasonality = FALSE)
future = make_future_dataframe(model,periods = 7*365)
forecast = predict(model,future)
plot(model,forecast)+
ylab("Infant Mortality Rate in US")+xlab("Year")+theme_bw()
Just looking at the forecast we can see that there is a discrepancy in the forecast trend. Infant mortality rate is going below zero which is not possible. We will set the floor value later when we try to figure out the saturation points.
dyplot.prophet(model,forecast)
prophet_plot_components(model,forecast)
plot(model,forecast)+add_changepoints_to_plot(model)+theme_bw()+xlab("Year")+ylab("Infant Mortality Rate of US")
We can manually specify more number of changepoints to improve performance.
# Number of Changepoints
model = prophet(train,n.changepoints=10,yearly.seasonality = FALSE, weekly.seasonality = FALSE, daily.seasonality = FALSE)
forecast = predict(model,future)
plot(model,forecast)+add_changepoints_to_plot(model)+theme_bw()+xlab("Year")+ylab("Infant Mortality Rate of US")
prophet_plot_components(model,forecast)
forecast_plot_data = forecast %>%
as_tibble() %>%
mutate(ds = as.Date(ds)) %>%
filter(ds>=ymd("2020-01-01"))
forecast_not_scaled = ggplot()+
geom_line(aes(test$ds,test$y))+ xlab("Year") + ylab("Infant Mortality Rate of US") +
geom_line(aes(forecast_plot_data$ds,forecast_plot_data$yhat), color='red')
forecast_scaled = forecast_not_scaled
forecast_scaled
We need to set the floor value for this data as the infant mortality can not go below 0. Let us make a two-year forecast to identify the need for saturation points.
two_yr_future = make_future_dataframe(model,periods = 730)
two_yr_forecast = predict(model,two_yr_future)
plot(model,two_yr_forecast)+theme_bw()+xlab("Date")+ylab("Infant Mortality Rate of US")
train$floor = 0
train$cap = 30
future$floor = 0
future$cap = 30
# Set floor in forecsat data
two_yr_future$floor = 0
two_yr_future$cap = 30
sat_model = prophet(train,growth='logistic', yearly.seasonality = FALSE, weekly.seasonality = FALSE, daily.seasonality = FALSE)
sat_two_yr_forecast = predict(sat_model,two_yr_future)
plot(sat_model,sat_two_yr_forecast)+ylim(0,30)+
theme_bw()+xlab("Date")+ylab("Page Views")
prophet_plot_components(model,forecast)
additive = prophet(train,yearly.seasonality = FALSE, weekly.seasonality = FALSE, daily.seasonality = FALSE)
add_fcst = predict(additive,future)
plot(additive,add_fcst)+
ylim(0,30)
prophet_plot_components(additive,add_fcst)
multi = prophet(train,seasonality.mode = 'multiplicative', yearly.seasonality = FALSE, weekly.seasonality = FALSE, daily.seasonality = FALSE)
multi_fcst = predict(multi,future)
plot(multi,multi_fcst)+ylim(0,30)
prophet_plot_components(multi, multi_fcst)
From the above plots, we can see that the additive and multiplicative seasonalities doesn’t play any significant role.
Since the data we are using is a yearly data, impact of holidays is not applicable on the annual infant mortality rate.
forecast_metric_data = forecast %>%
as_tibble() %>%
mutate(ds = as.Date(ds)) %>%
filter(ds>=ymd("2020-01-01"))
RMSE = sqrt(mean((test$y - forecast_metric_data$yhat)^2))
MAE = mean(abs(test$y - forecast_metric_data$yhat))
MAPE = mean(abs((test$y - forecast_metric_data$yhat)/test$y))
print(paste("RMSE:",round(RMSE,2)))
## [1] "RMSE: 0.36"
print(paste("MAE:",round(MAE,2)))
## [1] "MAE: 0.34"
print(paste("MAPE:",round(MAPE,2)))
## [1] "MAPE: 0.06"
The observed values of performance metrics are RMSE = 0.36, MAE = 0.34, MAPE = 0.06.
library(tidyverse)
library(caret)
library(prophet)
df.cv <- cross_validation(model, horizon = 5*365,initial = 6*365, units = 'days')
head(df.cv)
## # A tibble: 6 × 6
## y ds yhat yhat_lower yhat_upper cutoff
## <dbl> <dttm> <dbl> <dbl> <dbl> <dttm>
## 1 21.3 1968-01-01 00:00:00 21.4 21.3 21.4 1967-01-14 00:00:00
## 2 20.6 1969-01-01 00:00:00 20.7 20.6 20.9 1967-01-14 00:00:00
## 3 19.9 1970-01-01 00:00:00 20.1 19.8 20.4 1967-01-14 00:00:00
## 4 19.1 1971-01-01 00:00:00 19.4 19.0 19.9 1967-01-14 00:00:00
## 5 18.3 1972-01-01 00:00:00 18.8 18.1 19.5 1967-01-14 00:00:00
## 6 19.9 1970-01-01 00:00:00 19.9 19.9 19.9 1969-07-14 12:00:00
unique(df.cv$cutoff)
## [1] "1967-01-14 00:00:00 GMT" "1969-07-14 12:00:00 GMT"
## [3] "1972-01-13 00:00:00 GMT" "1974-07-13 12:00:00 GMT"
## [5] "1977-01-11 00:00:00 GMT" "1979-07-12 12:00:00 GMT"
## [7] "1982-01-10 00:00:00 GMT" "1984-07-10 12:00:00 GMT"
## [9] "1987-01-09 00:00:00 GMT" "1989-07-09 12:00:00 GMT"
## [11] "1992-01-08 00:00:00 GMT" "1994-07-08 12:00:00 GMT"
## [13] "1997-01-06 00:00:00 GMT" "1999-07-07 12:00:00 GMT"
## [15] "2002-01-05 00:00:00 GMT" "2004-07-05 12:00:00 GMT"
## [17] "2007-01-04 00:00:00 GMT" "2009-07-04 12:00:00 GMT"
## [19] "2012-01-03 00:00:00 GMT"
df.cv %>%
ggplot()+
geom_point(aes(ds,y)) +
geom_point(aes(ds,yhat,color = factor(cutoff)))+
theme_bw()+
xlab("Date")+
ylab("Adjusted Closing Price")+
scale_color_discrete(name = 'Cutoff')
Nearer horizon values are closer to the actual values and the farther values show more deviation from teh actual values.
We can generate a table for various performance metrics for different time horizons.
performance_metrics(df.cv)
## horizon mse rmse mae mape mdape smape
## 1 180.5 days 0.02203328 0.1484361 0.1115551 0.01405537 0.009832773 0.01417687
## 2 352.0 days 0.02256754 0.1502250 0.1191892 0.01441354 0.009832773 0.01453446
## 3 354.0 days 0.02285979 0.1511945 0.1247254 0.01472895 0.009832773 0.01484939
## 4 355.0 days 0.02224203 0.1491376 0.1219532 0.01443306 0.007169743 0.01455097
## 5 356.0 days 0.02691323 0.1640525 0.1309530 0.01508729 0.007169743 0.01522092
## 6 357.0 days 0.02994155 0.1730363 0.1480905 0.01681164 0.016714379 0.01696085
## 7 359.0 days 0.03080494 0.1755134 0.1534586 0.01738732 0.016714379 0.01753098
## 8 360.0 days 0.02913382 0.1706863 0.1505182 0.01680129 0.016714379 0.01692125
## 9 361.0 days 0.04043131 0.2010754 0.1732670 0.02002807 0.016714379 0.02026862
## 10 362.0 days 0.03813686 0.1952866 0.1608028 0.01798798 0.012302975 0.01826157
## 11 364.0 days 0.03763065 0.1939862 0.1548644 0.01792068 0.012302975 0.01819525
## 12 535.5 days 0.03848501 0.1961760 0.1604521 0.01818594 0.012302975 0.01845941
## 13 536.5 days 0.03845380 0.1960964 0.1602932 0.01810211 0.012302975 0.01837502
## 14 538.5 days 0.03894727 0.1973506 0.1611057 0.01797422 0.012302975 0.01824374
## 15 539.5 days 0.06061901 0.2462093 0.1951223 0.02115576 0.012302975 0.02152652
## 16 540.5 days 0.05949179 0.2439094 0.1866278 0.02013216 0.006415317 0.02051076
## 17 541.5 days 0.05139937 0.2267143 0.1591871 0.01640994 0.005531182 0.01671321
## 18 543.5 days 0.05996035 0.2448680 0.1699162 0.01786200 0.005531182 0.01825230
## 19 544.5 days 0.06806006 0.2608832 0.1961858 0.02176448 0.006415317 0.02224680
## 20 545.5 days 0.07134868 0.2671117 0.2136161 0.02461709 0.025398983 0.02505507
## 21 718.0 days 0.07178299 0.2679235 0.2155823 0.02466950 0.025398983 0.02510719
## 22 719.0 days 0.07149424 0.2673841 0.2139838 0.02450975 0.025398983 0.02494378
## 23 720.0 days 0.06807901 0.2609195 0.2078823 0.02381235 0.019122337 0.02423058
## 24 721.0 days 0.06762250 0.2600433 0.2073961 0.02353661 0.019122337 0.02394226
## 25 723.0 days 0.07686973 0.2772539 0.2364773 0.02651342 0.028323383 0.02696997
## 26 724.0 days 0.07925017 0.2815141 0.2502938 0.02817728 0.028323383 0.02861527
## 27 725.0 days 0.07074983 0.2659884 0.2396511 0.02650192 0.028323383 0.02684133
## 28 726.0 days 0.08342040 0.2888259 0.2575923 0.02907391 0.028323383 0.02955319
## 29 728.0 days 0.08015785 0.2831216 0.2408482 0.02631013 0.019122337 0.02683402
## 30 729.0 days 0.07857925 0.2803199 0.2281283 0.02575733 0.019122337 0.02628312
## 31 900.5 days 0.08237012 0.2870020 0.2413980 0.02643410 0.019122337 0.02695449
## 32 902.5 days 0.07967752 0.2822721 0.2354291 0.02583196 0.018087686 0.02634235
## 33 903.5 days 0.09395300 0.3065175 0.2489458 0.02666156 0.018087686 0.02721200
## 34 904.5 days 0.14289606 0.3780160 0.2972092 0.03123051 0.018087686 0.03202098
## 35 905.5 days 0.14095780 0.3754435 0.2882276 0.03007285 0.013703124 0.03087808
## 36 907.5 days 0.12845748 0.3584097 0.2608403 0.02620425 0.013703124 0.02688475
## 37 908.5 days 0.14706504 0.3834906 0.2789015 0.02871908 0.013703124 0.02959979
## 38 909.5 days 0.15693219 0.3961467 0.3096505 0.03336641 0.013996049 0.03436297
## 39 910.5 days 0.16125991 0.4015718 0.3310508 0.03693161 0.032902493 0.03786896
## 40 1083.0 days 0.15995081 0.3999385 0.3275465 0.03665663 0.032902493 0.03759667
## 41 1084.0 days 0.15703221 0.3962729 0.3182409 0.03593731 0.032902493 0.03686395
## 42 1085.0 days 0.13680409 0.3698704 0.2979067 0.03392402 0.032213273 0.03476487
## 43 1087.0 days 0.13871916 0.3724502 0.2992181 0.03375004 0.032213273 0.03457800
## 44 1088.0 days 0.14912307 0.3861646 0.3265367 0.03660188 0.032902493 0.03749588
## 45 1089.0 days 0.15199744 0.3898685 0.3362136 0.03774807 0.032902493 0.03859867
## 46 1090.0 days 0.14392035 0.3793684 0.3290555 0.03646935 0.032902493 0.03721017
## 47 1092.0 days 0.16375775 0.4046699 0.3533560 0.04002120 0.032902493 0.04099028
## 48 1093.0 days 0.16036864 0.4004605 0.3416472 0.03801476 0.032213273 0.03903086
## 49 1094.0 days 0.15712347 0.3963880 0.3233428 0.03718010 0.032213273 0.03820022
## 50 1266.5 days 0.16576206 0.4071389 0.3440817 0.03829174 0.032213273 0.03929839
## 51 1267.5 days 0.15730363 0.3966152 0.3320596 0.03710934 0.024311742 0.03808353
## 52 1268.5 days 0.18129984 0.4257932 0.3469966 0.03795327 0.024311742 0.03899284
## 53 1269.5 days 0.25581576 0.5057823 0.4096054 0.04409032 0.024311742 0.04552310
## 54 1271.5 days 0.25296860 0.5029598 0.4000574 0.04277634 0.021571648 0.04423296
## 55 1272.5 days 0.23056208 0.4801688 0.3726381 0.03873352 0.021571648 0.03995026
## 56 1273.5 days 0.28024457 0.5293813 0.4090868 0.04376717 0.021571648 0.04552788
## 57 1274.5 days 0.29105796 0.5394979 0.4350000 0.04767770 0.038540943 0.04959324
## 58 1276.5 days 0.29270332 0.5410206 0.4478419 0.04981401 0.038540943 0.05170692
## 59 1448.0 days 0.29416991 0.5423743 0.4501909 0.04977658 0.038540943 0.05167013
## 60 1449.0 days 0.28456639 0.5334476 0.4223104 0.04772158 0.038540943 0.04958846
## 61 1451.0 days 0.24898599 0.4989850 0.3990839 0.04523614 0.038540943 0.04693082
## 62 1452.0 days 0.29880252 0.5466283 0.4242426 0.04718212 0.038540943 0.04908103
## 63 1453.0 days 0.31015842 0.5569187 0.4498779 0.04990772 0.038540943 0.05189279
## 64 1454.0 days 0.30316983 0.5506086 0.4332783 0.04758508 0.037016303 0.04946888
## 65 1456.0 days 0.28025624 0.5293923 0.4185791 0.04529067 0.037016303 0.04689350
## 66 1457.0 days 0.32305371 0.5683781 0.4602941 0.05135083 0.037016303 0.05345213
## 67 1458.0 days 0.32265591 0.5680281 0.4585494 0.05102782 0.037016303 0.05313512
## 68 1459.0 days 0.31111618 0.5577779 0.4246188 0.04948878 0.037016303 0.05161129
## 69 1631.5 days 0.32887860 0.5734794 0.4641508 0.05182526 0.037016303 0.05391640
## 70 1632.5 days 0.30767989 0.5546890 0.4458462 0.04993240 0.037016303 0.05193305
## 71 1633.5 days 0.31990121 0.5655981 0.4512652 0.04988887 0.037016303 0.05188451
## 72 1635.5 days 0.44934132 0.6703293 0.5394682 0.05872756 0.037627439 0.06144721
## 73 1636.5 days 0.44964399 0.6705550 0.5405442 0.05873928 0.037627439 0.06145873
## 74 1637.5 days 0.40795452 0.6387132 0.5018964 0.05305704 0.037627439 0.05531555
## 75 1638.5 days 0.46334302 0.6806930 0.5345661 0.05769052 0.037627439 0.06059050
## 76 1640.5 days 0.47586136 0.6898271 0.5618874 0.06186885 0.050754942 0.06495835
## 77 1641.5 days 0.47821234 0.6915290 0.5762354 0.06428925 0.050754942 0.06734543
## 78 1813.0 days 0.48521933 0.6965769 0.5842250 0.06449167 0.050754942 0.06754288
## 79 1815.0 days 0.45787045 0.6766613 0.5316516 0.06049108 0.050754942 0.06346227
## 80 1816.0 days 0.42485281 0.6518073 0.5164145 0.05838579 0.050754942 0.06113875
## 81 1817.0 days 0.52306519 0.7232325 0.5541716 0.06145258 0.050754942 0.06464833
## 82 1818.0 days 0.53514087 0.7315332 0.5780443 0.06403098 0.050754942 0.06733916
## 83 1820.0 days 0.52099352 0.7217988 0.5462013 0.05955966 0.040948288 0.06271489
## 84 1821.0 days 0.49981456 0.7069756 0.5349866 0.05769879 0.040948288 0.06056813
## 85 1822.0 days 0.55431526 0.7445235 0.5829667 0.06478330 0.040948288 0.06831291
## 86 1823.0 days 0.55353774 0.7440012 0.5800519 0.06425152 0.040948288 0.06779287
## 87 1825.0 days 0.52909807 0.7273913 0.5353115 0.06277030 0.040948288 0.06635738
## coverage
## 1 0.3333333
## 2 0.2222222
## 3 0.1111111
## 4 0.1111111
## 5 0.1111111
## 6 0.0000000
## 7 0.0000000
## 8 0.0000000
## 9 0.0000000
## 10 0.1111111
## 11 0.2222222
## 12 0.3333333
## 13 0.3333333
## 14 0.3333333
## 15 0.3333333
## 16 0.4444444
## 17 0.5555556
## 18 0.5555556
## 19 0.4444444
## 20 0.3333333
## 21 0.3333333
## 22 0.4444444
## 23 0.4444444
## 24 0.4444444
## 25 0.3333333
## 26 0.2222222
## 27 0.2222222
## 28 0.2222222
## 29 0.3333333
## 30 0.3333333
## 31 0.3333333
## 32 0.3333333
## 33 0.3333333
## 34 0.3333333
## 35 0.4444444
## 36 0.5555556
## 37 0.5555556
## 38 0.4444444
## 39 0.3333333
## 40 0.3333333
## 41 0.4444444
## 42 0.4444444
## 43 0.4444444
## 44 0.3333333
## 45 0.2222222
## 46 0.2222222
## 47 0.2222222
## 48 0.3333333
## 49 0.3333333
## 50 0.3333333
## 51 0.3333333
## 52 0.3333333
## 53 0.3333333
## 54 0.4444444
## 55 0.4444444
## 56 0.4444444
## 57 0.3333333
## 58 0.3333333
## 59 0.3333333
## 60 0.4444444
## 61 0.4444444
## 62 0.4444444
## 63 0.3333333
## 64 0.4444444
## 65 0.4444444
## 66 0.4444444
## 67 0.4444444
## 68 0.4444444
## 69 0.4444444
## 70 0.4444444
## 71 0.4444444
## 72 0.4444444
## 73 0.4444444
## 74 0.5555556
## 75 0.5555556
## 76 0.5555556
## 77 0.5555556
## 78 0.5555556
## 79 0.6666667
## 80 0.6666667
## 81 0.6666667
## 82 0.6666667
## 83 0.6666667
## 84 0.6666667
## 85 0.5555556
## 86 0.5555556
## 87 0.5555556
Now, we can plot visualizations for metrics like RMSE, MAE, MAPE, MDAPE, SMAPE.
plot_cross_validation_metric(df.cv, metric = 'rmse')
plot_cross_validation_metric(df.cv, metric = 'mae')
plot_cross_validation_metric(df.cv, metric = 'mape')
plot_cross_validation_metric(df.cv, metric = 'mdape')
plot_cross_validation_metric(df.cv, metric = 'smape')