library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
options('getSymbols.warning4.0'=FALSE)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.0.4 v tsibble 1.0.1
## v dplyr 1.0.2 v tsibbledata 0.3.0
## v tidyr 1.1.2 v feasts 0.2.1
## v lubridate 1.7.9.2 v fable 0.3.0
## v ggplot2 3.3.3
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks xts::first()
## x tsibble::index() masks zoo::index()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks xts::last()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(tsbox)
library(stringr)
x variable
GDP_growth <- getSymbols('A191RP1A027NBEA',src='FRED',auto.assign = FALSE)
GDP_growth <- GDP_growth %>%
ts_tsibble() %>%
mutate(time = year(time)) %>%
filter_index('1993' ~ '2019') %>%
rename(GDP_growth=value)
y variable
RetailSales <- getSymbols('MRTSSM452USN',src='FRED',auto.assign = FALSE)
RetailSales_growth <- RetailSales %>%
ts_tsibble() %>%
index_by(Year = year(time)) %>%
summarize(value=sum(value)) %>%
mutate(time=Year) %>%
update_tsibble(index=time) %>%
select(-Year) %>%
mutate(value=100*((value/lag(value))-1)) %>%
mutate(value=round(value,1)) %>%
filter_index('1993' ~ '2019') %>%
rename(RetSales_growth=value)
remove(RetailSales)
Join the two series so both are in a single tsibble.
Growth <- left_join(GDP_growth,RetailSales_growth,by='time') %>%
relocate(RetSales_growth, .before=GDP_growth)
remove(GDP_growth,RetailSales_growth)
Plot the percent change in both series.
Growth %>%
pivot_longer(c(GDP_growth,RetSales_growth),names_to='Series') %>%
autoplot(value)+
labs(y='% change')
The two series seem to move together. Something clearly happened around 2009: an economic dislocation (the 2008-09 recession) that had long-term effects.
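To put a number on "moving together", here is a minimal sketch (using only the dplyr tooling already loaded) that computes the sample correlation between the two growth series; its square should match the multiple R-squared of the simple regression reported further below.
# Sketch: sample correlation between the two growth series.
# use = 'complete.obs' guards against any missing values in either series.
Growth %>%
  as_tibble() %>%
  summarize(corr = cor(GDP_growth, RetSales_growth, use = 'complete.obs'))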
A better way to see the relationship is a scatterplot: plot the y variable (retail sales growth) against the x variable (GDP growth), with the x variable on the horizontal axis.
Growth %>%
ggplot(aes(x=GDP_growth,y=RetSales_growth))+
labs(y='Retail Sales (annual % change)',
x='GDP (annual % change)')+
geom_point()+
geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
This plots y against x and uses geom_smooth() with method = 'lm' to fit the regression line. Note that a scatterplot pays no attention to the time ordering: each point comes from a particular year, but we do not see which year, or when each observation occurred.
The line does not fit poorly here.
TSLM() is the time-series version of the lm() modelling command. It has two advantages: it fits the regression model directly on a time series object (a tsibble), and it provides predictors unique to time series data, such as trend() and season(). Underneath, it is the same regression analysis as the lm() command.
The x variable is GDP_growth.
fit <- Growth %>%
model(TSLM(RetSales_growth ~ GDP_growth))
fit %>% report()
## Series: RetSales_growth
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7496 -1.1122 0.1129 1.0420 2.9305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2639 0.8586 -0.307 0.761
## GDP_growth 0.9479 0.1764 5.373 1.42e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.623 on 25 degrees of freedom
## Multiple R-squared: 0.5359, Adjusted R-squared: 0.5174
## F-statistic: 28.87 on 1 and 25 DF, p-value: 1.4235e-05
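As a sanity check on the claim that TSLM() is just lm() applied to a tsibble, here is a small sketch refitting the same regression with base R's lm(); the coefficient estimates should match the report above.
# Sketch: the same regression with base lm(). A tsibble is a data.frame,
# so it can be passed to lm() directly; TSLM() adds tsibble support and the
# time-series specials (trend(), season()) used later.
lm(RetSales_growth ~ GDP_growth, data = Growth) %>% summary()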
Disp_Income <- getSymbols('A067RP1A027NBEA', src='FRED',auto.assign = FALSE)
Disp_Income <- Disp_Income %>%
ts_tsibble() %>%
mutate(time = year(time)) %>%
filter_index('1993' ~ '2019') %>%
rename(DispInc_growth=value)
Growth <- left_join(Growth,Disp_Income,by='time')
Cons_Credit <- getSymbols('HNOCCIA027N',src='FRED',auto.assign = FALSE)
Cons_Credit <- Cons_Credit %>%
ts_tsibble() %>%
mutate(time=year(time)) %>%
mutate(value=100*((value/lag(value))-1)) %>%
mutate(value=round(value,1)) %>%
filter_index('1993' ~ '2019') %>%
rename(ConsCredit_growth=value)
Growth <- left_join(Growth,Cons_Credit,by='time') %>%
relocate(RetSales_growth,.after = last_col())
remove(Cons_Credit,Disp_Income)
Growth %>%
pivot_longer(RetSales_growth:ConsCredit_growth,names_to='Series') %>%
autoplot(value)+
labs(y='% change')
Growth %>%
GGally::ggpairs(columns = 2:5)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
fit <- Growth %>%
model(tslm1 = TSLM(RetSales_growth ~ GDP_growth + DispInc_growth + ConsCredit_growth),
tslm2 = TSLM(RetSales_growth ~ GDP_growth + ConsCredit_growth),
tslm3 = TSLM(RetSales_growth ~ 0 + GDP_growth + ConsCredit_growth))
Dropping disposable income raises the adjusted R^2, meaning a better predictive relationship.
Look at the t-values: as a rough cutoff, a predictor with a t-value between -1 and 1 adds little.
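A convenient way to see all the t-values at once is a sketch using fabletools' tidy() on the mable; the 'statistic' column is the t-value discussed above.
# Sketch: coefficient tables for all three models in one tibble,
# sorted so the largest-magnitude t-values come first within each model.
fit %>%
  tidy() %>%
  arrange(.model, desc(abs(statistic)))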
fit %>% select(tslm1) %>% report()
## Series: RetSales_growth
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.46910 -1.12184 0.01338 0.98303 2.77411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4515 0.9367 -0.482 0.6344
## GDP_growth 0.6834 0.2812 2.430 0.0233 *
## DispInc_growth 0.0577 0.2717 0.212 0.8337
## ConsCredit_growth 0.1788 0.1080 1.656 0.1113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.595 on 23 degrees of freedom
## Multiple R-squared: 0.5876, Adjusted R-squared: 0.5338
## F-statistic: 10.92 on 3 and 23 DF, p-value: 0.00011731
fit %>% select(tslm2) %>% report()
## Series: RetSales_growth
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.500755 -1.191313 0.002247 0.951166 2.713873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3661 0.8290 -0.442 0.66271
## GDP_growth 0.7207 0.2153 3.348 0.00268 **
## ConsCredit_growth 0.1810 0.1053 1.718 0.09862 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.563 on 24 degrees of freedom
## Multiple R-squared: 0.5868, Adjusted R-squared: 0.5523
## F-statistic: 17.04 on 2 and 24 DF, p-value: 2.4795e-05
fit %>% select(tslm3) %>% report()
## Series: RetSales_growth
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6604 -1.2543 0.0607 0.9499 2.7866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## GDP_growth 0.6552 0.1535 4.269 0.000248 ***
## ConsCredit_growth 0.1777 0.1034 1.719 0.098004 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.538 on 25 degrees of freedom
## Multiple R-squared: 0.8983, Adjusted R-squared: 0.8901
## F-statistic: 110.4 on 2 and 25 DF, p-value: 3.9257e-13
Pick the model with the lowest AICc.
glance(fit) %>%
select(.model,adj_r_squared, CV, AIC, AICc, BIC) %>%
arrange(desc(adj_r_squared))
## # A tibble: 3 x 6
## .model adj_r_squared CV AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 tslm3 0.890 2.46 27.2 28.2 31.1
## 2 tslm2 0.552 2.76 28.9 30.8 34.1
## 3 tslm1 0.534 3.08 30.9 33.8 37.4
If the residuals were white noise, the autocorrelations would all be close to zero and the residual series would show no pattern. That is not the case here: there is definite autocorrelation, with large positive values at lags 1, 2, and 3, so the residuals are not white noise.
The Ljung-Box p-value is far below 0.05.
That is definitely a problem.
fit %>% select(tslm3) %>% gg_tsresiduals()
fit %>% select(tslm3) %>% augment() %>% features(.innov,ljung_box,lag=15,df=2)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 tslm3 52.1 0.00000554
Removing the intercept gives a stronger relationship (see the t-values and the adjusted R^2), so refit that model on its own for forecasting.
fit <- Growth %>%
model(TSLM(RetSales_growth ~ 0 + GDP_growth + ConsCredit_growth))
fitGDP <- Growth %>% model(ARIMA(GDP_growth))
fitGDP %>% report()
## Series: GDP_growth
## Model: ARIMA(0,0,1) w/ mean
##
## Coefficients:
## ma1 constant
## 0.4805 4.5144
## s.e. 0.1418 0.4329
##
## sigma^2 estimated as 2.551: log likelihood=-50.05
## AIC=106.1 AICc=107.14 BIC=109.98
fcGDP <- fitGDP %>% forecast(h='5 years')
fcGDP %>% autoplot(Growth)
fitCC = Growth %>% model(ARIMA(ConsCredit_growth))
fitCC %>% report()
## Series: ConsCredit_growth
## Model: ARIMA(0,1,0)
##
## sigma^2 estimated as 11.09: log likelihood=-68.18
## AIC=138.35 AICc=138.52 BIC=139.61
fcCC <- fitCC %>% forecast(h='5 years')
fcCC %>% autoplot(Growth)
To forecast retail sales growth for the next 5 years, we need forecasts of the two x variables; the ARIMA forecasts above provide them, and they are fed into the regression below. This gives a more sophisticated view than extrapolating the y variable on its own.
Growth_future <- new_data(Growth,5) %>%
mutate(GDP_growth = fcGDP$.mean) %>%
mutate(ConsCredit_growth = fcCC$.mean)
fc <- fit %>% forecast(new_data = Growth_future)
fc %>% autoplot(Growth) + labs(y='% Change in Retail Sales Growth')
PerCon <- getSymbols('NA000349Q',src='FRED',auto.assign = FALSE)
PerCon <- PerCon %>%
ts_tsibble() %>%
mutate(time=yearquarter(time)) %>%
filter_index('2000-Q1' ~ '2019-Q4')
This is quarterly data, not seasonally adjusted, and there is a trend. Around 2009 something happened: a disruption.
PerCon %>%
autoplot(value)+
labs(y='US$ Millions',title='Personal Consumption')
Fit a trend and seasonal terms to this series. You can see the trend (a general upward slope) and a seasonal pattern in which Q1, Q2, Q3, and Q4 each behave differently.
fit <- PerCon %>%
model(TSLM(value ~ trend() + season()))
report(fit)
## Series: value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -101345 -58882 -10936 55234 181362
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1530377.3 20733.0 73.814 < 2e-16 ***
## trend() 24298.0 343.2 70.794 < 2e-16 ***
## season()year2 67885.9 22393.7 3.031 0.00334 **
## season()year3 62418.9 22401.6 2.786 0.00675 **
## season()year4 139805.9 22414.8 6.237 2.4e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70810 on 75 degrees of freedom
## Multiple R-squared: 0.9855, Adjusted R-squared: 0.9847
## F-statistic: 1276 on 4 and 75 DF, p-value: < 2.22e-16
The trend coefficient is 24298.0: personal consumption rises by about $24,298 million per quarter on average (roughly $97,192 million per year).
The seasonal coefficients show that personal consumption is lowest in the first quarter of the year and rises a lot in quarter 4.
The adjusted R^2 is very high at 0.9847.
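A rough sketch of that seasonal pattern (it ignores the trend, so it is only a coarse check) is the average level of personal consumption by quarter:
# Sketch: mean personal consumption by quarter -- Q1 should be lowest and
# Q4 highest, matching the season() coefficients in the report above.
PerCon %>%
  as_tibble() %>%
  group_by(Quarter = quarter(time)) %>%
  summarize(mean_value = mean(value))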
So let's check the residuals:
fit %>% gg_tsresiduals()
In the plot below, red is the actual data and blue is the fitted values.
augment(fit) %>%
ggplot(aes(x=time)) +
geom_line(aes(y=value, color = "Actual")) +
geom_line(aes(y = .fitted, color= "Fitted")) +
labs(y="US$ Millions",
title= "Personal Consumption") +
guides(color = guide_legend(title="Series"))
Something is off after the recession: the actual values run higher than the fitted values for a while, and after that time frame they go above and below.
One way to examine the fit is to plot fitted against actual values, colored by quarter:
augment(fit) %>%
ggplot(aes(x=value, y=.fitted,
color=factor(quarter(time)))) +
geom_point() +
labs(y = "Fitted", x = "Actual values",
title = "Personal Consumption") +
geom_abline(intercept = 0, slope = 1) +
guides(color = guide_legend(title="Quarter"))
The season() terms are seasonal dummy variables. An alternative is Fourier terms: fourier(K = 2), where K can be at most 2 for quarterly data; with K = 2 we get the same R^2 as the dummy-variable model (a sketch of the regressors this creates follows below).
A knot is a point where the slope of the trend is allowed to change. Here it will be placed at the first quarter of 2009, in a later model together with a shift dummy.
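For reference, here is a small sketch of the regressors that fourier(K = 2) generates for quarterly data; the column names mirror the coefficient names in the report below.
# Sketch: Fourier regressors for quarterly data, seasonal period m = 4.
# The second sine term sin(2*pi*2*tt/4) is identically zero and is dropped,
# leaving three columns -- the same count as the three season() dummies.
tt <- 1:8   # two years of quarterly observations
m <- 4
data.frame(tt,
           C1_4 = cos(2 * pi * 1 * tt / m),
           S1_4 = sin(2 * pi * 1 * tt / m),
           C2_4 = cos(2 * pi * 2 * tt / m))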
fit <- PerCon %>%
model(TSLM(value ~ trend() + fourier(K = 2)))
report(fit)
## Series: value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -101345 -58882 -10936 55234 181362
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1597905.0 15996.7 99.890 < 2e-16 ***
## trend() 24298.0 343.2 70.794 < 2e-16 ***
## fourier(K = 2)C1_4 -31209.4 11200.8 -2.786 0.00675 **
## fourier(K = 2)S1_4 -35960.0 11200.8 -3.210 0.00195 **
## fourier(K = 2)C2_4 -36318.2 7918.3 -4.587 1.77e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70810 on 75 degrees of freedom
## Multiple R-squared: 0.9855, Adjusted R-squared: 0.9847
## F-statistic: 1276 on 4 and 75 DF, p-value: < 2.22e-16
fit %>% gg_tsresiduals()
augment(fit) %>%
ggplot(aes(x=time)) +
geom_line(aes(y=value, color = "Actual")) +
geom_line(aes(y = .fitted, color= "Fitted")) +
labs(y="US$ Millions",
title= "Personal Consumption") +
guides(color = guide_legend(title="Series"))
augment(fit) %>%
ggplot(aes(x=value, y=.fitted,
color=factor(quarter(time)))) +
geom_point() +
labs(y = "Fitted", x = "Actual values",
title = "Personal Consumption") +
geom_abline(intercept = 0, slope = 1) +
guides(color = guide_legend(title="Quarter"))
Shift: a dummy variable equal to 1 from Q4 2008 onward; the questions are whether to include it and how to use it. knots: where the trend slope is allowed to change; here it is placed at Q1 2009. There is no hard-and-fast rule for where to place the knot.
PerCon <- PerCon %>%
mutate(Shift=if_else(time >= yearquarter('2008 Q4'),1,0))
fit <- PerCon %>%
model(TSLM(value ~ Shift + trend(knots=yearquarter('2009 Q1')) + season()))
fit %>% report()
## Series: value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -71846 -32219 -1941 21411 101931
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1515698.6 15804.5 95.903
## Shift -208092.3 18590.1 -11.194
## trend(knots = yearquarter("2009 Q1"))trend 27079.6 679.1 39.874
## trend(knots = yearquarter("2009 Q1"))trend_37 1877.0 825.0 2.275
## season()year2 64072.0 12825.2 4.996
## season()year3 54791.0 12843.8 4.266
## season()year4 138768.7 12833.1 10.813
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Shift < 2e-16 ***
## trend(knots = yearquarter("2009 Q1"))trend < 2e-16 ***
## trend(knots = yearquarter("2009 Q1"))trend_37 0.0258 *
## season()year2 3.88e-06 ***
## season()year3 5.89e-05 ***
## season()year4 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40540 on 73 degrees of freedom
## Multiple R-squared: 0.9954, Adjusted R-squared: 0.995
## F-statistic: 2621 on 6 and 73 DF, p-value: < 2.22e-16
augment(fit) %>%
ggplot(aes(x=time)) +
geom_line(aes(y=value, color = "Actual")) +
geom_line(aes(y = .fitted, color= "Fitted")) +
labs(y="US$ Millions",
title= "Personal Consumption") +
guides(color = guide_legend(title="Series"))
fit %>% gg_tsresiduals()
Adjusted R-squared is 0.995, which is very good; this model seems to fit much better. (In the report, the trend_37 coefficient is the additional slope after the knot at 2009 Q1, which is observation 37 of the sample.)
fit <- PerCon %>%
model(tslm1 = TSLM(value ~ trend() + season()),
tslm2 = TSLM(value ~ trend() + fourier(K=2)),
tslm3 = TSLM(value ~ Shift + trend(knots=yearquarter('2009 Q1')) + season()))
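Before forecasting, the three specifications can be compared the same way as the retail-sales models earlier; this is a sketch, and the shift + knot model should come out ahead on adjusted R-squared.
# Sketch: goodness-of-fit comparison across the three TSLM specifications.
glance(fit) %>%
  select(.model, adj_r_squared, CV, AIC, AICc, BIC) %>%
  arrange(AICc)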
PerCon_new <- new_data(PerCon,8) %>%
mutate(Shift = 1)
fc <- fit %>% forecast(PerCon_new)
fc %>%
autoplot(PerCon) +
labs(title='Forecasts of Personal Consumption using regression',
y='US$ Millions')+
facet_grid(.model ~ .)
All three models include trend and seasonality; the third also has the knot and the shift. Its forecasts sit slightly below the others, but it does a better job of fitting the data.
TSLM: what can go wrong? Very often, just running it gives autocorrelated residuals. The estimated coefficients are then no longer your best guess, the statistical tests are no longer reliable because they assume white-noise residuals, and the model forecasts poorly.
USComp <- getSymbols('A033RC1A027NBEA',src='FRED',auto.assign = FALSE)
USComp <- USComp %>%
ts_tsibble() %>%
mutate(time = year(time)) %>%
filter_index('1960' ~ '2019') %>%
rename(USComp=value)
PortBirths <- getSymbols('SPDYNCBRTINPRT',src='FRED',auto.assign = FALSE)
PortBirths <- PortBirths %>%
ts_tsibble() %>%
mutate(time = year(time)) %>%
filter_index('1960' ~ '2019') %>%
rename(PortBirths=value)
USComp <- left_join(USComp,PortBirths,by='time')
USComp %>%
pivot_longer(-time) %>%
ggplot(aes(x=time, y=value, color=name))+
geom_line()+
facet_grid(name ~ ., scales='free_y')
USComp %>%
ggplot(aes(x=PortBirths,y=USComp))+
labs(y='US Employee Compensation',
x='Portugal Births per 1,000')+
geom_point()+
geom_smooth(method='loess',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
USComp %>%
ggplot(aes(x=log(PortBirths),y=log(USComp)))+
labs(y='log(US Employee Compensation)',
x='log(Portugal Births per 1,000)')+
geom_point()+
geom_smooth(method='lm',se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
fit <- USComp %>%
model(TSLM(log(USComp) ~ log(PortBirths)))
report(fit)
## Series: USComp
## Model: TSLM
## Transformation: log(USComp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.395666 -0.149228 0.008151 0.125734 0.406418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.87436 0.17223 92.17 <2e-16 ***
## log(PortBirths) -3.09276 0.06548 -47.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1821 on 58 degrees of freedom
## Multiple R-squared: 0.9747, Adjusted R-squared: 0.9742
## F-statistic: 2231 on 1 and 58 DF, p-value: < 2.22e-16
fit %>% gg_tsresiduals()
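To make the point numerically, here is a sketch applying the same Ljung-Box check used for the retail-sales model; with two strongly trending but unrelated series, heavily autocorrelated residuals (and so a tiny p-value) are expected.
# Sketch: Ljung-Box test on the innovation residuals of the spurious regression.
fit %>% augment() %>% features(.innov, ljung_box, lag = 10)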