knitr::opts_chunk$set(echo=TRUE)
library(tsibbledata)
## Warning: package 'tsibbledata' was built under R version 4.1.2
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tsibbledata)
library(tsibble)
library(forecast)
library(dplyr)
library(feasts)
## Loading required package: fabletools
##
## Attaching package: 'fabletools'
## The following objects are masked from 'package:forecast':
##
## accuracy, forecast
library(ggplot2)
library(histogram)
library(MASS)
## Warning: package 'MASS' was built under R version 4.1.2
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(latex2exp)
## Warning: package 'latex2exp' was built under R version 4.1.2
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✓ tibble 3.1.6 ✓ lubridate 1.8.0
## ✓ tidyr 1.2.0 ✓ fable 0.3.1
## Warning: package 'tidyr' was built under R version 4.1.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x fabletools::forecast() masks forecast::forecast()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x MASS::select() masks dplyr::select()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
##########Chapter 3 exercise
#1.Plot the GDP per capita for each country over time. Which country has the
#highest GDP per capita? How has this changed over time?
View(global_economy)
#GDP per capital
globaleconomy <- global_economy %>%
mutate(GDPperCapita = GDP/Population)
View(globaleconomy)
#plot GDP per capita for each country over years
globaleconomy %>%
autoplot(GDPperCapita, show.legend = FALSE) +
labs(title= "GDP per capita by Country", y = "GDP per capital in $")
## Warning: Removed 3242 row(s) containing missing values (geom_path).

#Monaco has the highest GDP per capita of $185152.53 in 2014.
globaleconomy %>%
filter(Country == "Monaco") %>%
autoplot(GDPperCapita) +
labs(title= "GDP per capita for Monaco", y = "GDP per capital in $")
## Warning: Removed 11 row(s) containing missing values (geom_path).

#It shows an overall increasing trend of Monaco's GDP per capita
#2a.United States GDP from global_economy
USGDP<-global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP / 10 ^ 12) +
labs(title= "GDP, United States", y = "GDP in trillion $")
USGDP

#b.Slaughter of Victorian "Bulls, bullocks and steers" in aus_livestock
View(aus_livestock)
aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Victoria") %>%
autoplot(Count) +
labs(title= "Slaughter of Victoria Bulls, Bullocks, and Steers")

#c.Victorian Electricity Demand from vic_elec
View(vic_elec)
#transfer into daily electricity demand
ve<-vic_elec%>%
group_by(Date)%>%
mutate(Demand = sum(Demand))%>%
distinct(Date,Demand)
View(ve)
ve%>%
as_tsibble(index=Date)%>%
autoplot(Demand)+
labs(title="Daily Victorian Electricity Demand from Jan 2012 to Dec 2014")

#d.Gas production from aus_production
View(aus_production)
aus_production%>%
autoplot(Gas)+
labs(title="Quarterly Gas production from 1956Q1-2010Q2")

#It shows an increasing seasonal variation, applies a box-cox transformation
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "", title = TeX(paste0("Transformed Gas Production with $\\lambda$ = ",
round(lambda,2))))

#3.Why is a Box-Cox transformation unhelpful for the canadian_gas data?
View(canadian_gas)
canadian_gas %>%
autoplot(Volume) +
labs(title = "Canadian Gas Production from Jan 1960 to Feb 2005")

lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "", title =TeX(paste0("Transformed Gas Production with $\\lambda$ = ",
round(lambda,2))))

histogram(canadian_gas$Volume)
## Choosing between regular and irregular histogram:
##
## 1.Building regular histogram with maximum number of bins 86.
## - Choosing number of bins via maximum likelihood with BR penalty.
## - Number of bins chosen: 16.
##
##
## 2.Building irregular histogram.
## - Using finest grid based on observations.
## - Choosing number of bins via maximum likelihood with PENB penalty.
## - Using greedy procedure to recursively build a finest partition with at most 100 bins.
## - Pre-selected finest partition with 100 bins.
## - Computing weights for dynamic programming algorithm.
## - Now performing dynamic optimization.
## - Number of bins chosen: 6.
##
##
##
## Regular histogram chosen.

## $breaks
## [1] 0.96600 2.12615 3.28630 4.44645 5.60660 6.76675 7.92690 9.08705
## [9] 10.24720 11.40735 12.56750 13.72765 14.88780 16.04795 17.20810 18.36825
## [17] 19.52840
##
## $counts
## [1] 29 37 35 18 23 82 70 35 34 19 15 17 19 40 50 19
##
## $density
## [1] 0.04611950 0.05884212 0.05566146 0.02862590 0.03657753 0.13040686
## [7] 0.11132293 0.05566146 0.05407114 0.03021622 0.02385491 0.02703557
## [13] 0.03021622 0.06361310 0.07951638 0.03021622
##
## $mids
## [1] 1.546075 2.706225 3.866375 5.026525 6.186675 7.346825 8.506975
## [8] 9.667125 10.827275 11.987425 13.147575 14.307725 15.467875 16.628025
## [15] 17.788175 18.948325
##
## $xname
## [1] "canadian_gas$Volume"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
#It shows that the Box-Cox transformation is not helpful. The Box-Cox transformation
#usually is used to transforms our data closely to a normal distribution.
#Because the seasonal variation increases and then decreases after 1970, the Box Cox
#transformation cannot be used to make the seasonal variation uniform.
#4.What Box-Cox transformation would you select for your retail data?
View(aus_retail)
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,Turnover)+
labs(title = "Retail Turnover", y = "in millions $")

lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
autoplot(box_cox(Turnover, lambda)) +
labs(y = "", title = TeX(paste0("Transformed Retail Turnover with $\\lambda$ = ",
round(lambda,2))))

#5.For the following series, find an appropriate Box-Cox transformation in order
#to stabilise the variance. Tobacco from aus_production,
View(aus_production)
autoplot(aus_production, Tobacco)+
labs(title = "Tobacco from 1956Q1 to 2010Q2")
## Warning: Removed 24 row(s) containing missing values (geom_path).

lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "", title = TeX(paste0("Transformed Tobacco with $\\lambda$ = ",
round(lambda,2))))
## Warning: Removed 24 row(s) containing missing values (geom_path).

#It shows that the box-cox transformation is not really effective on Tobacco
#Economy class passengers between Melbourne and Sydney from ansett,
View(ansett)
MEL_SYD<-ansett %>%
filter(Airports=="MEL-SYD",Class=="Economy")
autoplot(MEL_SYD,Passengers)+
labs(title="Economy class passengers between Melbourne and Sydney")

lambda <- MEL_SYD %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
MEL_SYD %>%
autoplot(box_cox(Passengers, lambda)) +
labs(y = "", title = TeX(paste0("Transformed Number of MEL-SYD Economy Passengers with $\\lambda$ = ",
round(lambda,2))))

#with a lambda of 2, it shows a more clear variation
#and Pedestrian counts at Southern Cross Station from pedestrian.
View(pedestrian)
S<-pedestrian%>%
filter(Sensor=="Southern Cross Station")
autoplot(S,Count)+
labs(title = "Hourly Pedestrian Counts at Southern Cross Station 2015-2016")

lambda <- S %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
S %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "", title = TeX(paste0("Transformed Hourly Southern Cross Station Pedestrian Counts with $\\lambda$ = ",
round(lambda,2))))

#6.Show that a 3×5 MA is equivalent to a 7-term weighted moving average
#with weights of 0.067, 0.133, 0.200, 0.200, 0.200, 0.133, and 0.067.
#3x5 MA=1/15Y1+2/15Y2+3/15Y3+3/15Y4+3/15Y5+2/15Y6+1/15Y7
#weight=c(0.067, 0.133, 0.200, 0.200, 0.200, 0.133, 0.067)
#7.Consider the last five years of the Gas data from aus_production
View(aus_production)
gas <- tail(aus_production, 5*4) %>%
dplyr::select(Gas)
View(gas)
#a.plot the time series.
#Can you identify seasonal fluctuations and/or a trend-cycle?
autoplot(gas,Gas)

#It shows an overall increasing trend from last five years.
#b.Use classical_decomposition with type=multiplicative to calculate
#the trend-cycle and seasonal indices.
gas_dcmp <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_dcmp) %>%
autoplot() +
labs(title = "Classical Multiplicative Decomposition for Gas")
## Warning: Removed 2 row(s) containing missing values (geom_path).

#c.Do the results support the graphical interpretation from part a?
#Yes, it do support the graphical interpretation from part a. It shows an increasing
#trend with a seasonal variation increase from Q1 to Q3 then decrease after Q3.
#d.Compute and plot the seasonally adjusted data.
components(gas_dcmp) %>%
as_tsibble() %>%
autoplot(Gas, colour = "red") +
geom_line(aes(y=season_adjust)) +
labs(title = "Seasonally Adjusted Gas Production")

#e.Change one observation to be an outlier (e.g., add 300 to one observation),
#and recompute the seasonally adjusted data. What is the effect of the outlier?
gas2 <- gas
gas2$Gas[10] <- gas2$Gas[10]+300
gas2 %>%
model(classical_decomposition(Gas,type = "multiplicative"))%>%
components()%>%
autoplot() +
labs(title = "Last Five Years of The Gas Data with 300 added")
## Warning: Removed 2 row(s) containing missing values (geom_path).

gas2 %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "red") +
geom_line(aes(y=season_adjust ,colour = "grey")) +
labs(title = "Last Five Years of The Gas Data with 300 added")+
scale_colour_manual(
values = c("red","grey"),
breaks = c("Data", "Seasonally Adjusted"))

#f.Does it make any difference if the outlier is near the end rather than in
#the middle of the time series?
gas3 <- gas
gas3$Gas[20] <- gas3$Gas[20]+300
gas3 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with outlier at the End")
## Warning: Removed 2 row(s) containing missing values (geom_path).

#The seasonal data shows less effect, the trend shows a better pattern than with
#an outlier at the middle and shows a more similar patter as the original one but
#with a peak at the end.
#8.Recall your retail time series data (from Exercise 8 in Section 2.10).
#Decompose the series using X-11. Does it reveal any outliers,
#or unusual features that you had not noticed previously?
myseries %>%
model(classical_decomposition(Turnover,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Multiplicative decomposition of Retail")
## Warning: Removed 6 row(s) containing missing values (geom_path).

myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components() %>%
autoplot()+
labs(title = "X-11 decomposition of Retail")

#This method is able to capture more noise in the early 1990s which was a
#recession and it is able to capture the seasonality better.
#There are a few data points that look to be more irregular that were not seen before.
#9a.It shows an overall increasing trend of the number of persons in the civilian
#labor force in Australia each month from February 1978 to around 1990, then the
#increasing rate slows down. The seasonal data also shows a slightly increasing variation
#of a peak followed by a decreasing and then raise to another peak.
#b.Yes, the recession of 1991/1992 is visible in the remainder component.
#10.This exercise uses the canadian_gas data (monthly Canadian gas production
#in billions of cubic metres, January 1960 – February 2005).
#a.Plot the data using autoplot(), gg_subseries() and gg_season() to look at the
#effect of the changing seasonality over time.
View(canadian_gas)
canadian_gas %>%
autoplot(Volume)+
labs(title = "Monthly Canadian Gas Production 1960-2005")

canadian_gas %>%
gg_subseries(Volume)+
labs(title = "Monthly Canadian Gas Production 1960-2005")

canadian_gas %>%
gg_season(Volume)+
labs(title = "Monthly Canadian Gas Production 1960-2005")

#It shows an increasing trend from 1960 to around 1971, then followed by a slightly
#decreasing trend till 1990, and ends with another raise after 1990.And the gas
#production usually decreases in summer and increases in winter.
#b.Do an STL decomposition of the data. You will need to choose a seasonal window
#to allow for the changing shape of the seasonal component.
canadian_gas %>%
model(
STL(Volume ~ trend(window = 10) +
season(window = 10),
robust = TRUE)) %>%
components() %>%
autoplot()+
labs(title="SLT Decomposition of Canadian Gas")

#c.How does the seasonal shape change over time?
#The seasonal shape shows an increase in variation from 1970 till around 1985,
#then followed by a decrease in variation after that.
#d.Can you produce a plausible seasonally adjusted series?
canadian_gas %>%
model(
STL(Volume ~ trend(window = 10) +
season(window = 10),
robust = TRUE)) %>%
components() %>%
ggplot(aes(x = Month)) +
geom_line(aes(y = Volume, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(title = "STL decomposition of Canadian Gas") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)

#e.Compare the results with those obtained using SEATS and X-11. How are they different?
#SEATS
canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
components() %>%
autoplot() +
labs(title ="SEATS Decomposition of Canadian Gas Production")

#x-11
canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components() %>%
autoplot()+
labs(title = "X-11 decomposition of Canadian Gas")

#SEATS shows a greater variation than x-11 in irregular component.
###############################################################################
##########Chapter 7 exercises
#1.Half-hourly electricity demand for Victoria, Australia is contained in vic_elec.
#Extract the January 2014 electricity demand, and aggregate this data to daily
#with daily total demands and maximum temperatures.
View(vic_elec)
jan14_vic_elec <- vic_elec %>%
filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
index_by(Date = as_date(Time)) %>%
summarise(
Demand = sum(Demand),
Temperature = max(Temperature)
)
#a.Plot the data and find the regression model for Demand with temperature as a
#predictor variable. Why is there a positive relationship?
jan14_vic_elec %>%
as.data.frame()%>%
ggplot(aes(x=Temperature, y=Demand)) +
labs(title="Electricity Demand for Victoria, Australia 2014 Jan",
y="Electricity Demand",x="Temperature")+
geom_point() +
geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

tslm_elec<-jan14_vic_elec %>%
model(TSLM(Demand~Temperature))%>%
report()
## Series: Demand
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -49978.2 -10218.9 -121.3 18533.2 35440.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59083.9 17424.8 3.391 0.00203 **
## Temperature 6154.3 601.3 10.235 3.89e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24540 on 29 degrees of freedom
## Multiple R-squared: 0.7832, Adjusted R-squared: 0.7757
## F-statistic: 104.7 on 1 and 29 DF, p-value: 3.8897e-11
#The regression output indicates that the higher the temperature, people will demand
#more electric power. One reason could be that people need to use air conditioner.
#b.Produce a residual plot. Is the model adequate? Are there any outliers or
#influential observations?
jan14_vic_elec %>%
as.data.frame()%>%
ggplot(aes(x=Temperature, y=Demand)) +
labs(title="Electricity Demand for Victoria, Australia 2014 Jan",
y="Electricity Demand",x="Temperature")+
geom_point() +
geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

tslm_elec %>% gg_tsresiduals()

#The model is adequate. The residuals tend to increase gradually over the time.
#Most of the residuals is within the range of 0 to 30,000 but there was outliers.
#c.Use the model to forecast the electricity demand that you would expect for
#the next day if the maximum temperature was 15∘C and compare it with the forecast
#if the with maximum temperature was 35∘C.
#Do you believe these forecasts? The following R code will get you started:
newscenarios<-scenarios(
"Temperature of 15∘C" = new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 15),
"Temperature of 35∘C" = new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 35),
names_to = "Scenario")
fcast <- forecast(tslm_elec,newscenarios)
fcast
## # A fable: 2 x 6 [1D]
## # Key: Scenario, .model [2]
## Scenario .model Date Demand .mean Temperature
## <chr> <chr> <date> <dist> <dbl> <dbl>
## 1 Temperature of 15∘C TSLM(Dem… 2014-02-01 N(151398, 6.8e+08) 1.51e5 15
## 2 Temperature of 35∘C TSLM(Dem… 2014-02-01 N(274484, 6.4e+08) 2.74e5 35
#Temperature of 15∘C: 1.51e5, Temperature of 35∘C: 2.74e5. I do believe this forecast
#it's constant with the previous result that a higher temperature is associated
#with a higher demand of electric power.
jan14_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 15)
) %>%
autoplot(jan14_vic_elec)

jan14_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 35)
) %>%
autoplot(jan14_vic_elec)

#d.Given prediction intervals for your forecasts
lm <- lm(Demand ~ Temperature, data=jan14_vic_elec)
forecast(lm, newdata=data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 151398.35 151398.4 117127.2 185669.5 97951.22 204845.5
## 274484.25 274484.2 241333.0 307635.5 222783.69 326184.8
#e.Plot Demand vs Temperature for all of the available data in vic_elec
#aggregated to daily total demand and maximum temperature.
#What does this say about your model?
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temperature")

#The temperature does have a relationship with electricity demand, but it's not
#a linear relationship. Below the temperature of 20, the electricity demand shows
#a negative relationship with the temperature; however, after 20, it shows a positive
#relationship between temperature and eletricity demand.
#2.Data set olympic_running contains the winning times (in seconds) in each
#Olympic Games sprint, middle-distance and long-distance track events from 1896 to 2016.
#a.Plot the winning time against the year for each event. Describe the main features of the plot.
View(olympic_running)
autoplot(olympic_running)
## Plot variable not specified, automatically selected `.vars = Time`

#It shows a obvious decreasing trend for 10,000 mem's and women's running over years
#It also shows missing values for some periods
#b.Fit a regression line to the data for each event. Obviously the winning times
#have been decreasing, but at what average rate per year?
#filter data into male and female
male<-olympic_running%>%
filter(Sex=='men')
female<-olympic_running%>%
filter(Sex=='women')
View(female)
#Male Running Section:
fitted1 = male %>%
group_by(Length) %>% do(model = lm(Time~Year, data =.))
fitted1$model
## [[1]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 35.01158 -0.01261
##
##
## [[2]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 69.37650 -0.02488
##
##
## [[3]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 172.48148 -0.06457
##
##
## [[4]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 405.8457 -0.1518
##
##
## [[5]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 843.4367 -0.3151
##
##
## [[6]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 2853.20 -1.03
##
##
## [[7]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 6965.459 -2.666
#100m: time using decreases by 0.01261 per year
#200m: time using decreases by 0.02488 per year
#400m: time using decreases by0.06457 per year
#800m: time using decreases by 0.1518 per year
#1,500m: time using decreases by 0.3151 per year
#5,000m: time using decreases by 1.03 per year
#10,000m: time using decreases by 2.666 per year
#Female Running Section:
fitted2 = female %>%
group_by(Length) %>% do(model = lm(Time~Year, data =.))
fitted2$model
## [[1]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 39.18816 -0.01419
##
##
## [[2]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 89.13761 -0.03363
##
##
## [[3]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 129.39165 -0.04008
##
##
## [[4]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 511.369 -0.198
##
##
## [[5]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## -50.9926 0.1468
##
##
## [[6]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 1504.175 -0.303
##
##
## [[7]]
##
## Call:
## lm(formula = Time ~ Year, data = .)
##
## Coefficients:
## (Intercept) Year
## 8825.260 -3.496
#100m: time using decreases by 0.01419 per year
#200m: time using decreases by 0.03363 per year
#400m: time using decreases by 0.04008 per year
#800m: time using decreases by 0.198 per year
#1,500m: time using decreases by 0.1468 per year
#5,000m: time using decreases by 0.303 per year
#10,000m: time using decreases by 3.496 per year
#c.Plot the residuals against the year.
#What does this indicate about the suitability of the fitted lines?
mens100<-male%>%
filter(Length==100)
men100<-lm(Time~Year,data=mens100)
checkresiduals(men100)

##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals
## LM test = 2.2103, df = 6, p-value = 0.8994
#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly left skewed.
mens1500<-male%>%
filter(Length==1500)
men1500<-lm(Time~Year,data=mens1500)
checkresiduals(men1500)

##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals
## LM test = 5.4204, df = 6, p-value = 0.4911
#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly right skewed.
women100<-female%>%
filter(Length==100)
fwomen100<-lm(Time~Year,data=women100)
checkresiduals(fwomen100)

##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals
## LM test = 1.482, df = 5, p-value = 0.9151
#p-value>0.05, residuals are not correlated each other.
women1500<-female%>%
filter(Length==1500)
fwomen1500<-lm(Time~Year,data=women1500)
checkresiduals(fwomen1500)

##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals
## LM test = 3.116, df = 5, p-value = 0.6821
#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly left skewed.
#d.Predict the winning time for each race in the 2020 Olympics.
#Give a prediction interval for your forecasts.
#What assumptions have you made in these calculations?
mens100 %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running)

mens1500 %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running)

women100 %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running)

women1500 %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running)

#All of the four graphs show that the time using for 2020 will decrease, which
#is consitent with the previous finding.
#3. logy=β0 + β1logx + ε could be also wrote as
#y=e^(β0 + β1logx + e)
#dy/dx=(β1/x)e^(β0 + β1logx + e)
#dy/dx = β1*y/x
#β1=(dy/dx)*(x/y) --> β1 measures the ratio of the percentage change in y
#to the percentage change in x, so, β1 is the elasticity coefficient.
#4.The data set souvenirs concerns the monthly sales figures of a shop which opened
#in Jan 1987 and sells gifts, souvenirs, and novelties. The shop is situated on
#the wharf at a beach resort town in Queensland, Australia. The sales volume varies
#with the seasonal population of tourists. There is a large influx of visitors to
#the town at Christmas and for the local surfing festival, held every March since 1988.
#Over time, the shop has expanded its premises, range of products, and staff.
#a.Produce a time plot of the data and describe the patterns in the graph
#Identify any unusual or unexpected fluctuations in the time series.
View(souvenirs)
autoplot(souvenirs)
## Plot variable not specified, automatically selected `.vars = Sales`

#The plot shows an increasing trend with seasonal variation. One unusual pattern
#was between year of 1993 and 1994, it shows a greater turbulence than before.
#b.Explain why it is necessary to take logarithms of these data before fitting a model.
#Because logarithms can handle a non-linear relationship and transform a skewed
#variable into one that is more approximately normal for better prediction.
#It's necessary to transfer to logarithms to keep a constant seasonal pattern.
#c.Fit a regression model to the logarithms of these sales data with a linear trend,
#seasonal dummies and a “surfing festival” dummy variable.
souvenirs_ts<-ts(souvenirs$Sales,start=1987,end=1993,frequency=12)
head(souvenirs_ts,84)
## Jan Feb Mar Apr May Jun Jul Aug
## 1987 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74 4349.61 3566.34
## 1988 2499.81 5198.24 7225.14 4806.03 5900.88 4951.34 6179.12 4752.15
## 1989 4717.02 5702.63 9957.58 5304.78 6492.43 6630.80 7349.62 8176.62
## 1990 5921.10 5814.58 12421.25 6369.77 7609.12 7224.75 8121.22 7979.25
## 1991 4826.64 6470.23 9638.77 8821.17 8722.37 10209.48 11276.55 12552.22
## 1992 7615.03 9849.69 14558.40 11587.33 9332.56 13082.09 16732.78 19888.61
## 1993 10243.24
## Sep Oct Nov Dec
## 1987 5021.82 6423.48 7600.60 19756.21
## 1988 5496.43 5835.10 12600.08 28541.72
## 1989 8573.17 9690.50 15151.84 34061.01
## 1990 8093.06 8476.70 17914.66 30114.41
## 1991 11637.39 13606.89 21822.11 45060.69
## 1992 23933.38 25391.35 36024.80 80721.71
## 1993
fest<- rep(0, length(souvenirs_ts))
fest[seq_along(fest)%%12 == 3] = 1
fest[3] = 0
fest<- ts(fest, freq = 12, start=c(1987,1))
souvenirs_ts1<- data.frame(souvenirs_ts, fest)
souvenirs_tslm<- tslm(BoxCox(souvenirs_ts1$souvenirs_ts,0)~season+trend+fest)
souvenirs_tslm
##
## Call:
## tslm(formula = BoxCox(souvenirs_ts1$souvenirs_ts, 0) ~ season +
## trend + fest)
##
## Coefficients:
## (Intercept) season2 season3 season4 season5 season6
## 7.67063 0.27302 0.21925 0.36583 0.41414 0.43996
## season7 season8 season9 season10 season11 season12
## 0.57677 0.54076 0.62991 0.72431 1.19625 1.94798
## trend fest
## 0.02064 0.56067
#d.Plot the residuals against time and against the fitted values. Do these plots
#reveal any problems with the model?
autoplot(souvenirs_tslm$residuals)

#The plot shows the residuals are around 0. So, the logarithms transformation
#does improve the model.
#e.Do boxplots of the residuals for each month. Does this reveal any problems with
#the model?
boxplot(residuals(souvenirs_tslm)~cycle(residuals(souvenirs_tslm)))

#Yes, the wide range residuals for several months (e.g., month 3) become an issue
#that prevent the model to fit the residuals.
#f.What do the values of the coefficients tell you about each variable?
souvenirs_tslm$coefficients
## (Intercept) season2 season3 season4 season5 season6
## 7.67063278 0.27301763 0.21924940 0.36583070 0.41414455 0.43995797
## season7 season8 season9 season10 season11 season12
## 0.57676896 0.54076308 0.62990870 0.72431141 1.19625228 1.94797768
## trend fest
## 0.02064238 0.56067468
#The coefficients shows an increasing trend in sales over the year.
#g.What does the Ljung-Box test tell you about your model?
checkresiduals(souvenirs_tslm)

##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 32.702, df = 17, p-value = 0.01229
#The p-value of 0.012 (<0.05) shows that the residuals are correlated.
#The histogram almost shows a normal distribution
#h.Regardless of your answers to the above questions, use your regression model
#to predict the monthly sales for 1994, 1995, and 1996. Produce prediction intervals
#for each of your forecasts.
fore <- data.frame(fest = rep(0, 47))
preds <- forecast(souvenirs_tslm, newdata=fore)
preds
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Feb 1993 9.471186 9.217087 9.725285 9.078883 9.863490
## Mar 1993 9.438060 9.099830 9.776290 8.915867 9.960253
## Apr 1993 9.605284 9.351185 9.859383 9.212980 9.997588
## May 1993 9.674240 9.420141 9.928339 9.281937 10.066544
## Jun 1993 9.720696 9.466597 9.974795 9.328392 10.113000
## Jul 1993 9.878149 9.624050 10.132249 9.485846 10.270453
## Aug 1993 9.862786 9.608687 10.116885 9.470482 10.255089
## Sep 1993 9.972574 9.718475 10.226673 9.580270 10.364877
## Oct 1993 10.087619 9.833520 10.341718 9.695315 10.479923
## Nov 1993 10.580202 10.326103 10.834301 10.187899 10.972506
## Dec 1993 11.352570 11.098471 11.606669 10.960266 11.744874
## Jan 1994 9.425235 9.171780 9.678689 9.033926 9.816543
## Feb 1994 9.718895 9.460927 9.976862 9.320619 10.117171
## Mar 1994 9.685769 9.342813 10.028724 9.156280 10.215258
## Apr 1994 9.852993 9.595025 10.110960 9.454716 10.251269
## May 1994 9.921949 9.663981 10.179916 9.523673 10.320225
## Jun 1994 9.968405 9.710437 10.226372 9.570128 10.366681
## Jul 1994 10.125858 9.867890 10.383826 9.727582 10.524134
## Aug 1994 10.110494 9.852527 10.368462 9.712218 10.508771
## Sep 1994 10.220282 9.962315 10.478250 9.822006 10.618559
## Oct 1994 10.335327 10.077360 10.593295 9.937051 10.733604
## Nov 1994 10.827911 10.569943 11.085878 10.429635 11.226187
## Dec 1994 11.600278 11.342311 11.858246 11.202002 11.998555
## Jan 1995 9.672943 9.415130 9.930757 9.274905 10.070981
## Feb 1995 9.966603 9.703880 10.229326 9.560985 10.372221
## Mar 1995 9.933477 9.585149 10.281806 9.395693 10.471262
## Apr 1995 10.100701 9.837978 10.363424 9.695083 10.506319
## May 1995 10.169657 9.906934 10.432380 9.764039 10.575276
## Jun 1995 10.216113 9.953390 10.478836 9.810495 10.621731
## Jul 1995 10.373566 10.110843 10.636290 9.967948 10.779185
## Aug 1995 10.358203 10.095480 10.620926 9.952585 10.763821
## Sep 1995 10.467991 10.205268 10.730714 10.062373 10.873609
## Oct 1995 10.583036 10.320313 10.845759 10.177418 10.988654
## Nov 1995 11.075619 10.812896 11.338342 10.670001 11.481237
## Dec 1995 11.847987 11.585264 12.110710 11.442369 12.253605
## Jan 1996 9.920652 9.657609 10.183694 9.514540 10.326763
## Feb 1996 10.214312 9.945993 10.482630 9.800055 10.628569
## Mar 1996 10.181186 9.826866 10.535505 9.634152 10.728220
## Apr 1996 10.348410 10.080091 10.616728 9.934152 10.762667
## May 1996 10.417366 10.149047 10.685684 10.003109 10.831623
## Jun 1996 10.463822 10.195503 10.732140 10.049564 10.878079
## Jul 1996 10.621275 10.352956 10.889594 10.207018 11.035532
## Aug 1996 10.605911 10.337593 10.874230 10.191654 11.020168
## Sep 1996 10.715699 10.447381 10.984018 10.301442 11.129956
## Oct 1996 10.830744 10.562426 11.099063 10.416487 11.245002
## Nov 1996 11.323328 11.055009 11.591646 10.909071 11.737585
## Dec 1996 12.095696 11.827377 12.364014 11.681438 12.509953
#i.How could you improve these predictions by modifying the model?
#Instead of using the actual sales numbers, it might be better to calculate the
#percentage change in sales first then fit the model. Since the percentage unit
#can provide reduce the spikes.
#5.The us_gasoline series consists of weekly data for supplies of US finished
#motor gasoline product, from 2 February 1991 to 20 January 2017.
#The units are in “million barrels per day”. Consider only the data to the end of 2004
#a.Fit a harmonic regression with trend to the data. Experiment with changing
#the number Fourier terms. Plot the observed gasoline and fitted values and comment on what you see.
View(us_gasoline)
autoplot(us_gasoline)
## Plot variable not specified, automatically selected `.vars = Barrels`

gas_ts<-ts(us_gasoline$Barrels,start=(1991-1),end=(2017-1),frequency=52)
gas <- window(gas_ts, end = 2005)
for(num in c(1, 3, 5, 10, 20)){
var_name <- paste("fit",
as.character(num),
"_gasoline_2004",
sep = "")
assign(var_name,
tslm(gas ~ trend + fourier(
gas,
K = num
))
)
print(
autoplot(gas) +
autolayer(get(var_name)$fitted.values,
series = as.character(num)) +
ggtitle(var_name) +
ylab("gasoline") +
guides(colour = guide_legend(title = "Number of Fourier Transform pairs"))
)
}





#The higher the k value, the more fitted the line shows. k=20 shows the the most
#fitted line.
#b.Select the appropriate number of Fourier terms to include by minimizing the AICc or CV value.
for(i in c(1, 3, 5, 10, 20)){
fit_gasoline_2004.name <- paste(
"fit", as.character(i), "_gasoline_2004",
sep = ""
)
writeLines(
paste(
"\n", fit_gasoline_2004.name, "\n"
)
)
print(CV(get(fit_gasoline_2004.name)))
}
##
## fit1_gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 8.012058e-02 -1.969129e+03 -1.969052e+03 -1.945827e+03 8.427750e-01
##
## fit3_gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.470266e-02 -2.023838e+03 -2.023604e+03 -1.981892e+03 8.541546e-01
##
## fit5_gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.365185e-02 -2.034993e+03 -2.034518e+03 -1.974405e+03 8.569479e-01
##
## fit10_gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.173572e-02 -2.056054e+03 -2.054595e+03 -1.948861e+03 8.624864e-01
##
## fit20_gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.367553e-02 -2.036919e+03 -2.031785e+03 -1.836514e+03 8.624881e-01
#The statistic data shows that k=10 minimize the CV (7.17) and AICc (-2.05).
#c.Plot the residuals of the final model using the gg_tsresiduals() function
#and comment on these. Use a Ljung-Box test to check for residual autocorrelation.
checkresiduals(fit10_gasoline_2004)

##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 128.54, df = 104, p-value = 0.05168
#The time plot shows some changing variation over time.
#The histogram nearly shows a normal distribution.
#The significant Ljung-Box test is greater than 0.05. The autocorrelation plot
#shows spikes four times.
#d.Generate forecasts for the next year of data and plot these along with
#the actual data for 2005. Comment on the forecasts.
fc_gasoline_2005 <- forecast(
fit10_gasoline_2004,
newdata=data.frame(fourier(
gas, K = 10, h = 52)
)
)
fc_gasoline_2005
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005.019 8.901276 8.557367 9.245186 8.374931 9.427622
## 2005.038 8.940132 8.596126 9.284138 8.413638 9.466626
## 2005.058 8.982049 8.638038 9.326060 8.455548 9.508551
## 2005.077 9.056949 8.712952 9.400946 8.530469 9.583429
## 2005.096 9.143925 8.799930 9.487920 8.617448 9.670401
## 2005.115 9.192581 8.848582 9.536579 8.666099 9.719062
## 2005.135 9.185356 8.841349 9.529363 8.658862 9.711851
## 2005.154 9.160913 8.816906 9.504919 8.634419 9.687406
## 2005.173 9.169559 8.825559 9.513560 8.643075 9.696044
## 2005.192 9.216866 8.872868 9.560865 8.690384 9.743348
## 2005.212 9.264222 8.920221 9.608223 8.737736 9.790708
## 2005.231 9.281951 8.937946 9.625957 8.755458 9.808444
## 2005.250 9.285826 8.941821 9.629831 8.759334 9.812318
## 2005.269 9.312227 8.968226 9.656228 8.785741 9.838713
## 2005.288 9.367159 9.023159 9.711159 8.840675 9.893643
## 2005.308 9.416957 9.072955 9.760958 8.890470 9.943444
## 2005.327 9.433266 9.089261 9.777272 8.906774 9.959758
## 2005.346 9.433725 9.089720 9.777729 8.907234 9.960216
## 2005.365 9.463951 9.119950 9.807952 8.937465 9.990437
## 2005.385 9.540677 9.196677 9.884678 9.014193 10.067162
## 2005.404 9.625983 9.281981 9.969985 9.099496 10.152471
## 2005.423 9.665636 9.321631 10.009640 9.139144 10.192127
## 2005.442 9.647059 9.303055 9.991063 9.120569 10.173550
## 2005.462 9.610131 9.266130 9.954132 9.083645 10.136617
## 2005.481 9.601794 9.257794 9.945795 9.075310 10.128279
## 2005.500 9.629756 9.285754 9.973759 9.103268 10.156244
## 2005.519 9.664433 9.320428 10.008438 9.137941 10.190924
## 2005.538 9.676302 9.332298 10.020306 9.149812 10.202792
## 2005.558 9.658713 9.314712 10.002714 9.132227 10.185199
## 2005.577 9.615773 9.271772 9.959773 9.089288 10.142258
## 2005.596 9.544821 9.200818 9.888823 9.018333 10.071309
## 2005.615 9.445303 9.101298 9.789309 8.918811 9.971795
## 2005.635 9.341433 8.997429 9.685437 8.814943 9.867923
## 2005.654 9.279412 8.935411 9.623412 8.752926 9.805897
## 2005.673 9.290336 8.946336 9.634336 8.763851 9.816820
## 2005.692 9.357646 9.013643 9.701648 8.831157 9.884134
## 2005.712 9.428370 9.084364 9.772376 8.901877 9.954863
## 2005.731 9.457460 9.113457 9.801464 8.930971 9.983950
## 2005.750 9.437719 9.093720 9.781719 8.911235 9.964203
## 2005.769 9.389996 9.045997 9.733996 8.863513 9.916480
## 2005.788 9.337513 8.993510 9.681516 8.811024 9.864001
## 2005.808 9.298876 8.954869 9.642883 8.772382 9.825371
## 2005.827 9.296233 8.952230 9.640237 8.769744 9.822723
## 2005.846 9.346559 9.002561 9.690557 8.820078 9.873040
## 2005.865 9.430551 9.086553 9.774549 8.904070 9.957032
## 2005.885 9.480414 9.136411 9.824418 8.953925 10.006904
## 2005.904 9.423219 9.079208 9.767229 8.896719 9.949719
## 2005.923 9.251762 8.907762 9.595762 8.725278 9.778246
## 2005.942 9.047458 8.703470 9.391446 8.520993 9.573924
## 2005.962 8.918927 8.574940 9.262915 8.392462 9.445392
## 2005.981 8.911344 8.567381 9.255307 8.384917 9.437771
## 2006.000 8.978870 8.634891 9.322849 8.452418 9.505323
gas2005 = window(gas_ts, start = 2005, end = 2006)
autoplot(gas2005, series = "True",
ylab = "Weekly Gas Supply", xlab = "Year",
main = "Forecast of 2005 vs Actual 2005 Gasoline") +
autolayer(fc_gasoline_2005$mean, series = "Forecast")

#By comparing the forecast and actual 2005 gasoline value, it shows that the forecast
#is able to predict a similar pattern for the actual value.
#6.The annual population of Afghanistan is available in the global_economy data set.
#a.Plot the data and comment on its features. Can you observe the effect of the Soviet-Afghan war?
View(global_economy)
global_economy %>%
filter(Country=="Afghanistan") %>%
tsibble(key = Code, index = Year)%>%
autoplot(Population) +
labs(title= "Afghanistan Population",
y = "Population")

#Yes,the Soviet-Afghan war occurred in 1979-1989. From the Afghanistan population chart
#we can see that a decreasing among that period.
#b.Fit a linear trend model and compare this to a piece wise linear trend model
#with knots at 1980 and 1989.
overall<-global_economy %>%
filter(Country=="Afghanistan")%>%
model(TSLM(Population ~ Year))
report(overall)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -5794518 -2582559 744761 2259222 6036280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -829292529 49730866 -16.68 <2e-16 ***
## Year 425774 25008 17.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3188000 on 56 degrees of freedom
## Multiple R-squared: 0.8381, Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16
#before the war
beforewar<-global_economy %>%
filter(Country=="Afghanistan")%>%
filter(Year<1980)%>%
model(TSLM(Population ~ Year))
report(beforewar)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -146380.8 -110290.6 -451.2 105877.8 202881.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -470734527 8787062 -53.57 <2e-16 ***
## Year 244657 4462 54.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 115100 on 18 degrees of freedom
## Multiple R-squared: 0.994, Adjusted R-squared: 0.9937
## F-statistic: 3007 on 1 and 18 DF, p-value: < 2.22e-16
#after the war
afterwar<-global_economy %>%
filter(Country=="Afghanistan")%>%
filter(Year>1989)%>%
model(TSLM(Population ~ Year))
report(afterwar)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -619234 -212927 6598 234280 612277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.670e+09 1.640e+07 -101.8 <2e-16 ***
## Year 8.451e+05 8.184e+03 103.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 349800 on 26 degrees of freedom
## Multiple R-squared: 0.9976, Adjusted R-squared: 0.9975
## F-statistic: 1.066e+04 on 1 and 26 DF, p-value: < 2.22e-16
#c.Generate forecasts from these two models for the five years after the end of
#the data, and comment on the results.
forecast(beforewar,h=5)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Afghanistan TSLM(Population ~ Year) 1980 N(1.4e+07, 1.6e+10) 13686612.
## 2 Afghanistan TSLM(Population ~ Year) 1981 N(1.4e+07, 1.7e+10) 13931270.
## 3 Afghanistan TSLM(Population ~ Year) 1982 N(1.4e+07, 1.7e+10) 14175927.
## 4 Afghanistan TSLM(Population ~ Year) 1983 N(1.4e+07, 1.8e+10) 14420584.
## 5 Afghanistan TSLM(Population ~ Year) 1984 N(1.5e+07, 1.8e+10) 14665241.
forecast(afterwar,h=5)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Afghanistan TSLM(Population ~ Year) 2018 N(3.6e+07, 1.4e+11) 35925602.
## 2 Afghanistan TSLM(Population ~ Year) 2019 N(3.7e+07, 1.4e+11) 36770747.
## 3 Afghanistan TSLM(Population ~ Year) 2020 N(3.8e+07, 1.4e+11) 37615892.
## 4 Afghanistan TSLM(Population ~ Year) 2021 N(3.8e+07, 1.5e+11) 38461037.
## 5 Afghanistan TSLM(Population ~ Year) 2022 N(3.9e+07, 1.5e+11) 39306182.
forecast(overall,h=5)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Afghanistan TSLM(Population ~ Year) 2018 N(3e+07, 1.1e+13) 29919575.
## 2 Afghanistan TSLM(Population ~ Year) 2019 N(3e+07, 1.1e+13) 30345349.
## 3 Afghanistan TSLM(Population ~ Year) 2020 N(3.1e+07, 1.1e+13) 30771123.
## 4 Afghanistan TSLM(Population ~ Year) 2021 N(3.1e+07, 1.1e+13) 31196897.
## 5 Afghanistan TSLM(Population ~ Year) 2022 N(3.2e+07, 1.1e+13) 31622671.
#The estimated coefficient of population shows an obvious decrease after war