Ch3&7 Exercise

knitr::opts_chunk$set(echo=TRUE)
library(tsibbledata)

## Warning: package 'tsibbledata' was built under R version 4.1.2

library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.1.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tsibbledata)
library(tsibble)
library(forecast)
library(dplyr)
library(feasts)

## Loading required package: fabletools

## 
## Attaching package: 'fabletools'

## The following objects are masked from 'package:forecast':
## 
##     accuracy, forecast

library(ggplot2)
library(histogram)
library(MASS)

## Warning: package 'MASS' was built under R version 4.1.2

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(latex2exp)

## Warning: package 'latex2exp' was built under R version 4.1.2

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✓ tibble    3.1.6     ✓ lubridate 1.8.0
## ✓ tidyr     1.2.0     ✓ fable     0.3.1

## Warning: package 'tidyr' was built under R version 4.1.2

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x lubridate::date()      masks base::date()
## x dplyr::filter()        masks stats::filter()
## x fabletools::forecast() masks forecast::forecast()
## x tsibble::intersect()   masks base::intersect()
## x lubridate::interval()  masks tsibble::interval()
## x dplyr::lag()           masks stats::lag()
## x MASS::select()         masks dplyr::select()
## x tsibble::setdiff()     masks base::setdiff()
## x tsibble::union()       masks base::union()

##########Chapter 3 exercise
#1.Plot the GDP per capita for each country over time. Which country has the
#highest GDP per capita? How has this changed over time?
View(global_economy)
#GDP per capital
globaleconomy <- global_economy %>%
  mutate(GDPperCapita = GDP/Population)
View(globaleconomy)
#plot GDP per capita for each country over years
globaleconomy %>%
  autoplot(GDPperCapita, show.legend =  FALSE) +
  labs(title= "GDP per capita by Country", y = "GDP per capital in $")

## Warning: Removed 3242 row(s) containing missing values (geom_path).

#Monaco has the highest GDP per capita of $185152.53 in 2014.

globaleconomy %>%
  filter(Country == "Monaco") %>%
  autoplot(GDPperCapita) +
  labs(title= "GDP per capita for Monaco", y = "GDP per capital in $")

## Warning: Removed 11 row(s) containing missing values (geom_path).

#It shows an overall increasing trend of Monaco's GDP per capita

#2a.United States GDP from global_economy
USGDP<-global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP / 10 ^ 12) +
  labs(title= "GDP, United States", y = "GDP in trillion $") 
USGDP

#b.Slaughter of Victorian "Bulls, bullocks and steers" in aus_livestock
View(aus_livestock)
aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers",
         State == "Victoria") %>%
  autoplot(Count) +
  labs(title= "Slaughter of Victoria Bulls, Bullocks, and Steers")

#c.Victorian Electricity Demand from vic_elec
View(vic_elec)
#transfer into daily electricity demand
ve<-vic_elec%>%
  group_by(Date)%>%
  mutate(Demand = sum(Demand))%>%
  distinct(Date,Demand)
View(ve)
ve%>%
  as_tsibble(index=Date)%>%
  autoplot(Demand)+
  labs(title="Daily Victorian Electricity Demand from Jan 2012 to Dec 2014")

#d.Gas production from aus_production
View(aus_production)
aus_production%>%
  autoplot(Gas)+
  labs(title="Quarterly Gas production from 1956Q1-2010Q2")

#It shows an increasing seasonal variation, applies a box-cox transformation
lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "", title = TeX(paste0("Transformed Gas Production with $\\lambda$ = ",
                                  round(lambda,2))))

#3.Why is a Box-Cox transformation unhelpful for the canadian_gas data?
View(canadian_gas)
canadian_gas %>%
  autoplot(Volume) +
  labs(title = "Canadian Gas Production from Jan 1960 to Feb 2005")

lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "", title =TeX(paste0("Transformed Gas Production with $\\lambda$ = ",
                                 round(lambda,2))))

histogram(canadian_gas$Volume)

## Choosing between regular and irregular histogram:
## 
## 1.Building regular histogram with maximum number of bins 86.
## - Choosing number of bins via maximum likelihood with BR penalty.
## - Number of bins chosen: 16.
## 
## 
## 2.Building irregular histogram.
## - Using finest grid based on observations.
## - Choosing number of bins via maximum likelihood with PENB penalty.
## - Using greedy procedure to recursively build a finest partition with at most 100 bins.
## - Pre-selected finest partition with 100 bins.
## - Computing weights for dynamic programming algorithm.
## - Now performing dynamic optimization.
## - Number of bins chosen: 6.
## 
## 
## 
## Regular histogram chosen.

## $breaks
##  [1]  0.96600  2.12615  3.28630  4.44645  5.60660  6.76675  7.92690  9.08705
##  [9] 10.24720 11.40735 12.56750 13.72765 14.88780 16.04795 17.20810 18.36825
## [17] 19.52840
## 
## $counts
##  [1] 29 37 35 18 23 82 70 35 34 19 15 17 19 40 50 19
## 
## $density
##  [1] 0.04611950 0.05884212 0.05566146 0.02862590 0.03657753 0.13040686
##  [7] 0.11132293 0.05566146 0.05407114 0.03021622 0.02385491 0.02703557
## [13] 0.03021622 0.06361310 0.07951638 0.03021622
## 
## $mids
##  [1]  1.546075  2.706225  3.866375  5.026525  6.186675  7.346825  8.506975
##  [8]  9.667125 10.827275 11.987425 13.147575 14.307725 15.467875 16.628025
## [15] 17.788175 18.948325
## 
## $xname
## [1] "canadian_gas$Volume"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

#It shows that the Box-Cox transformation is not helpful. The Box-Cox transformation
#usually is used to transforms our data closely to a normal distribution.
#Because the seasonal variation increases and then decreases after 1970, the Box Cox 
#transformation cannot be used to make the seasonal variation uniform.

#4.What Box-Cox transformation would you select for your retail data?
View(aus_retail)
set.seed(12345678)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,Turnover)+
  labs(title = "Retail Turnover", y = "in millions $")

lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
myseries %>%
  autoplot(box_cox(Turnover, lambda)) +
  labs(y = "", title = TeX(paste0("Transformed Retail Turnover with $\\lambda$ = ",
                                  round(lambda,2))))

#5.For the following series, find an appropriate Box-Cox transformation in order 
#to stabilise the variance. Tobacco from aus_production, 
View(aus_production)
autoplot(aus_production, Tobacco)+
  labs(title = "Tobacco from 1956Q1 to 2010Q2")

## Warning: Removed 24 row(s) containing missing values (geom_path).

lambda <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Tobacco, lambda)) +
  labs(y = "", title = TeX(paste0("Transformed Tobacco with $\\lambda$ = ",
                                  round(lambda,2))))

## Warning: Removed 24 row(s) containing missing values (geom_path).

#It shows that the box-cox transformation is not really effective on Tobacco

#Economy class passengers between Melbourne and Sydney from ansett, 
View(ansett)
MEL_SYD<-ansett %>%
  filter(Airports=="MEL-SYD",Class=="Economy")
autoplot(MEL_SYD,Passengers)+
  labs(title="Economy class passengers between Melbourne and Sydney")

lambda <- MEL_SYD %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)
MEL_SYD %>%
  autoplot(box_cox(Passengers, lambda)) +
  labs(y = "", title = TeX(paste0("Transformed Number of MEL-SYD Economy Passengers with $\\lambda$ = ",
                                  round(lambda,2))))

#with a lambda of 2, it shows a more clear variation

#and Pedestrian counts at Southern Cross Station from pedestrian.
View(pedestrian)
S<-pedestrian%>%
  filter(Sensor=="Southern Cross Station")
autoplot(S,Count)+
  labs(title = "Hourly Pedestrian Counts at Southern Cross Station 2015-2016")

lambda <- S %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)
S %>%
  autoplot(box_cox(Count, lambda)) +
  labs(y = "", title = TeX(paste0("Transformed Hourly Southern Cross Station Pedestrian Counts with $\\lambda$ = ",
                                  round(lambda,2))))

#6.Show that a 3×5 MA is equivalent to a 7-term weighted moving average 
#with weights of 0.067, 0.133, 0.200, 0.200, 0.200, 0.133, and 0.067.
#3x5 MA=1/15Y1+2/15Y2+3/15Y3+3/15Y4+3/15Y5+2/15Y6+1/15Y7
#weight=c(0.067, 0.133, 0.200, 0.200, 0.200, 0.133, 0.067)

#7.Consider the last five years of the Gas data from aus_production
View(aus_production)
gas <- tail(aus_production, 5*4) %>% 
  dplyr::select(Gas)
View(gas)
#a.plot the time series. 
#Can you identify seasonal fluctuations and/or a trend-cycle?
autoplot(gas,Gas)

#It shows an overall increasing trend from last five years.

#b.Use classical_decomposition with type=multiplicative to calculate 
#the trend-cycle and seasonal indices.
gas_dcmp <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) 
components(gas_dcmp) %>%
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition for Gas")

## Warning: Removed 2 row(s) containing missing values (geom_path).

#c.Do the results support the graphical interpretation from part a?
#Yes, it do support the graphical interpretation from part a. It shows an increasing
#trend with a seasonal variation increase from Q1 to Q3 then decrease after Q3.

#d.Compute and plot the seasonally adjusted data.
components(gas_dcmp) %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "red") +
  geom_line(aes(y=season_adjust)) +
  labs(title = "Seasonally Adjusted Gas Production")

#e.Change one observation to be an outlier (e.g., add 300 to one observation), 
#and recompute the seasonally adjusted data. What is the effect of the outlier?
gas2 <- gas
gas2$Gas[10] <- gas2$Gas[10]+300
gas2 %>%
  model(classical_decomposition(Gas,type = "multiplicative"))%>%
  components()%>%
  autoplot() +
  labs(title = "Last Five Years of The Gas Data with 300 added")

## Warning: Removed 2 row(s) containing missing values (geom_path).

gas2 %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "red") +
  geom_line(aes(y=season_adjust ,colour = "grey")) +
  labs(title = "Last Five Years of The Gas Data with 300 added")+
  scale_colour_manual(
    values = c("red","grey"),
    breaks = c("Data", "Seasonally Adjusted"))

#f.Does it make any difference if the outlier is near the end rather than in 
#the middle of the time series?
gas3 <- gas
gas3$Gas[20] <- gas3$Gas[20]+300
gas3 %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data with outlier at the End")

## Warning: Removed 2 row(s) containing missing values (geom_path).

#The seasonal data shows less effect, the trend shows a better pattern than with
#an outlier at the middle and shows a more similar patter as the original one but
#with a peak at the end.

#8.Recall your retail time series data (from Exercise 8 in Section 2.10). 
#Decompose the series using X-11. Does it reveal any outliers, 
#or unusual features that you had not noticed previously?
myseries %>%
  model(classical_decomposition(Turnover,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Multiplicative decomposition of Retail")

## Warning: Removed 6 row(s) containing missing values (geom_path).

myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components() %>%
  autoplot()+
  labs(title = "X-11 decomposition of Retail")

#This method is able to capture more noise in the early 1990s which was a 
#recession and it is able to capture the seasonality better. 
#There are a few data points that look to be more irregular that were not seen before.

#9a.It shows an overall increasing trend of the number of persons in the civilian
#labor force in Australia each month from February 1978 to around 1990, then the
#increasing rate slows down. The seasonal data also shows a slightly increasing variation 
#of a peak followed by a decreasing and then raise to another peak.
#b.Yes, the recession of 1991/1992 is visible in the remainder component.

#10.This exercise uses the canadian_gas data (monthly Canadian gas production 
#in billions of cubic metres, January 1960 – February 2005).
#a.Plot the data using autoplot(), gg_subseries() and gg_season() to look at the
#effect of the changing seasonality over time.
View(canadian_gas)
canadian_gas %>%
  autoplot(Volume)+
  labs(title = "Monthly Canadian Gas Production 1960-2005")

canadian_gas %>%
  gg_subseries(Volume)+
  labs(title = "Monthly Canadian Gas Production 1960-2005")

canadian_gas %>%
  gg_season(Volume)+
  labs(title = "Monthly Canadian Gas Production 1960-2005")

#It shows an increasing trend from 1960 to around 1971, then followed by a slightly
#decreasing trend till 1990, and ends with another raise after 1990.And the gas
#production usually decreases in summer and increases in winter.

#b.Do an STL decomposition of the data. You will need to choose a seasonal window 
#to allow for the changing shape of the seasonal component.
canadian_gas %>%
  model(
    STL(Volume ~ trend(window = 10) +
          season(window = 10),
        robust = TRUE)) %>%
  components() %>%
  autoplot()+
  labs(title="SLT Decomposition of Canadian Gas")

#c.How does the seasonal shape change over time?
#The seasonal shape shows an increase in variation from 1970 till around 1985,
#then followed by a decrease in variation after that.

#d.Can you produce a plausible seasonally adjusted series?
canadian_gas %>%
  model(
    STL(Volume ~ trend(window = 10) +
          season(window = 10),
        robust = TRUE)) %>%
  components() %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Volume, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(title = "STL decomposition of Canadian Gas") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

#e.Compare the results with those obtained using SEATS and X-11. How are they different?
#SEATS
canadian_gas %>%
  model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
  components() %>%
  autoplot() +
  labs(title ="SEATS Decomposition of Canadian Gas Production")

#x-11
canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
  components() %>%
  autoplot()+
  labs(title = "X-11 decomposition of Canadian Gas")

#SEATS shows a greater variation than x-11 in irregular component.

###############################################################################
##########Chapter 7 exercises
#1.Half-hourly electricity demand for Victoria, Australia is contained in vic_elec. 
#Extract the January 2014 electricity demand, and aggregate this data to daily 
#with daily total demands and maximum temperatures.
View(vic_elec)
jan14_vic_elec <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )
#a.Plot the data and find the regression model for Demand with temperature as a 
#predictor variable. Why is there a positive relationship?
jan14_vic_elec %>%
  as.data.frame()%>%
  ggplot(aes(x=Temperature, y=Demand)) +
  labs(title="Electricity Demand for Victoria, Australia 2014 Jan",
       y="Electricity Demand",x="Temperature")+
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

tslm_elec<-jan14_vic_elec %>%
  model(TSLM(Demand~Temperature))%>%
  report()

## Series: Demand 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -49978.2 -10218.9   -121.3  18533.2  35440.6 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  59083.9    17424.8   3.391  0.00203 ** 
## Temperature   6154.3      601.3  10.235 3.89e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24540 on 29 degrees of freedom
## Multiple R-squared: 0.7832,  Adjusted R-squared: 0.7757
## F-statistic: 104.7 on 1 and 29 DF, p-value: 3.8897e-11

#The regression output indicates that the higher the temperature, people will demand
#more electric power. One reason could be that people need to use air conditioner.

#b.Produce a residual plot. Is the model adequate? Are there any outliers or 
#influential observations?
jan14_vic_elec %>%
  as.data.frame()%>%
  ggplot(aes(x=Temperature, y=Demand)) +
  labs(title="Electricity Demand for Victoria, Australia 2014 Jan",
       y="Electricity Demand",x="Temperature")+
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

tslm_elec %>% gg_tsresiduals()

#The model is adequate. The residuals tend to increase gradually over the time.
#Most of the residuals is within the range of 0 to 30,000 but there was outliers.

#c.Use the model to forecast the electricity demand that you would expect for 
#the next day if the maximum temperature was 15∘C and compare it with the forecast 
#if the with maximum temperature was  35∘C. 
#Do you believe these forecasts? The following R code will get you started:
newscenarios<-scenarios(
  "Temperature of 15∘C" = new_data(jan14_vic_elec, 1) %>%
    mutate(Temperature = 15),
  "Temperature of 35∘C" = new_data(jan14_vic_elec, 1) %>%
    mutate(Temperature = 35),
  names_to = "Scenario")
  
fcast <- forecast(tslm_elec,newscenarios)
fcast

## # A fable: 2 x 6 [1D]
## # Key:     Scenario, .model [2]
##   Scenario            .model    Date                   Demand  .mean Temperature
##   <chr>               <chr>     <date>                 <dist>  <dbl>       <dbl>
## 1 Temperature of 15∘C TSLM(Dem… 2014-02-01 N(151398, 6.8e+08) 1.51e5          15
## 2 Temperature of 35∘C TSLM(Dem… 2014-02-01 N(274484, 6.4e+08) 2.74e5          35

#Temperature of 15∘C: 1.51e5, Temperature of 35∘C: 2.74e5. I do believe this forecast
#it's constant with the previous result that a higher temperature is associated
#with a higher demand of electric power.

jan14_vic_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan14_vic_elec, 1) %>%
      mutate(Temperature = 15)
  ) %>%
  autoplot(jan14_vic_elec)

jan14_vic_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan14_vic_elec, 1) %>%
      mutate(Temperature = 35)
  ) %>%
  autoplot(jan14_vic_elec)

#d.Given prediction intervals for your forecasts
lm <- lm(Demand ~ Temperature, data=jan14_vic_elec)
forecast(lm, newdata=data.frame(Temperature=c(15,35)))

##           Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 151398.35       151398.4 117127.2 185669.5  97951.22 204845.5
## 274484.25       274484.2 241333.0 307635.5 222783.69 326184.8

#e.Plot Demand vs Temperature for all of the available data  in vic_elec 
#aggregated to daily total demand and maximum temperature. 
#What does this say about your model?
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temperature")

#The temperature does have a relationship with electricity demand, but it's not
#a linear relationship. Below the temperature of 20, the electricity demand shows
#a negative relationship with the temperature; however, after 20, it shows a positive
#relationship between temperature and eletricity demand.

#2.Data set olympic_running contains the winning times (in seconds) in each 
#Olympic Games sprint, middle-distance and long-distance track events from 1896 to 2016.
#a.Plot the winning time against the year for each event. Describe the main features of the plot.
View(olympic_running)
autoplot(olympic_running)

## Plot variable not specified, automatically selected `.vars = Time`

#It shows a obvious decreasing trend for 10,000 mem's and women's running over years
#It also shows missing values for some periods

#b.Fit a regression line to the data for each event. Obviously the winning times 
#have been decreasing, but at what average rate per year?
#filter data into male and female
male<-olympic_running%>%
  filter(Sex=='men')

female<-olympic_running%>%
  filter(Sex=='women')
View(female)
#Male Running Section:
fitted1 = male %>%
  group_by(Length) %>% do(model = lm(Time~Year, data =.))
fitted1$model

## [[1]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    35.01158     -0.01261  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    69.37650     -0.02488  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##   172.48148     -0.06457  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    405.8457      -0.1518  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    843.4367      -0.3151  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##     2853.20        -1.03  
## 
## 
## [[7]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    6965.459       -2.666

#100m: time using decreases by 0.01261 per year
#200m: time using decreases by 0.02488 per year
#400m: time using decreases by0.06457 per year
#800m: time using decreases by 0.1518 per year
#1,500m: time using decreases by 0.3151 per year
#5,000m: time using decreases by 1.03 per year
#10,000m: time using decreases by 2.666 per year

#Female Running Section:
fitted2 = female %>%
  group_by(Length) %>% do(model = lm(Time~Year, data =.))
fitted2$model

## [[1]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    39.18816     -0.01419  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    89.13761     -0.03363  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##   129.39165     -0.04008  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##     511.369       -0.198  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    -50.9926       0.1468  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    1504.175       -0.303  
## 
## 
## [[7]]
## 
## Call:
## lm(formula = Time ~ Year, data = .)
## 
## Coefficients:
## (Intercept)         Year  
##    8825.260       -3.496

#100m: time using decreases by 0.01419 per year
#200m: time using decreases by 0.03363 per year
#400m: time using decreases by 0.04008 per year
#800m: time using decreases by 0.198 per year
#1,500m: time using decreases by 0.1468 per year
#5,000m: time using decreases by 0.303 per year
#10,000m: time using decreases by 3.496 per year

#c.Plot the residuals against the year. 
#What does this indicate about the suitability of the fitted lines?
mens100<-male%>%
  filter(Length==100)
men100<-lm(Time~Year,data=mens100)
checkresiduals(men100)

## 
##  Breusch-Godfrey test for serial correlation of order up to 6
## 
## data:  Residuals
## LM test = 2.2103, df = 6, p-value = 0.8994

#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly left skewed.
mens1500<-male%>%
  filter(Length==1500)
men1500<-lm(Time~Year,data=mens1500)
checkresiduals(men1500)

## 
##  Breusch-Godfrey test for serial correlation of order up to 6
## 
## data:  Residuals
## LM test = 5.4204, df = 6, p-value = 0.4911

#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly right skewed.

women100<-female%>%
  filter(Length==100)
fwomen100<-lm(Time~Year,data=women100)
checkresiduals(fwomen100)

## 
##  Breusch-Godfrey test for serial correlation of order up to 5
## 
## data:  Residuals
## LM test = 1.482, df = 5, p-value = 0.9151

#p-value>0.05, residuals are not correlated each other.
women1500<-female%>%
  filter(Length==1500)
fwomen1500<-lm(Time~Year,data=women1500)
checkresiduals(fwomen1500)

## 
##  Breusch-Godfrey test for serial correlation of order up to 5
## 
## data:  Residuals
## LM test = 3.116, df = 5, p-value = 0.6821

#p-value>0.05, residuals are not correlated each other, also the histogram shows
#a slightly left skewed.

#d.Predict the winning time for each race in the 2020 Olympics. 
#Give a prediction interval for your forecasts. 
#What assumptions have you made in these calculations?
mens100 %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(
    new_data(olympic_running, 1) %>%
      mutate(Year = 2020)
  ) %>%
  autoplot(olympic_running)

mens1500 %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(
    new_data(olympic_running, 1) %>%
      mutate(Year = 2020)
  ) %>%
  autoplot(olympic_running)

women100 %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(
    new_data(olympic_running, 1) %>%
      mutate(Year = 2020)
  ) %>%
  autoplot(olympic_running)

women1500 %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(
    new_data(olympic_running, 1) %>%
      mutate(Year = 2020)
  ) %>%
  autoplot(olympic_running)

#All of the four graphs show that the time using for 2020 will decrease, which
#is consitent with the previous finding.

#3. logy=β0 + β1logx + ε could be also wrote as
#y=e^(β0 + β1logx + e)
#dy/dx=(β1/x)e^(β0 + β1logx + e)
#dy/dx = β1*y/x
#β1=(dy/dx)*(x/y) --> β1 measures the ratio of the percentage change in y
#to the percentage change in x, so, β1 is the elasticity coefficient.

#4.The data set souvenirs concerns the monthly sales figures of a shop which opened
#in Jan 1987 and sells gifts, souvenirs, and novelties. The shop is situated on
#the wharf at a beach resort town in Queensland, Australia. The sales volume varies
#with the seasonal population of tourists. There is a large influx of visitors to
#the town at Christmas and for the local surfing festival, held every March since 1988.
#Over time, the shop has expanded its premises, range of products, and staff.
#a.Produce a time plot of the data and describe the patterns in the graph
#Identify any unusual or unexpected fluctuations in the time series.
View(souvenirs)
autoplot(souvenirs)

## Plot variable not specified, automatically selected `.vars = Sales`

#The plot shows an increasing trend with seasonal variation. One unusual pattern
#was between year of 1993 and 1994, it shows a greater turbulence than before.

#b.Explain why it is necessary to take logarithms of these data before fitting a model.
#Because logarithms can handle a non-linear relationship and transform a skewed
#variable into one that is more approximately normal for better prediction.
#It's necessary to transfer to logarithms to keep a constant seasonal pattern.

#c.Fit a regression model to the logarithms of these sales data with a linear trend, 
#seasonal dummies and a “surfing festival” dummy variable.
souvenirs_ts<-ts(souvenirs$Sales,start=1987,end=1993,frequency=12)
head(souvenirs_ts,84)

##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1987  1664.81  2397.53  2840.71  3547.29  3752.96  3714.74  4349.61  3566.34
## 1988  2499.81  5198.24  7225.14  4806.03  5900.88  4951.34  6179.12  4752.15
## 1989  4717.02  5702.63  9957.58  5304.78  6492.43  6630.80  7349.62  8176.62
## 1990  5921.10  5814.58 12421.25  6369.77  7609.12  7224.75  8121.22  7979.25
## 1991  4826.64  6470.23  9638.77  8821.17  8722.37 10209.48 11276.55 12552.22
## 1992  7615.03  9849.69 14558.40 11587.33  9332.56 13082.09 16732.78 19888.61
## 1993 10243.24                                                               
##           Sep      Oct      Nov      Dec
## 1987  5021.82  6423.48  7600.60 19756.21
## 1988  5496.43  5835.10 12600.08 28541.72
## 1989  8573.17  9690.50 15151.84 34061.01
## 1990  8093.06  8476.70 17914.66 30114.41
## 1991 11637.39 13606.89 21822.11 45060.69
## 1992 23933.38 25391.35 36024.80 80721.71
## 1993

fest<- rep(0, length(souvenirs_ts))
fest[seq_along(fest)%%12 == 3] = 1
fest[3] = 0
fest<- ts(fest, freq = 12, start=c(1987,1))
souvenirs_ts1<- data.frame(souvenirs_ts, fest)
souvenirs_tslm<- tslm(BoxCox(souvenirs_ts1$souvenirs_ts,0)~season+trend+fest)
souvenirs_tslm

## 
## Call:
## tslm(formula = BoxCox(souvenirs_ts1$souvenirs_ts, 0) ~ season + 
##     trend + fest)
## 
## Coefficients:
## (Intercept)      season2      season3      season4      season5      season6  
##     7.67063      0.27302      0.21925      0.36583      0.41414      0.43996  
##     season7      season8      season9     season10     season11     season12  
##     0.57677      0.54076      0.62991      0.72431      1.19625      1.94798  
##       trend         fest  
##     0.02064      0.56067

#d.Plot the residuals against time and against the fitted values. Do these plots
#reveal any problems with the model?
autoplot(souvenirs_tslm$residuals)

#The plot shows the residuals are around 0. So, the logarithms transformation
#does improve the model.

#e.Do boxplots of the residuals for each month. Does this reveal any problems with 
#the model?
boxplot(residuals(souvenirs_tslm)~cycle(residuals(souvenirs_tslm)))

#Yes, the wide range residuals for several months (e.g., month 3) become an issue
#that prevent the model to fit the residuals.

#f.What do the values of the coefficients tell you about each variable?
souvenirs_tslm$coefficients

## (Intercept)     season2     season3     season4     season5     season6 
##  7.67063278  0.27301763  0.21924940  0.36583070  0.41414455  0.43995797 
##     season7     season8     season9    season10    season11    season12 
##  0.57676896  0.54076308  0.62990870  0.72431141  1.19625228  1.94797768 
##       trend        fest 
##  0.02064238  0.56067468

#The coefficients shows an increasing trend in sales over the year.

#g.What does the Ljung-Box test tell you about your model?
checkresiduals(souvenirs_tslm)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals from Linear regression model
## LM test = 32.702, df = 17, p-value = 0.01229

#The p-value of 0.012 (<0.05) shows that the residuals are correlated.
#The histogram almost shows a normal distribution

#h.Regardless of your answers to the above questions, use your regression model
#to predict the monthly sales for 1994, 1995, and 1996. Produce prediction intervals
#for each of your forecasts.
fore <- data.frame(fest = rep(0, 47))
preds <- forecast(souvenirs_tslm, newdata=fore)
preds

##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Feb 1993       9.471186  9.217087  9.725285  9.078883  9.863490
## Mar 1993       9.438060  9.099830  9.776290  8.915867  9.960253
## Apr 1993       9.605284  9.351185  9.859383  9.212980  9.997588
## May 1993       9.674240  9.420141  9.928339  9.281937 10.066544
## Jun 1993       9.720696  9.466597  9.974795  9.328392 10.113000
## Jul 1993       9.878149  9.624050 10.132249  9.485846 10.270453
## Aug 1993       9.862786  9.608687 10.116885  9.470482 10.255089
## Sep 1993       9.972574  9.718475 10.226673  9.580270 10.364877
## Oct 1993      10.087619  9.833520 10.341718  9.695315 10.479923
## Nov 1993      10.580202 10.326103 10.834301 10.187899 10.972506
## Dec 1993      11.352570 11.098471 11.606669 10.960266 11.744874
## Jan 1994       9.425235  9.171780  9.678689  9.033926  9.816543
## Feb 1994       9.718895  9.460927  9.976862  9.320619 10.117171
## Mar 1994       9.685769  9.342813 10.028724  9.156280 10.215258
## Apr 1994       9.852993  9.595025 10.110960  9.454716 10.251269
## May 1994       9.921949  9.663981 10.179916  9.523673 10.320225
## Jun 1994       9.968405  9.710437 10.226372  9.570128 10.366681
## Jul 1994      10.125858  9.867890 10.383826  9.727582 10.524134
## Aug 1994      10.110494  9.852527 10.368462  9.712218 10.508771
## Sep 1994      10.220282  9.962315 10.478250  9.822006 10.618559
## Oct 1994      10.335327 10.077360 10.593295  9.937051 10.733604
## Nov 1994      10.827911 10.569943 11.085878 10.429635 11.226187
## Dec 1994      11.600278 11.342311 11.858246 11.202002 11.998555
## Jan 1995       9.672943  9.415130  9.930757  9.274905 10.070981
## Feb 1995       9.966603  9.703880 10.229326  9.560985 10.372221
## Mar 1995       9.933477  9.585149 10.281806  9.395693 10.471262
## Apr 1995      10.100701  9.837978 10.363424  9.695083 10.506319
## May 1995      10.169657  9.906934 10.432380  9.764039 10.575276
## Jun 1995      10.216113  9.953390 10.478836  9.810495 10.621731
## Jul 1995      10.373566 10.110843 10.636290  9.967948 10.779185
## Aug 1995      10.358203 10.095480 10.620926  9.952585 10.763821
## Sep 1995      10.467991 10.205268 10.730714 10.062373 10.873609
## Oct 1995      10.583036 10.320313 10.845759 10.177418 10.988654
## Nov 1995      11.075619 10.812896 11.338342 10.670001 11.481237
## Dec 1995      11.847987 11.585264 12.110710 11.442369 12.253605
## Jan 1996       9.920652  9.657609 10.183694  9.514540 10.326763
## Feb 1996      10.214312  9.945993 10.482630  9.800055 10.628569
## Mar 1996      10.181186  9.826866 10.535505  9.634152 10.728220
## Apr 1996      10.348410 10.080091 10.616728  9.934152 10.762667
## May 1996      10.417366 10.149047 10.685684 10.003109 10.831623
## Jun 1996      10.463822 10.195503 10.732140 10.049564 10.878079
## Jul 1996      10.621275 10.352956 10.889594 10.207018 11.035532
## Aug 1996      10.605911 10.337593 10.874230 10.191654 11.020168
## Sep 1996      10.715699 10.447381 10.984018 10.301442 11.129956
## Oct 1996      10.830744 10.562426 11.099063 10.416487 11.245002
## Nov 1996      11.323328 11.055009 11.591646 10.909071 11.737585
## Dec 1996      12.095696 11.827377 12.364014 11.681438 12.509953

#i.How could you improve these predictions by modifying the model?
#Instead of using the actual sales numbers, it might be better to calculate the
#percentage change in sales first then fit the model. Since the percentage unit
#can provide reduce the spikes.

#5.The us_gasoline series consists of weekly data for supplies of US finished 
#motor gasoline product, from 2 February 1991 to 20 January 2017. 
#The units are in “million barrels per day”. Consider only the data to the end of 2004
#a.Fit a harmonic regression with trend to the data. Experiment with changing 
#the number Fourier terms. Plot the observed gasoline and fitted values and comment on what you see.
View(us_gasoline)
autoplot(us_gasoline)

## Plot variable not specified, automatically selected `.vars = Barrels`

gas_ts<-ts(us_gasoline$Barrels,start=(1991-1),end=(2017-1),frequency=52)
gas <- window(gas_ts, end = 2005)
for(num in c(1, 3, 5, 10, 20)){
  var_name <- paste("fit",
                    as.character(num),
                    "_gasoline_2004",
                    sep = "")
  
  assign(var_name,
         tslm(gas ~ trend + fourier(
           gas,
           K = num
         ))
  )
  
  print(
    autoplot(gas) +
      autolayer(get(var_name)$fitted.values,
                series = as.character(num)) +
      ggtitle(var_name) +
      ylab("gasoline") +
      guides(colour = guide_legend(title = "Number of Fourier Transform pairs"))
  )
}

#The higher the k value, the more fitted the line shows. k=20 shows the the most
#fitted line.

#b.Select the appropriate number of Fourier terms to include by minimizing the AICc or CV value.
for(i in c(1, 3, 5, 10, 20)){
  fit_gasoline_2004.name <- paste(
    "fit", as.character(i), "_gasoline_2004",
    sep = ""
  )
  writeLines(
    paste(
      "\n", fit_gasoline_2004.name, "\n"
    )
  )
  print(CV(get(fit_gasoline_2004.name)))
}

## 
##  fit1_gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  8.012058e-02 -1.969129e+03 -1.969052e+03 -1.945827e+03  8.427750e-01 
## 
##  fit3_gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.470266e-02 -2.023838e+03 -2.023604e+03 -1.981892e+03  8.541546e-01 
## 
##  fit5_gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.365185e-02 -2.034993e+03 -2.034518e+03 -1.974405e+03  8.569479e-01 
## 
##  fit10_gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.173572e-02 -2.056054e+03 -2.054595e+03 -1.948861e+03  8.624864e-01 
## 
##  fit20_gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.367553e-02 -2.036919e+03 -2.031785e+03 -1.836514e+03  8.624881e-01

#The statistic data shows that k=10 minimize the CV (7.17) and AICc (-2.05).

#c.Plot the residuals of the final model using the gg_tsresiduals() function 
#and comment on these. Use a Ljung-Box test to check for residual autocorrelation.
checkresiduals(fit10_gasoline_2004)

## 
##  Breusch-Godfrey test for serial correlation of order up to 104
## 
## data:  Residuals from Linear regression model
## LM test = 128.54, df = 104, p-value = 0.05168

#The time plot shows some changing variation over time.
#The histogram nearly shows a normal distribution.
#The significant Ljung-Box test is greater than 0.05. The autocorrelation plot
#shows spikes four times.

#d.Generate forecasts for the next year of data and plot these along with 
#the actual data for 2005. Comment on the forecasts.
fc_gasoline_2005 <- forecast(
  fit10_gasoline_2004,
  newdata=data.frame(fourier(
    gas, K = 10, h = 52)
  )
)
fc_gasoline_2005

##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## 2005.019       8.901276 8.557367  9.245186 8.374931  9.427622
## 2005.038       8.940132 8.596126  9.284138 8.413638  9.466626
## 2005.058       8.982049 8.638038  9.326060 8.455548  9.508551
## 2005.077       9.056949 8.712952  9.400946 8.530469  9.583429
## 2005.096       9.143925 8.799930  9.487920 8.617448  9.670401
## 2005.115       9.192581 8.848582  9.536579 8.666099  9.719062
## 2005.135       9.185356 8.841349  9.529363 8.658862  9.711851
## 2005.154       9.160913 8.816906  9.504919 8.634419  9.687406
## 2005.173       9.169559 8.825559  9.513560 8.643075  9.696044
## 2005.192       9.216866 8.872868  9.560865 8.690384  9.743348
## 2005.212       9.264222 8.920221  9.608223 8.737736  9.790708
## 2005.231       9.281951 8.937946  9.625957 8.755458  9.808444
## 2005.250       9.285826 8.941821  9.629831 8.759334  9.812318
## 2005.269       9.312227 8.968226  9.656228 8.785741  9.838713
## 2005.288       9.367159 9.023159  9.711159 8.840675  9.893643
## 2005.308       9.416957 9.072955  9.760958 8.890470  9.943444
## 2005.327       9.433266 9.089261  9.777272 8.906774  9.959758
## 2005.346       9.433725 9.089720  9.777729 8.907234  9.960216
## 2005.365       9.463951 9.119950  9.807952 8.937465  9.990437
## 2005.385       9.540677 9.196677  9.884678 9.014193 10.067162
## 2005.404       9.625983 9.281981  9.969985 9.099496 10.152471
## 2005.423       9.665636 9.321631 10.009640 9.139144 10.192127
## 2005.442       9.647059 9.303055  9.991063 9.120569 10.173550
## 2005.462       9.610131 9.266130  9.954132 9.083645 10.136617
## 2005.481       9.601794 9.257794  9.945795 9.075310 10.128279
## 2005.500       9.629756 9.285754  9.973759 9.103268 10.156244
## 2005.519       9.664433 9.320428 10.008438 9.137941 10.190924
## 2005.538       9.676302 9.332298 10.020306 9.149812 10.202792
## 2005.558       9.658713 9.314712 10.002714 9.132227 10.185199
## 2005.577       9.615773 9.271772  9.959773 9.089288 10.142258
## 2005.596       9.544821 9.200818  9.888823 9.018333 10.071309
## 2005.615       9.445303 9.101298  9.789309 8.918811  9.971795
## 2005.635       9.341433 8.997429  9.685437 8.814943  9.867923
## 2005.654       9.279412 8.935411  9.623412 8.752926  9.805897
## 2005.673       9.290336 8.946336  9.634336 8.763851  9.816820
## 2005.692       9.357646 9.013643  9.701648 8.831157  9.884134
## 2005.712       9.428370 9.084364  9.772376 8.901877  9.954863
## 2005.731       9.457460 9.113457  9.801464 8.930971  9.983950
## 2005.750       9.437719 9.093720  9.781719 8.911235  9.964203
## 2005.769       9.389996 9.045997  9.733996 8.863513  9.916480
## 2005.788       9.337513 8.993510  9.681516 8.811024  9.864001
## 2005.808       9.298876 8.954869  9.642883 8.772382  9.825371
## 2005.827       9.296233 8.952230  9.640237 8.769744  9.822723
## 2005.846       9.346559 9.002561  9.690557 8.820078  9.873040
## 2005.865       9.430551 9.086553  9.774549 8.904070  9.957032
## 2005.885       9.480414 9.136411  9.824418 8.953925 10.006904
## 2005.904       9.423219 9.079208  9.767229 8.896719  9.949719
## 2005.923       9.251762 8.907762  9.595762 8.725278  9.778246
## 2005.942       9.047458 8.703470  9.391446 8.520993  9.573924
## 2005.962       8.918927 8.574940  9.262915 8.392462  9.445392
## 2005.981       8.911344 8.567381  9.255307 8.384917  9.437771
## 2006.000       8.978870 8.634891  9.322849 8.452418  9.505323

gas2005 = window(gas_ts, start = 2005, end = 2006)

autoplot(gas2005, series = "True", 
         ylab = "Weekly Gas Supply", xlab = "Year", 
         main = "Forecast of 2005 vs Actual 2005 Gasoline") + 
  autolayer(fc_gasoline_2005$mean, series = "Forecast")

#By comparing the forecast and actual 2005 gasoline value, it shows that the forecast
#is able to predict a similar pattern for the actual value.

#6.The annual population of Afghanistan is available in the global_economy data set.
#a.Plot the data and comment on its features. Can you observe the effect of the Soviet-Afghan war?
View(global_economy)
global_economy %>%
  filter(Country=="Afghanistan") %>%
  tsibble(key = Code, index = Year)%>%
  autoplot(Population) +
  labs(title= "Afghanistan Population",
       y = "Population")

#Yes,the Soviet-Afghan war occurred in 1979-1989. From the Afghanistan population chart
#we can see that a decreasing among that period.

#b.Fit a linear trend model and compare this to a piece wise linear trend model 
#with knots at 1980 and 1989.
overall<-global_economy %>%
  filter(Country=="Afghanistan")%>%
  model(TSLM(Population ~ Year))
report(overall)

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5794518 -2582559   744761  2259222  6036280 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -829292529   49730866  -16.68   <2e-16 ***
## Year            425774      25008   17.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3188000 on 56 degrees of freedom
## Multiple R-squared: 0.8381,  Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16

#before the war
beforewar<-global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year<1980)%>%
  model(TSLM(Population ~ Year))
report(beforewar)

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -146380.8 -110290.6    -451.2  105877.8  202881.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -470734527    8787062  -53.57   <2e-16 ***
## Year            244657       4462   54.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115100 on 18 degrees of freedom
## Multiple R-squared: 0.994,   Adjusted R-squared: 0.9937
## F-statistic:  3007 on 1 and 18 DF, p-value: < 2.22e-16

#after the war
afterwar<-global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year>1989)%>%
  model(TSLM(Population ~ Year))
report(afterwar)

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -619234 -212927    6598  234280  612277 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.670e+09  1.640e+07  -101.8   <2e-16 ***
## Year         8.451e+05  8.184e+03   103.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 349800 on 26 degrees of freedom
## Multiple R-squared: 0.9976,  Adjusted R-squared: 0.9975
## F-statistic: 1.066e+04 on 1 and 26 DF, p-value: < 2.22e-16

#c.Generate forecasts from these two models for the five years after the end of 
#the data, and comment on the results.
forecast(beforewar,h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  1980 N(1.4e+07, 1.6e+10) 13686612.
## 2 Afghanistan TSLM(Population ~ Year)  1981 N(1.4e+07, 1.7e+10) 13931270.
## 3 Afghanistan TSLM(Population ~ Year)  1982 N(1.4e+07, 1.7e+10) 14175927.
## 4 Afghanistan TSLM(Population ~ Year)  1983 N(1.4e+07, 1.8e+10) 14420584.
## 5 Afghanistan TSLM(Population ~ Year)  1984 N(1.5e+07, 1.8e+10) 14665241.

forecast(afterwar,h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018 N(3.6e+07, 1.4e+11) 35925602.
## 2 Afghanistan TSLM(Population ~ Year)  2019 N(3.7e+07, 1.4e+11) 36770747.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.8e+07, 1.4e+11) 37615892.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.8e+07, 1.5e+11) 38461037.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.9e+07, 1.5e+11) 39306182.

forecast(overall,h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018   N(3e+07, 1.1e+13) 29919575.
## 2 Afghanistan TSLM(Population ~ Year)  2019   N(3e+07, 1.1e+13) 30345349.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.1e+07, 1.1e+13) 30771123.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.1e+07, 1.1e+13) 31196897.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.2e+07, 1.1e+13) 31622671.

#The estimated coefficient of population shows an obvious decrease after war

Ch3&7 Exercise

Rachel Zhou

4/3/2022