#install and load libary
#install.packages("fpp3")
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.1.6 ✔ tsibble 1.1.3
## ✔ dplyr 1.0.7 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.1.4 ✔ feasts 0.3.0
## ✔ lubridate 1.8.0 ✔ fable 0.3.2
## ✔ ggplot2 3.3.5 ✔ fabletools 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tsibble)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
3.1 Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
summary(global_economy)
## Country Code Year GDP
## Afghanistan : 58 ABW : 58 Min. :1960 Min. :8.824e+06
## Albania : 58 AFG : 58 1st Qu.:1974 1st Qu.:2.155e+09
## Algeria : 58 AGO : 58 Median :1989 Median :1.483e+10
## American Samoa: 58 ALB : 58 Mean :1989 Mean :1.034e+12
## Andorra : 58 AND : 58 3rd Qu.:2003 3rd Qu.:1.839e+11
## Angola : 58 ARB : 58 Max. :2017 Max. :8.074e+13
## (Other) :14802 (Other):14802 NA's :3322
## Growth CPI Imports Exports
## Min. :-64.047 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.617 1st Qu.: 13.46 1st Qu.: 21.61 1st Qu.: 17.54
## Median : 3.913 Median : 53.86 Median : 31.13 Median : 26.95
## Mean : 3.909 Mean : 56.93 Mean : 38.03 Mean : 33.39
## 3rd Qu.: 6.242 3rd Qu.: 90.28 3rd Qu.: 49.08 3rd Qu.: 42.25
## Max. :149.973 Max. :4583.70 Max. :427.58 Max. :433.22
## NA's :3756 NA's :7480 NA's :4554 NA's :4563
## Population
## Min. :4.279e+03
## 1st Qu.:9.251e+05
## Median :6.345e+06
## Mean :2.060e+08
## 3rd Qu.:4.220e+07
## Max. :7.530e+09
## NA's :3
# I would like to check what kind of data that global_economy contain.
head(global_economy)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
# look a little bit deeper to see the actual data
global_economy %>%
autoplot(GDP/Population, show.legend = FALSE) +
labs(title= "GDP per capita for each country", y = "Dollar", x ="change over year")
## Warning: Removed 3242 row(s) containing missing values (geom_path).
#set up a formula to get GDP per capita for each country
GDPpercapita <- global_economy %>%
group_by(Country, GDP, Population) %>%
summarise(GDPPerCap = GDP/Population) %>%
arrange(desc(GDPPerCap))
#sorting from the highest to lowest
head(GDPpercapita)
## # A tsibble: 6 x 5 [1Y]
## # Key: Country, GDP, Population [6]
## # Groups: Country, GDP [6]
## Country GDP Population Year GDPPerCap
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Monaco 7060236168. 38132 2014 185153.
## 2 Monaco 6476490406. 35853 2008 180640.
## 3 Liechtenstein 6657170923. 37127 2014 179308.
## 4 Liechtenstein 6391735894. 36834 2013 173528.
## 5 Monaco 6553372278. 37971 2013 172589.
## 6 Monaco 6468252212. 38499 2016 168011.
#getting the top one. we can see Monaco has the highest GDPPerCap, the next is Liechtenstein
3.2 For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
United States GDP from global_economy. Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock. Victorian Electricity Demand from vic_elec. Gas production from aus_production.
#United States GDP from global_economy
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP) +
labs(title= "United States GDP from global_economy")
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP/Population) +
labs(title= "Edited United States GDP from global_economy")
#trying to see the GDP per Person like question 3.1
#Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
autoplot(Count) +
labs(title= "Slaughter of Victorian “Bulls, bullocks and steers")
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
mutate(Year = year(Month)) %>%
index_by(Year) %>%
summarise(Count = sum(Count)) %>%
autoplot(Count) +
labs(title= "Edited Slaughter of Victorian “Bulls, bullocks and steers")
#trying to see oversee the change by year since it is a bit too busy by month.
#Victorian Electricity Demand from vic_elec.
vic_elec %>%
autoplot(Demand)+
labs(title= "Victorian Electricity Demand")
vic_elec %>%
mutate(Date = yearmonth(Date)) %>%
index_by(Date) %>%
summarise(Demand = sum(Demand)) %>%
autoplot(Demand)+
labs(title= "Edited Victorian Electricity Demand")
#try to use summarise(Demand = sum(Demand)) to make it more readable.
#Gas production from aus_production
aus_production %>%
autoplot(Gas)+
labs(title= "Gas production")
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda))+
labs(title= "Gas production")
#A good value of λ is one which makes the size of the seasonal variation about the same across the whole series
Why is a Box-Cox transformation unhelpful for the canadian_gas data?
canadian_gas %>%
autoplot(Volume)
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda))
#the value of λ for canadian_gas does not make the size of the seasonal variation about the same across the whole series
3.4 What Box-Cox transformation would you select for your retail data (from Exercise 8 in Section 2.10)?
#from ex.8 in section 2.1
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover)
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
autoplot(box_cox(Turnover, lambda))
lambda
## [1] 0.08303631
#the λ is 0.083
3.5 For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.
#Tobacco from aus_production
autoplot(aus_production, Tobacco)
## Warning: Removed 24 row(s) containing missing values (geom_path).
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(title= "Edited aus_production")
## Warning: Removed 24 row(s) containing missing values (geom_path).
#Economy class passengers between Melbourne and Sydney from ansett
econfrommeltosyd <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD")
econfrommeltosyd %>% autoplot(Passengers)
lambda <- econfrommeltosyd %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
econfrommeltosyd %>%
autoplot(box_cox(Passengers, lambda))+
labs(title= "Edited econfrommeltosyd")
#Pedestrian counts at Southern Cross Station from pedestrian
Southerncount <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
group_by(Sensor) %>%
index_by(Week = yearweek(Date_Time)) %>%
summarise(Count = sum(Count))
Southerncount %>% autoplot(Count)
lambda <- Southerncount %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
Southerncount %>%
autoplot(box_cox(Count, lambda)) +
labs(title= "Edited Southerncount")
3.7 Consider the last five years of the Gas data from aus_production.
gas <- tail(aus_production, 5*4) |> select(Gas) Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle? Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices. Do the results support the graphical interpretation from part a? Compute and plot the seasonally adjusted data. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier? Does it make any difference if the outlier is near the end rather than in the middle of the time series?
gas <- tail(aus_production, 5*4) %>%
select(Gas)
#get it from the question
autoplot(gas, Gas) +
labs(title= "plot for a.")
#there is a trend that starting of the year is low, and it rising during Q3.
gas_multi <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_multi) %>%
autoplot()+
labs(title= "plot for b.")
## Warning: Removed 2 row(s) containing missing values (geom_path).
#ans of c, yes, we can see the increase trend from plot for b match the plot for a.
components(gas_multi) %>%
as_tsibble() %>%
autoplot(Gas) +
geom_line(aes(y=season_adjust)) +
labs(title= "plot for d.")
#pick the highest gas which is 252
gas %>%
mutate(Gas = ifelse(Gas == 252, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas) +
geom_line(aes(y=season_adjust))
# one line does not got affected as much as the other.
#pick the 2020Q2 which is 236
gas %>%
mutate(Gas = ifelse(Gas == 236, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas) +
geom_line(aes(y=season_adjust))
#2 lines are really close
3.8 Recall your retail time series data (from Exercise 8 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
x11_decomposition <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_decomposition) +
labs(title = "X11")
#from 2000 to 2010, it has less noise.
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
Decomposition of the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995. Figure 3.19: Decomposition of the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
Seasonal component from the decomposition shown in the previous figure. Figure 3.20: Seasonal component from the decomposition shown in the previous figure.
Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
ans: we can see a increasing trend besides there is a drop around 1991, the trend is flater during that period of time.
Is the recession of 1991/1992 visible in the estimated components?
ans: yes, as we mentioned early, i am more interested to see if 2019 to 2023 count as recession if there is a data and plot like this one.