Consider the GDP information in global_economy. Plot the GDP per capita for each country over time.
Plotted only the top 10 countries based on the sum of GDP per capita, and found:
1-Monaco and Lichtenstein have the highest GDP per capita (low population, very wealthy individuals)
2-Of the top 10 gdp per capita countries, only 1 is above 10 million in population, the US, which is over 300 million.
3-Large amounts of wealth are concentrated in small countries with populations under 10 million.
global_economy$gdp_pc <-global_economy$GDP/global_economy$Population
global_economy %>%
tsibble(key = Code, index = Year) %>%
autoplot(gdp_pc, show.legend = FALSE) +
theme_ft_rc() +
labs(title = "GDP per Capita",
y = "USD$")+
scale_y_continuous(labels = scales::dollar)
top_10_gdpc <- as.data.frame(
global_economy %>%
mutate(sum = gdp_pc) %>%
as.tibble() %>%
group_by(Country) %>%
summarise(top_gdp_pc = sum(sum, na.rm = T)) %>%
slice_max(top_gdp_pc,n=10))
knitr::kable(top_10_gdpc,"html")
| Country | top_gdp_pc |
|---|---|
| Monaco | 4064271 |
| Liechtenstein | 3200305 |
| Luxembourg | 2398911 |
| Norway | 1945344 |
| Switzerland | 1857141 |
| Bermuda | 1740885 |
| Qatar | 1604869 |
| Denmark | 1495016 |
| Sweden | 1443351 |
| United States | 1437519 |
global_economy %>%
filter(Country %in%
c(
"Monaco",
"Liechtenstein",
"Luxembourg",
"Norway",
"Switzerland",
"Bermuda",
"Qatar",
"Denmark",
"Sweden",
"United States")) %>%
autoplot(gdp_pc) +
theme_ft_rc() +
labs(title= "Top 10 Countries",
subtitle = "GDP per Capita",
y = "$US",
x = "Year") +
scale_y_continuous(labels = scales::dollar)
global_economy %>%
filter(Country %in%
c(
"Monaco",
"Liechtenstein",
"Luxembourg",
"Norway",
"Switzerland",
"Bermuda",
"Qatar",
"Denmark",
"Sweden"
#"United States"
)) %>%
autoplot(Population/1000000) +
theme_ft_rc() +
labs(title= "Top 10 Countries GDP per Capita",
subtitle = "Population excl. US",
y = "Millions of People",
x = "Year") +
scale_y_continuous(labels = scales::number)
For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
United States GDP from global_economy
U.S. Economy –> stole code from the web… and from Tim
Since in the book–and in real life–we should be always adjusting money for inflation, since the value of money is not constant over time, I found it prudent from Tim, and from Hyndman to adjust for inflation.
The real GDP per capita was anchored at 2002–after 911, in order to see the real increase over time Growth is assumed over time, but in real terms, GDP per capita in 2017 at about USD 60K, is about USD 43K in 2002 US dollars.
US_econ <-
global_economy %>%
filter(Country == "United States") %>%
select(Country,
Code,
Year,
GDP,
CPI,
Population,
gdp_pc)
US_econ <-
US_econ %>%
mutate(CPI_2002 = US_econ$CPI[US_econ$Year == 2002]) %>%
mutate(real_gdp = GDP*CPI_2002/CPI) %>%
mutate(real_gdp_pc = real_gdp/Population)
plot_real_gdp <-
ggplot(data = US_econ) +
theme_ft_rc() +
geom_line(aes(
x= Year
,y= GDP/1000000000)
,color = "#fc595f") +
geom_line(aes(
x= Year
,y= real_gdp/1000000000)
,color = "#468499") +
labs(x = "Year",
y = "GDP, Billions USD$",
title = "Nominal and Real GDP",
subtitle = "United States") +
scale_y_continuous(labels = scales::dollar)
plot_real_gdp_pc <-
ggplot(data = US_econ) +
theme_ft_rc() +
geom_line(aes(
x= Year
,y= gdp_pc/1000)
,color = "#fc595f") +
geom_line(aes(
x= Year
,y= real_gdp_pc/1000)
,color = "#468499") +
labs(x = "Year",
y = "GDP, Thousands USD$",
title = "Nominal and Real GDP per Capita",
subtitle = "United States") +
scale_y_continuous(labels = scales::dollar)
plot_real_gdp
plot_real_gdp_pc
#ggplotly(plot_real_gdp) #subtitles don't work with plotly
#ggplotly(plot_real_gdp_pc)
Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
Seasonality appears clear, so, when decomposing: -no trend appears over the last 10 years. -Cattle Cycles are usually between 4-7 years, that is why I took the last 8
knitr::kable(head(aus_livestock), "html")
| Month | Animal | State | Count |
|---|---|---|---|
| 1976 Jul | Bulls, bullocks and steers | Australian Capital Territory | 2300 |
| 1976 Aug | Bulls, bullocks and steers | Australian Capital Territory | 2100 |
| 1976 Sep | Bulls, bullocks and steers | Australian Capital Territory | 2100 |
| 1976 Oct | Bulls, bullocks and steers | Australian Capital Territory | 1900 |
| 1976 Nov | Bulls, bullocks and steers | Australian Capital Territory | 2100 |
| 1976 Dec | Bulls, bullocks and steers | Australian Capital Territory | 1800 |
knitr::kable(unique(aus_livestock$Animal), "html")
| x |
|---|
| Bulls, bullocks and steers |
| Calves |
| Cattle (excl. calves) |
| Cows and heifers |
| Lambs |
| Pigs |
| Sheep |
vic_sltr_bull <-
aus_livestock %>%
filter(State == "Victoria") %>%
filter(Animal == "Bulls, bullocks and steers") %>%
mutate(`5-MA` = slider::slide_dbl(Count, mean,.before = 12, .after = 12, .complete = TRUE))
vic_sltr_bull %>%
autoplot(Count) +
geom_line(aes(y=`5-MA`),color="#D55E00")+
labs(x = "Month",
y = "Count",
color = "#fc595f9",
title = "Victoria Bull Slaughter") +
theme_ft_rc()
#---------
vic_sltr_bull_dcmp <-
vic_sltr_bull %>%
filter(year(Month) > 2009)
vic_sltr_bull_dcmp %>%
model(ETS(Count)) %>%
components() %>% autoplot() +
theme_ft_rc()
Victoria Electricity Demand from vic_elec
After the aggregation and decomposing, we can see that demand increases into the winter and decreases into the summer.
we can also notice that the trend is downward, where demand for electricity has gone down over time.
#vic_elec %>%
#model(ETS(Demand)) %>%
#components() %>% autoplot() + theme_calc(theme_dark())
#impossible, need to aggregate -- found how to aggregate online
v <- vic_elec %>%
group_by(Date) %>% #with dplyr
mutate(Demand = mean(Demand)) %>%
distinct(Date, Demand)
v %>%
as_tsibble(index = Date) %>%
autoplot(Demand) +
labs(title= "Daily Avg. Electricity Demand",
subtitle = "Victoria, Australia",
y = "Demand, MegaWatts") +
theme_ft_rc()
v %>%
mutate(Date = yearmonth(Date)) %>%
group_by(Date) %>%
summarise(Demand = mean(Demand)) %>%
as_tsibble(index = Date) %>%
autoplot(Demand) +
labs(title= "Monthly Avg. Electricity Demand",
subtitle = "Victoria, Australia",
y = "Demand, MegaWatts") +
theme_ft_rc()
v_1 <- v %>%
mutate(Date = yearmonth(Date)) %>%
group_by(Date) %>%
summarise(Demand = mean(Demand)) %>%
as_tsibble(index = Date)
v_1 %>%
model(ETS(Demand))%>%
components() %>% autoplot() +
theme_ft_rc()
Gas production from aus_production
Removed data prior to 1980. It looks like after that year, the trend is clearer
It also shows that production is highly seasonal.
when performing an ETS model, the slope was “Additive”, while the seasonal component was relatively clear.
aus_production %>%
autoplot(Gas) +
theme_ft_rc() +
labs(title = "Australian Gas Production")
aus_gas <-
aus_production %>%
filter(year(Quarter)>1980)
aus_gas %>%
autoplot(Gas) +
theme_ft_rc() +
labs(title = "Australian Gas Production")
aus_gas %>%
model(ETS(Gas)) %>%
components() %>% autoplot() +
theme_ft_rc()
Candian Gas- Why is a Box-Cox transformation unhelpful for the canadian_gas data?
Initially, it appears that Canadian Gas production goes in “Cycles”, as in:
-it has periods of stationarity, and linear behavior that could be segmented. -However, the Box-Cox transformation in this case is not useful because it did not help reduce the variance over-time. -In other words, when adjusted, it is almost the same variance as the original.
#inspect data
view_canadian_gas <-as.data.frame(canadian_gas)
rm(view_canadian_gas)
canadian_gas %>%
autoplot(Volume) +
labs(x = "Month"
, y = "Volume"
, title = "Canadian Gas, Raw"
) +
theme_ft_rc() +
geom_line(col = "#CE22D6")
canadian_gas%>%
summarise(Volume = sum(Volume)) %>%
gg_subseries(Volume)
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume,lambda)) +
labs(y = "Cubic meters",
title = paste0("Monthly Canadian Gas where lambda = ",round(lambda,4))) +
theme_ft_rc() +
geom_line(col = "#CE22D6")
Turnover: Retail turnover in $Million AUD
1 - there is definitely seasonality 2 - Variance is not constant, so, yes Box Cox, theoretically, could help 3 - After running the Box-Cox transformation, variance was reduced.
#inspect data
#temp_aus_retail <-as.data.frame(aus_retail)
#temp_aus_retail
#rm(temp_aus_retail)
set.seed(12345678)
myseries_aus_ret <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries_aus_ret) +
labs(x = "Date"
, y = "Retail turnover in $Million AUD"
, title = "Australian Retail Turnover"
, subtitle = "Restaurants and catering services") +
theme_ft_rc() +
geom_line(col = "#CE22D6")
## Plot variable not specified, automatically selected `.vars = Turnover`
# gg_season(myseries_aus_ret)
# gg_subseries(myseries_aus_ret)
# gg_lag(myseries_aus_ret)
lambda_retail <-
myseries_aus_ret %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries_aus_ret %>%
autoplot(box_cox(Turnover, lambda_retail)) +
labs(y = paste("Turnover - lambda = ", round(lambda_retail, 2)),
title = "Monthly Retail Turnover",
subtitle = paste("Box-Cox Transformation with lambda = ", round(lambda_retail, 2))) +
theme_ft_rc() +
geom_line(col = "#CE22D6")
Find appropriate Box-Cox transformations for 3 series of aus_production
Tobacco
Lambda at 0.93, not a major transformation, so not great (closer to 1, almost) Variance was reduced, but it appears it wont help much.
aus_production %>%
autoplot(Tobacco) +
labs(title = "Australian Tobacco Production"
, subtitle = "By Quarter"
, x = "Quarter"
, y = "Tonnes") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
lambda_tobacco <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda_tobacco)) +
labs(title = "Australian Tobacco Production"
, subtitle = paste("By Quarter lambda = ", round(lambda_tobacco, 2))
, x = "Quarter"
, y = "Tonnes") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
Ansett Airline Passengers
The results were terrible with the Box-Cox transformation. I suspect because of the zero values.
#Inspect quickly
#head(ansett)
#unique(ansett$Airports)
ansett %>%
filter(Class=="Economy",
Airports=="MEL-SYD") %>%
autoplot(Passengers) +
labs(x = "Week",
y = "Passengers",
title = "Weekly Passengers",
subtitle = "Economy, Melbourne to Sydney, 1987 - 1992") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
lambda_ansett <-
ansett %>%
filter(Class=="Economy",
Airports=="MEL-SYD") %>%
features(Passengers/1000000, features = guerrero) %>%
pull(lambda_guerrero)
ansett %>%
filter(Class=="Economy",
Airports=="MEL-SYD") %>%
autoplot(box_cox(Passengers, lambda_ansett)) +
labs(y = "Passengers"
, x = "Week"
, title = "Weekly Passengers"
, subtitle = paste("Economy, Melbourne to Sydney, 1987 - 1992, lambda = ", round(lambda_ansett,2))) +
theme_ft_rc() +
geom_line(col = "#1B9E77") +
scale_y_continuous(labels = scales::number)
Hourly Pedestrian
High lambda, but transformation appeared ok, but the variance is large. I suspect that further aggregation could help, but I was unable to compress this further.
#inspect data
#head(pedestrian)
pedestrian %>%
filter(Sensor == "Southern Cross Station")%>%
autoplot(Count)+
labs(title = "Hourly pedestrian counts") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
#seems aggregation could help
pedestrian_SCS <-
pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
index_by(Date = as.Date(Date_Time)) %>%
summarize(Count = sum(Count))
pedestrian_SCS %>%
autoplot(Count) +
labs(x = "Date",
y = "Count",
title = "Daily Pedestrians") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
lambda_pedestrian <-
pedestrian_SCS %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
pedestrian_SCS %>%
autoplot(box_cox(Count, lambda_pedestrian)) +
labs(title = paste("Daily Pedestrians, lambda = ", round(lambda_pedestrian, 2)),
subtitle = "Southern Cross Station"
, y = "Pedestrians"
, x = "Date"
) +
theme_ft_rc() +
geom_line(col = "#1B9E77")
Show that a 3×5 MA is equivalent to a 7-term weighted moving average with weights of 0.067, 0.133, 0.200, 0.200, 0.200, 0.133, and 0.067.
Answer
A 3X5 MA is when a 3-MA follows a moving of a 5-MA series. So, first it is a 5 moving average at position 3 in order to “center it” (think in excel terms from position 1 to 5 in rows, but the CMA is at 3, but averaging 1:5, 1:2 before, the one in the center, 3, and 2 ahead 4:5)
= 1/15(Y1) 2/15(Y2) 3/15(Y3) 3/15(Y4) 3/15(Y5) 2/15(Y6) 1/15(Y7) = 0.067 0.133 0.2 0.2 0.2 0.133 0.067
Australian Production, Gas again
Seasonality is clear –> Production increases into Q2 and peaks in Q3 Yes results on decomposition do support a strong seasonal behavior, as well as an upward linear trend.
When we change and add 300:
gas <- tail(aus_production, 5*4) %>% select(Gas)
autoplot(gas, Gas)+
labs(title = "Quarterly Australian Gas Production"
, subtitle = "Q3 2005 - Q2 2010"
, y = "petajoules"
, x = "Quarter") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
gas %>%
gg_subseries(Gas) +
labs(title = "Quarterly Australian Gas Production",
subtitle = "Subseries Plot") +
theme_ft_rc() +
geom_line(col = "#1B9E77")
gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Classical Multiplicative Decomposition"
, subtitle = "Australian Gas Production") +
theme_ft_rc()
#computations - Seasonal
gas_decom <- gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components()
gas_decom %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
color = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Australian Gas Production") +
theme_ft_rc() +
scale_colour_manual(
values = c("#1B9E77", "red", "orange"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
# Change and add 300
# when adding the outlier, we can see it clearly.It changed the trend though
# seasonality seemed relatively similar.
# when adding the outlier at the end --> trend changes, but original seasonality
# remains.
gas_out <- gas
gas_out$Gas[9] <- gas_out$Gas[12]+300
gas_out %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
theme_ft_rc()
gas_out2 <- gas
gas_out2$Gas[20] <- gas_out2$Gas[20]+300
gas_out2 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
theme_ft_rc()
Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
Recall your retail time series data (from Exercise 8 in Section 2.10).
While the initial chart shows an apparent clear upward trend, seasonality changes over time, meaning, the variance is greater at the beginning and end. The decomposition also shows that the error or “irregularity” term is greater at the beginning of the time series.
x11_decomp <-
myseries_aus_ret %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_decomp) +
labs(title ="Decomposition of Australian Department Stores Turnover using X-11.") +
theme_ft_rc()
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
Answer
Seasonality also changes heading into 1990, and its variance also increases. The irregular term experiences a “shock” effect somewhere in 1991 or 1992; the gg_season chart shows December is the highest peak month, and March is not too far off.
This exercise uses the canadian_gas data (monthly Canadian gas production in billions of cubic metres, January 1960 – February 2005).
Plot the data using autoplot(), gg_subseries() and gg_season() to look at the effect of the changing seasonality over time.
Do an STL decomposition of the data. You will need to choose a seasonal window to allow for the changing shape of the seasonal component.
How does the seasonal shape change over time? [Hint: Try plotting the seasonal component using gg_season().]
Can you produce a plausible seasonally adjusted series?
Compare the results with those obtained using SEATS and X-11. How are they different?
Answers
Variance of the seasonal component is far wider around 1975 - 1990, and while this variance is reduced thereafter, seasonality also appears to change.
X11 shows much greater variance in the seasonal component at the beginning of the time series
When using X11 and Seats yielded similar results. The irregular term is greatly reduced at the end compared to STL.
#a.
canadian_gas %>%
autoplot(Volume)+
labs(title = "Monthly Canadian Gas Production"
, subtitle = "contiguous"
,y = "billions of cubic meter")+
theme_ft_rc()+
geom_line(col = "#1B9E77")
canadian_gas %>%
gg_subseries(Volume)+
labs(title = "Monthly Canadian Gas Production"
, subtitle = "subseries"
,y = "billions of cubic meter")
canadian_gas %>%
gg_season(Volume)+
labs(title = "Monthly Canadian Gas Production"
, subtitle = "seasonal"
,y = "billions of cubic meters")+
theme_ft_rc()
#decomp
can_stl <-
canadian_gas %>%
model(STL(Volume ~ trend(window = 21) +
season(window = 13), robust = TRUE)) %>%
components()
autoplot(can_stl)+
labs(title = "STL decomposition of Canadian Gas Production") +
theme_ft_rc()
#10.d
canadian_gas %>%
model(
STL(Volume ~ trend(window = 21) +
season(window = 13),
robust = TRUE)) %>%
components() %>%
ggplot(aes(x = Month)) +
geom_line(aes(y = Volume, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(title = "STL decomposition of Canadian Gas Production") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
) +
theme_fivethirtyeight()
#10 e
canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components() %>%
autoplot()+
labs(title = "X-11 Decomp of Canadian Gas Production") +
theme_fivethirtyeight()
canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
components() %>%
autoplot() +
labs(title ="SEATS Decomp of Canadian Gas Production") +
theme_fivethirtyeight()