Email : juliansalomo2@gmail.com
RPubs : https://rpubs.com/juliansalomo/
Department : Business Statistics
Address : ARA Center, Matana University Tower
Jl. CBD Barat Kav, RT.1, Curug Sangereng, Kelapa Dua, Tangerang, Banten 15810.
global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?global_economy %>% mutate('GDPPC' = GDP / Population) %>% select(Country, Year, GDPPC) %>% arrange(desc(GDPPC)) %>% head(10)## # A tsibble: 10 x 3 [1Y]
## # Key: Country [2]
## Country Year GDPPC
## <fct> <dbl> <dbl>
## 1 Monaco 2014 185153.
## 2 Monaco 2008 180640.
## 3 Liechtenstein 2014 179308.
## 4 Liechtenstein 2013 173528.
## 5 Monaco 2013 172589.
## 6 Monaco 2016 168011.
## 7 Liechtenstein 2015 167591.
## 8 Monaco 2007 167125.
## 9 Liechtenstein 2016 164993.
## 10 Monaco 2015 163369.
global_economy %>% autoplot(GDP / Population) + guides(color=FALSE)global_economyglobal_economy %>% filter(Code =="USA") %>% autoplot(GDP)global_economy %>% filter(Code =="USA") %>% autoplot(GDP/Population) I’m doing a population transformation here, and as we can see that the two plots aren’t that different
aus_livestockaus_livestock %>% filter(State=="Victoria", Animal=="Bulls, bullocks and steers") %>% autoplot(Count)vic_elec.vic_elec %>% autoplot(Demand)vic_elec %>%
group_by(Date) %>%
index_by(Date = yearweek(Time)) %>%
summarise(Demand = sum(Demand)) %>%
autoplot(Demand)I’m doing a calendar transformation here so we can do plot-based analysis, because plots without transformation are terrible. And as we can see, after the transformation the plot looks a lot better.
aus_productionaus_production %>% autoplot(Gas)lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))This time I did a Box-Cox transformation, because the variance increases and decreases with the series level. After we perform the transformation, we can see that the variances look similar.
canadian_gas data?canadian_gas %>% autoplot(Volume)lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda))+
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))We were unable to stabilize the variance after selecting the best value of lambda and applying box cox to the time series results. The box-cox transformation produces similar results to the original, as shown by the comparison of both autoplots; in other words, the transformation does not provide stationarity to time series.
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>% autoplot(Turnover)myseries %>% autoplot(box_cox(Turnover, lambda))aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.# Tobacco from `aus_production`
aus_production %>% autoplot(Tobacco)lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda))+
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Tobacco production with $\\lambda$ = ",
round(lambda,2))))# Economy class passengers between Melbourne and Sydney from `ansett`
MELSYD <-ansett %>%
filter(Class == 'Economy', Airports=='MEL-SYD')
MELSYD %>%
autoplot(Passengers)lambda <- MELSYD %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
ansett %>%
filter(Class == 'Economy', Airports=='MEL-SYD') %>%
autoplot(box_cox(Passengers, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Passengers production with $\\lambda$ = ",
round(lambda,2))))# Pedestrian counts at Southern Cross Station from `pedestrian`
SCS <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
group_by(Sensor) %>%
index_by(Date = yearweek(Date_Time)) %>%
summarise(Count = sum(Count))
SCS %>%
autoplot(Count)lambda <- SCS %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
SCS %>%
autoplot(box_cox(Count, lambda))A 3X5 MA is when a 3-MA follows a moving of a 5-MA series. The calculation for \(3\times5\) MA:
\[ \begin{align} 5MA&=-,-,\frac{Y_1+Y_2+Y_3+Y_4+Y_5}{5},\frac{Y_2+Y_3+Y_4+Y_5+Y_6}{5}, \frac{Y_3+Y_4+Y_5+Y_6+Y_7}{5}, -, - \\ 3\times5MA&= -,-,-,\frac{\frac{Y_1+Y_2+Y_3+Y_4+Y_5}{5}+\frac{Y_2+Y_3+Y_4+Y_5+Y_6}{5}+ \frac{Y_3+Y_4+Y_5+Y_6+Y_7}{5}}{3}, -, - \\ &=-,-,-,\frac{{Y_1+Y_2+Y_3+Y_4+Y_5}+{Y_2+Y_3+Y_4+Y_5+Y_6}+ {Y_3+Y_4+Y_5+Y_6+Y_7}}{15}, -, - \\ &=-,-,-,\frac{Y_1}{15}+\frac{2 Y_2}{15}+\frac{Y_3}{5}+\frac{Y_4}{5}+\frac{Y_5}{5}+\frac{2 Y_6}{15}+ \frac{Y_7}{15}, -, - \end{align} \]
Now we take coefficient for each variables and turn them into decimal number.
paste(round(c(1/15,2/15,1/5, 1/5, 2/15, 1/15),3))## [1] "0.067" "0.133" "0.2" "0.2" "0.133" "0.067"
The number is match which is mean \(3\times5\)-MA is equivalent to a 7-term weighted moving average
aus_production.gas <- tail(aus_production, 5*4) %>% select(Gas)gas %>%
autoplot(Gas) + labs(y = "Petajoules")There is some strong seasonality and a trend.
classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.decomp <- gas %>%
model(decomp = classical_decomposition(Gas, type = "multiplicative")) %>%
components()
decomp %>% autoplot()Yes,The decomposition has captured the seasonality and a slight trend
as_tsibble(decomp) %>%
autoplot(season_adjust) +
labs(title = "Seasonally adjusted data", y = "Petajoules")gas %>%
mutate(Gas = if_else(Quarter == yearquarter("2008Q1"), Gas + 300, Gas)) %>%
model(decomp = classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust) +
labs(title = "Seasonally adjusted data", y = "Petajoules")Since the outlier has changed the estimation of the seasonal component, the “seasonally adjusted” data now indicates some seasonality.
gas %>%
mutate(Gas = if_else(Quarter == yearquarter("2010Q2"), Gas + 300, Gas)) %>%
model(decomp = classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust) +
labs(title = "Seasonally adjusted data", y = "Petajoules")Since the outlier is in a part of the data where the pattern cannot be estimated, the seasonally adjusted data now display no seasonality.
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == "A3349337W")x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title =
"X11 decomposition of retail data.")It’s worthwhile to acknowledge that the magnitude of the seasonal data decreases over time, but the upward trend continues. Although it’s difficult to see in the normal plot of the results, I’d assume that the seasonal swing was much stronger prior to 1990.
Figure 3.19: Decomposition of the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
Figure 3.20: Seasonal component from the decomposition shown in the previous figure.
stl_labour <- stl(labour, s.window = 13, robust = TRUE)
autoplot(labour, series="Data") +
autolayer(trendcycle(stl_labour), series="Trend") +
autolayer(seasadj(stl_labour), series="Seasonally Adjusted") +
xlab("Year") + ylab("Number of people") +
ggtitle("Number of people in the civilian labour force in Australia") +
scale_colour_manual(values=c("gray","blue","red"),
breaks=c("Data","Seasonally Adjusted","Trend"))The number of people working in the civilian sector in Australia has risen steadily over time. There were significant recessions in 1991 and 1992, which were reflected in seasonally adjusted results. I discovered that there were only minor changes in the seasonal dimension over time as compared to the original data scale using ggsubseriesplot.
It can be seen in the data that has been seasonally adjusted, that there were significant recessions in 1991 and 1992.
canadian_gas data (monthly Canadian gas production in billions of cubic metres, January 1960 – February 2005).autoplot(), gg_subseries() and gg_season() to look at the effect of the changing seasonality over time.canadian_gas %>% autoplot(Volume)canadian_gas %>% gg_subseries(Volume)canadian_gas %>% gg_season(Volume)stl_cangas <-canadian_gas %>%
model(
STL(Volume ~ trend(window = 21) +
season(window = 13),
robust = TRUE)) %>%
components()The number of consecutive years to use in estimating each value in the seasonal component is given by seasonal window.
gg_season().]stl_cangas %>%
gg_season(Volume)Over time we can see that the shape of the plot changes. Initially, the plot shapes throughout the year were quite the same, but over time there were noticeable differences over the years
plauseasonally <- canadian_gas %>%
model (stl = STL(Volume)) %>%
components()
plauseasonally %>%
as_tsibble() %>%
autoplot(Volume, color = "black") +
geom_line(aes(y=season_adjust), color = "red")+
labs (title = "Canadian gas's Total Volume")#X11ff
x11_dcmp <- canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume~x11())) %>%
components()
autoplot(x11_dcmp)+labs(title = "Decomposition of canadian gas using x11")#SEATS
x11_dcmp <- canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume~seats())) %>%
components()
autoplot(x11_dcmp)+labs(title = "Decomposition of canadian gas using SEATS")