Email : juliansalomo2@gmail.com
RPubs : https://rpubs.com/juliansalomo/
Department : Business Statistics
Address : ARA Center, Matana University Tower
Jl. CBD Barat Kav, RT.1, Curug Sangereng, Kelapa Dua, Tangerang, Banten 15810.
?gafa_stock
paste("gafa_stock is a time series of Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. ")## [1] "gafa_stock is a time series of Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. "
datatable(gafa_stock,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Table GAFA stock prices.')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)Autoplot of gafa_stock
gafa_stock %>% autoplot(Close)Time interval of gafa_stock
interval(gafa_stock)## <interval[1]>
## [1] !
exclamation mark means there is no fix interval for gafa_stock
?PBS
paste("PBS is a monthly tsibble with two values:Scripts (Total number of scripts), Cost (Cost of the scripts in $AUD)")## [1] "PBS is a monthly tsibble with two values:Scripts (Total number of scripts), Cost (Cost of the scripts in $AUD)"
head(PBS)## # A tsibble: 6 x 9 [1M]
## # Key: Concession, Type, ATC1, ATC2 [1]
## Month Concession Type ATC1 ATC1_desc ATC2 ATC2_desc Scripts Cost
## <mth> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1991 Jul Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 18228 67877
## 2 1991 Aug Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 15327 57011
## 3 1991 Sep Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 14775 55020
## 4 1991 Oct Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 15380 57222
## 5 1991 Nov Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 14371 52120
## 6 1991 Dec Concession~ Co-pa~ A Alimentary ~ A01 STOMATOLOG~ 15028 54299
Autoplot of PBS
PBS %>% autoplot(Cost)Time interval of PBS
interval(PBS)## <interval[1]>
## [1] 1M
1M means the time interval of PBS is monthly
?vic_elec
paste("vic_elec is a half-hourly tsibble with three values")## [1] "vic_elec is a half-hourly tsibble with three values"
datatable(vic_elec,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Table Half-hourly electricity demand for Victoria, Australia.')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)Autoplot of vic_elec
vic_elec %>% autoplot(Demand)Time interval of vic_elec
interval(vic_elec)## <interval[1]>
## [1] 30m
30m means the time interval of vic_elec is half-hourly
?pelt
paste("Pelt is a time series of Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. This data contains trade records for all areas of the company.")## [1] "Pelt is a time series of Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. This data contains trade records for all areas of the company."
datatable(pelt,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Table Pelt trading records.')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)Autoplot of pelt
pelt %>% autoplot(Lynx)Time interval of pelt
interval(pelt)## <interval[1]>
## [1] 1Y
1Y means the time interval of pelt is yearly
gafa_stock %>% group_by(Symbol) %>% filter(Close == max(Close))## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
tute1 <- readr::read_csv("tute1.csv")
datatable(tute1,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Quarterly sales for a small company over the period 1981-2005')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)mytimeseries <- tute1 %>%
mutate(Quarter = yearmonth(Quarter)) %>%
as_tsibble(index = Quarter)mytimeseries %>%
pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")Check what happens when you don’t include facet_grid().
mytimeseries %>%
pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() When we don’t include facet_grid(), all of the plot will not be divided and will be placed in one graph. Also there will be no name tag for each plot which means we can only read it from the color only.
USgas package contains data on the demand for natural gas in the US.USgas package.#install.packages("USgas")
library(USgas)us_total <- us_total %>%
as_tsibble(index = year, key = state)new_england<- us_total %>%
group_by(state) %>%
filter(state %in% c('Maine', 'Vermont', 'New
Hampshire', 'Massachusetts',
'Connecticut' ,'Rhode Island')) %>%
ungroup()
new_england %>% autoplot(y)tourism.xlsx here and read it into R using readxl::read_excel().tourism <-read_excel("tourism.xlsx")
datatable(tourism,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Tourism')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)tourism_tsibble <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(key = c("Region", "State", "Purpose"),index = "Quarter")tourism_tsibble %>% group_by(Region, Purpose) %>%
summarise(Trips = mean(Trips)) %>%
ungroup() %>%
filter(Trips == max(Trips))## # A tsibble: 1 x 4 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter Trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
tourism_tsibble %>%
group_by(State) %>%
summarise(Trips = sum(Trips)) %>%
ungroup()## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ... with 630 more rows
aus_production
aus_production %>% autoplot(Bricks)pelt
pelt %>% autoplot(Lynx)gafastock
gafa_stock %>% autoplot(Close)vic_elec
vic_elec %>% autoplot(Demand)vic_elec %>% ggplot(aes(x= Date, y= Demand, group = Holiday)) +
geom_line(aes(col=Holiday)) +
facet_grid(Holiday ~ ., scales ="free" )datatable(aus_arrivals,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('data set comprises quarterly international arrivals to Australia from Japan, New Zealand, UK and the US')),
extensions = 'FixedColumns',
option = list(scrollX = TRUE, fixedColumns = TRUE)
)aus_arrivals %>% autoplot(Arrivals)aus_arrivals %>% gg_season(Arrivals)aus_arrivals %>% gg_subseries(Arrivals)Explore your chosen retail time series using the following functions:
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))Explore your chosen retail time series using the following functions:
autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() %>% autoplot()
Can you spot any seasonality, cyclicity and trend? What do you learn about the series?
myseries %>% autoplot(Turnover)myseries %>% gg_season(Turnover)myseries %>% gg_subseries(Turnover)myseries %>% gg_lag(Turnover)myseries %>% ACF(Turnover) %>%
autoplot() * From the autoplot, we can see a clear seasonal or cyclic pattern in the time series, and a upward trend.
The seasonal plot shows that there are indeed seasonal patterns. The plot also reveals that there is a typical big jump every year in December, and a drop in February. Sales begin to increase in the fall, peaking between November and December, then decreasing after January, likely to coincide with holiday shopping and sales for Christmas.
The seasonal subseries offers a new perspective on seasonality by showing the monthly mean values. We see a large increase from November to December and a decrease from December to February, but also a small, decreasing trend in turnover from January to June and a similar increase from July to November, before the big spike from November to December.
*In this lag graph, the data is difficult to analyze. We can see some negative and positive relationships, but due to the high number of graphs and the fact that this is a monthly graph, it’s hard to tell much different.
autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and us_gasoline.“Total Private” Employed from us_employment
Priv <- us_employment %>%
filter(Title == "Total Private")
Priv %>% autoplot(Employed)Priv %>% gg_season(Employed)Priv %>% gg_subseries(Employed)Priv %>% gg_lag(Employed)Priv %>% ACF(Employed)## # A tsibble: 29 x 3 [1M]
## # Key: Series_ID [1]
## Series_ID lag acf
## <chr> <lag> <dbl>
## 1 CEU0500000001 1M 0.997
## 2 CEU0500000001 2M 0.993
## 3 CEU0500000001 3M 0.990
## 4 CEU0500000001 4M 0.986
## 5 CEU0500000001 5M 0.983
## 6 CEU0500000001 6M 0.980
## 7 CEU0500000001 7M 0.977
## 8 CEU0500000001 8M 0.974
## 9 CEU0500000001 9M 0.971
## 10 CEU0500000001 10M 0.968
## # ... with 19 more rows
Bricks from aus_production
aus_production %>% autoplot(Bricks)aus_production %>% gg_season(Bricks)aus_production %>% gg_subseries(Bricks)aus_production %>% gg_lag(Bricks)aus_production %>% ACF(Bricks)## # A tsibble: 22 x 2 [1Q]
## lag acf
## <lag> <dbl>
## 1 1Q 0.900
## 2 2Q 0.815
## 3 3Q 0.813
## 4 4Q 0.828
## 5 5Q 0.720
## 6 6Q 0.642
## 7 7Q 0.655
## 8 8Q 0.692
## 9 9Q 0.609
## 10 10Q 0.556
## # ... with 12 more rows
Hare from pelt
pelt %>% autoplot(Hare)pelt %>% gg_subseries(Hare)pelt %>% gg_lag(Hare)pelt %>% ACF(Hare)## # A tsibble: 19 x 2 [1Y]
## lag acf
## <lag> <dbl>
## 1 1Y 0.658
## 2 2Y 0.214
## 3 3Y -0.155
## 4 4Y -0.401
## 5 5Y -0.493
## 6 6Y -0.401
## 7 7Y -0.168
## 8 8Y 0.113
## 9 9Y 0.307
## 10 10Y 0.340
## 11 11Y 0.296
## 12 12Y 0.206
## 13 13Y 0.0372
## 14 14Y -0.153
## 15 15Y -0.285
## 16 16Y -0.295
## 17 17Y -0.202
## 18 18Y -0.0676
## 19 19Y 0.0956
“H02” Cost from PBS
H02 <- PBS %>% filter(ATC2 == "H02")
H02 %>% autoplot(Cost)H02 %>% gg_season(Cost)H02 %>% gg_subseries(Cost)H02 %>% ACF(Cost)## # A tsibble: 92 x 6 [1M]
## # Key: Concession, Type, ATC1, ATC2 [4]
## Concession Type ATC1 ATC2 lag acf
## <chr> <chr> <chr> <chr> <lag> <dbl>
## 1 Concessional Co-payments H H02 1M 0.834
## 2 Concessional Co-payments H H02 2M 0.679
## 3 Concessional Co-payments H H02 3M 0.514
## 4 Concessional Co-payments H H02 4M 0.352
## 5 Concessional Co-payments H H02 5M 0.264
## 6 Concessional Co-payments H H02 6M 0.219
## 7 Concessional Co-payments H H02 7M 0.253
## 8 Concessional Co-payments H H02 8M 0.337
## 9 Concessional Co-payments H H02 9M 0.464
## 10 Concessional Co-payments H H02 10M 0.574
## # ... with 82 more rows
us_gasoline
us_gasoline %>% autoplot(Barrels)us_gasoline %>% gg_season(Barrels)us_gasoline %>% gg_subseries(Barrels)us_gasoline %>% gg_lag(Barrels)us_gasoline %>% ACF(Barrels)## # A tsibble: 31 x 2 [1W]
## lag acf
## <lag> <dbl>
## 1 1W 0.893
## 2 2W 0.882
## 3 3W 0.873
## 4 4W 0.866
## 5 5W 0.847
## 6 6W 0.844
## 7 7W 0.832
## 8 8W 0.831
## 9 9W 0.822
## 10 10W 0.808
## # ... with 21 more rows
1-B, 2-A, 3-, 4-C
Victoria_Pig<- aus_livestock %>%
filter(State == "Victoria",
Animal == "Pigs",
between(year(Month),1990,1995))
Victoria_Pig %>% ACF(Count) %>% autoplot()Almost all of the spikes are out of the bounds which is confirming the series is not white noise.
dgoog <- gafa_stock %>%
filter(Symbol == "GOOG", year(Date) >= 2018) %>%
mutate(trading_day = row_number()) %>%
update_tsibble(index = trading_day, regular = TRUE) %>%
mutate(diff = difference(Close))Because when we use a filter for the GOOG symbol, the interval for GOOG by Date will be chaotic and not the same for each row, so we need to change our index to use the row sequence number so that the interval for each row is the same
google_stock <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
mutate(trading_day = row_number()) %>%
update_tsibble(index = trading_day, regular = TRUE)
google_stock %>%
ACF(difference(Close)) %>%
autoplot()There are total 30 lags, 5% of 30 is
5/100*30## [1] 1.5
then, if there are more than 1.5 lags that out of the bound, the data series is not white noises.
From the plot above we can clearly see that there are 3 lags that out of the bound, then the data series is probably not white noise.