Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
Use ? (or help()) to find out about the about the data in each series.
What is the time interval of each series?
Use autoplot() to produce a time plot of each series.
For the last plot, modify the axis labels and title.
Answer.
#install.packages("fpp3")
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.4
## ✔ dplyr 1.1.3 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.4.3 ✔ fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
head(aus_production, 3)
head(pelt, 3)
head(gafa_stock)
head(vic_elec)
help(aus_production)
## starting httpd help server ... done
?pelt
Lynx contains data related to number of canadian Lynx pelts traded.
help("gafa_stock")
gafa_stock data contains the historical prices for Google, Amazon, Facebook and Apple. All prices are in $USD. The time series ‘Close’ contains the closing price of the each stock mentioned above for the given period.
help("vic_elec")
vic_elec contains the data for the half_hourly electricity demand for Victoria, Australia. The time series Demand has total electricity demand in MWh.
The time series Bricks from aus_production has time interval of 1 quarter.
The time interval of time series Lynx from pelt is 1 year.
The time interval of time series Close from gafa_stock is 1 day.
The time interval of time series Demand from vic_elec is 30 minutes
autoplot(aus_production, Bricks)+
labs(title = "Plot of the timeseries Bricks")
## Warning: Removed 20 rows containing missing values (`geom_line()`).
autoplot(pelt, Lynx)+
labs(title = "Plot of the timeseries Lynx")
autoplot(gafa_stock, Close)+
labs(title = "Plot of the timeseries Closing price of each stock")
autoplot(vic_elec, Demand)+
labs(title = "Plot of the timeseries Demand from the data vic_elec",
x="Time with interval 30 minutes",
y="Demand (MWh)")
Use filter() to find what days corresponding to the peak closing price for each of the four stocks in gafa_stock.
Answer.
gafa_stock|>
filter(Symbol=='AAPL')|>
filter(Close== max(Close))
gafa_stock|>
group_by(Symbol)|>
filter(Close==max(Close))
Hence, it can be seen that AAPL had its peak on 2018-10-03, AMZN had its peak 2018-09-04, FB had its peak on 2018-07-25, and GOOG had its peak on 2018-07-26
Download the file tute1.csv from the book website, open it in Excel or some other spreadsheet application, and review its contents. YOu should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for information.
a. Read the data into R
tute1<- readr::read_csv('tute1.csv')
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tute1)
b. Convert the data into time series
mytimeseries <- tute1|>
mutate(Quarter = yearquarter(Quarter))|>
as_tibble(index = Quarter)
head(mytimeseries, 3)
c. Construct time series plots of each of the three series.
mytimeseries |>
pivot_longer(-Quarter)|>
ggplot(aes(x=Quarter, y=value, color =name))+
geom_line()+
facet_grid(name ~., scales ='free_y')
mytimeseries |>
pivot_longer(-Quarter)|>
ggplot(aes(x=Quarter, y=value, color =name ))+
geom_line()
if we don’t include facet_grid() then all the plots are in the same canvas.
The USgas package contains data on the demand for natural gas in the US.
a. Install the USgas package
#install.packages("USgas")
b. Create tsibble from us_total with year as the index and state as the key.
library(USgas)
## Warning: package 'USgas' was built under R version 4.3.2
head(us_total)
us_tg <- us_total|>
mutate(year= year)|>
as_tsibble(key = state,
index =year)
head(us_tg)
c. PLot the annual natural gas consumption by state for the New England Area (comprising the states of Maine, Vermont, New Hampshire, Massachussetes, connecticut and Rhode Island).
us_tg|> filter(
state == 'Maine'|
state == "Vermont"|
state == "New Hampshire"|
state == "Connecticut"|
state == "Rhode Island"
)|>
ggplot(aes(x=year, y=y, col = state))+
geom_line()
a. Download tourism.xlsx from the book website and read into R using the readxl::read_excel()
tourism <- readxl::read_excel("tourism.xlsx")
head(tourism, 3)
b. Create a tsibble which is identical to the tourism tsibble package.
tourism<- tourism|>
mutate(Quarter = yearquarter(Quarter))|>
as_tsibble(key = c(Region, State, Purpose, Trips),index =Quarter)
head(tourism, 3)
c. Find what combination of Region and Purpose had a maximum number of overnight trips on average.
av_trips <- tourism|>
select(Region, Purpose, Trips)|>
group_by(Region, Purpose)|>
summarize(av_trip = mean(Trips))
av_trips|>
filter(av_trip==max(av_trip))
library(dplyr)
new_tourism <- tourism |> group_by(State, Purpose, Region)|>
summarize(
Total_trip = sum(Trips)
)
new_tourism
Use the following graphic function autoplot(), gg_season(), gg_subseries(), gg_log(), ACF() and explore features from the following time series: “Total Private” Employed from the us_employment, Bricks from aus_production, Hare from pelt, “Ho2” Cost from PBS and Barrels from us_gasoline.
Can you spot any seasonality, cyclicity and trend?
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?
Answer.
head(us_employment)
total_priv <- us_employment|>
filter(Title == "Total Private")|>
select(Employed)
autoplot(total_priv, Employed)+
labs(title = "plot of total private employed",
x= "Time (months)",
y= "Total number of private employed")
In this plot, the seasonality is clearly seen. The graph has certain period of seasonality. Also there is a strong upaward trend in the data from 1940 to 2020. Although there are some dips in the graph but after dips the data holds the trend.
gg_season(total_priv, Employed)+
labs( y= "Total number of private employed",
x="Time (month)",
title = "Seasonal plot of total private employed")
This graph shows that the data repeats seasonality eash and every year. All the lines on the graph are almost the parallel to each other thus depicting strong seasonality in the data.
gg_subseries(total_priv, Employed)+
labs(y= "Total number of private employed",
x="Time (month)",
title = "Subseries of total private employed")
This graph shows the sub-series for each and every month of each year. Almost the same pattern can be seen in each graph which shows the cyclic.
gg_lag(total_priv, Employed, geom = 'point')+
labs(y = "Total number of private employed",
x="Time (month)",
title = "Subseries of total private employed")+
theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1))
total_priv |>
ACF(Employed, lag_max = 48) |>
autoplot() +
labs(title="Total number of private employed")
head(aus_production, 3)
bricks <- aus_production|>
select(Bricks)
autoplot(bricks, Bricks)+
labs(title = "plot of clay bricks production in million")
## Warning: Removed 20 rows containing missing values (`geom_line()`).
gg_season(bricks, Bricks)+
labs(title = "plot of clay bricks production in million")
## Warning: Removed 20 rows containing missing values (`geom_line()`).
gg_subseries(bricks, Bricks)+
labs(title = "plot of clay bricks production in million")
## Warning: Removed 5 rows containing missing values (`geom_line()`).
gg_lag(bricks, Bricks)+
labs(title = "plot of clay bricks production in million")+
theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1))
## Warning: Removed 20 rows containing missing values (gg_lag).
bricks |>
ACF(Bricks, lag_max = 50 ) |>
autoplot() +
labs(title="Plot of ACF of bricks ")
head(pelt, 3)
hare <- pelt|>
select(Hare)
autoplot(hare, Hare)+
labs(title = "plo;t of number of snowshoe Hare pelts traded")
#gg_season(hare, Hare)+
#labs(title = "Seasonal plot of snowshoe Hare pelts traded")
gg_subseries(hare, Hare)+
labs(title = "Subseries plot of hare")
gg_lag(hare, Hare)+
labs(title = "Lag plots of snowshoe hare pelts traded")+
theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1))
hare |>
ACF(Hare, lag_max = 50 ) |>
autoplot() +
labs(title="Plot of ACF of hare")
head(PBS,3)
cost <- PBS|>
filter(ATC2=="H02")|>
select(Month, Cost)
autoplot(cost, Cost)+
labs(title = "")
gg_season(cost, Cost)+
labs(title = "")
gg_subseries(cost, Cost)+
labs(title = "")
lag1_series<- cost|>filter(
Concession =='Concessional',
Type =="Co-payments"
)
gg_lag(lag1_series, Cost)+
labs(title = "Lag plots of snowshoe hare pelts traded")+
theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1))
cost |>
ACF(Cost, lag_max = 50 ) |>
autoplot() +
labs(title="")
Barrels from us_gasoline
head(us_gasoline,3)
autoplot(us_gasoline, Barrels)+
labs(title = "plot barrels of gasoline ",
y= "Barrel (million/day)",
x='Week')
gg_season(us_gasoline, Barrels)+
labs(title = "Seasonal plot of barrels per day")
gg_subseries(us_gasoline, Barrels)+
labs(title = "Subseries plot of Barrels")
gg_lag(us_gasoline, Barrels)+
labs(title = "Lag plots of Barrels")+
theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1))
us_gasoline |>
ACF(Barrels, lag_max = 50 ) |>
autoplot()+
labs(title="Plot of ACF of Barrels")