Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
-Use ? (or help()) to find out about the data in each series.
-What is the time interval of each series?
-Use autoplot() to produce a time plot of each series.
-For the last plot, modify the axis labels and title.
# Help pages are left commented out so they do not open on every run.
# ?aus_production
aus_production |>
  autoplot(Bricks) +
  theme_minimal()

# ?pelt
pelt |>
  autoplot(Lynx) +
  theme_minimal()

# ?gafa_stock
gafa_stock |>
  autoplot(Close) +
  theme_minimal()

# ?vic_elec
# Last plot: custom title and axis labels, as the exercise requests.
vic_elec |>
  autoplot(Demand) +
  labs(
    title = "Half-hourly electricity demand for Victoria, Australia",
    x = "Year",
    y = "Electricity Demand"
  ) +
  theme_minimal()
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
# Within each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum closing price.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.
# Reads tute1.csv from the current working directory.
tute1 <- readr::read_csv(file = "tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens an interactive data viewer, so only call it in an interactive
# session; non-interactive runs (e.g. knitting) skip it.
if (interactive()) {
  View(tute1)
}

# Convert the Quarter column to a yearquarter index and build the tsibble.
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
# Reshape to long form (one row per Quarter and series name), then draw one
# panel per series, each with its own y-axis scale.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  theme_minimal()
Without facet_grid(), there is a single plot with one y-axis.
# Same long-form plot without faceting: all series share one panel and y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  theme_minimal()
The USgas package contains data on the demand for natural gas in the US. a. Install the USgas package.
library(USgas) # loaded here so it sits next to the exercise that uses it
# Build a tsibble from us_total: indexed by year, one series per state.
us_tot <- as_tsibble(us_total, key = state, index = year)
c.Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
# The six New England states named in the exercise.
NE <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
NE_tot <- filter(us_tot, state %in% NE)

# y-axis labels are scaled by 1e-3 and suffixed with "K".
NE_tot |>
  autoplot() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  labs(
    title = "Annual Natural Gas Consumption by NE State",
    x = "Year",
    y = "Gas Consumption"
  ) +
  theme_minimal()
## Plot variable not specified, automatically selected `.vars = y`
a.Download tourism.xlsx from the book website and read it into R using readxl::read_excel().
# Reads tourism.xlsx from the current working directory.
trism <- readxl::read_excel(path = "tourism.xlsx")
# NOTE(review): this prints `tourism`, not `trism` -- presumably the packaged
# tsibble shown as the reference the converted data should match; confirm.
tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# Quarter becomes the yearquarter index; Region, State and Purpose form the key.
trism_ts <- as_tsibble(
  mutate(trism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
trism_ts
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
c.Find what combination of Region and Purpose had the maximum number of overnight trips on average.
# summarise() on a tsibble always keeps the time index, so averaging there
# gives one value per Region/Purpose *per quarter* and the filter then picks
# the single largest quarter. Dropping to a plain tibble first averages over
# all quarters, which is what the exercise asks for.
# NOTE: the printed result recorded below predates this change; re-run to
# refresh it.
trism_ts |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(AvgTrp = mean(Trips), .groups = "drop") |>
  filter(AvgTrp == max(AvgTrp))
## # A tsibble: 1 x 4 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter AvgTrp
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
d.Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
(Assuming this means total trips per State per Quarter, since we are talking time series, and it did not say to combine by Quarter.)
# Sum Trips within each State. The result keeps the Quarter index, so it
# holds one total per State per quarter.
new_ts <- trism_ts |>
  group_by(State) |>
  summarise(totTrps = sum(Trips))
new_ts
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter totTrps
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
[This one is in curriculum but not on the list for week2 - left it in since I did it already….]
The aus_arrivals data set comprises quarterly international arrivals to Australia from Japan, New Zealand, UK and the US.
- Use autoplot(), gg_season() and gg_subseries() to compare the differences between the arrivals from these four countries.
- Can you identify any unusual observations?
aa <- aus_arrivals
# Time plot of the arrivals series; y-axis labels are scaled by 1e-3 and
# suffixed with "K".
aa |>
  autoplot() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  labs(x = "Quarter") +
  theme_minimal()
Observations:
aa <- aus_arrivals
# Seasonal plot of arrivals; y-axis labels are scaled by 1e-3 and suffixed
# with "K".
aa |>
  gg_season() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  labs(title = "Seasonal plot: Arrivals by Country", x = "Quarter") +
  theme_minimal()
Observations:
aa <- aus_arrivals
# Subseries plot of arrivals. The title previously read "Seasonal plot",
# copied from the gg_season() chunk; it now names the plot actually drawn.
# x-axis labels are rotated 90 degrees.
gg_subseries(aa) +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  ggtitle("Subseries plot: Arrivals by Country") +
  xlab("Quarter") +
  ylab("Arrivals(K)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
Observations:
Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
Can you spot any seasonality, cyclicity and trend?
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?
Observations from plots below:
- the trend is generally up, with a dip around 2008
# Keep the "Total Private" rows of us_employment and sum Employed into totEmp.
tp <- summarise(
  filter(us_employment, Title == "Total Private"),
  totEmp = sum(Employed)
)
# Time plot; y-axis labels are scaled by 1e-3 and suffixed with "K".
tp |>
  autoplot() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
# Seasonal plot of the employment series, y-axis labelled in "K".
tp |>
  gg_season() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
## Plot variable not specified, automatically selected `y = totEmp`
# Subseries plot of the employment series; x-axis labels rotated 90 degrees.
tp |>
  gg_subseries() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = totEmp`
# Restrict the series to months from the year 2000 onwards.
recent_tp <- filter(tp, year(Month) >= 2000)
# Lag plot drawn with points; x-axis labels rotated 90 degrees.
recent_tp |>
  gg_lag(geom = "point") +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = totEmp`
# ACF() expects the column to analyse. The values of this series live in
# totEmp; `tp` is the name of the source tsibble, not a column.
recent_tp |>
  ACF(totEmp) |>
  autoplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
Observations from plots below:
- was trending up until early 1980s
- production was higher in Q2 & Q3
# Keep only the Bricks column of aus_production.
brks <- select(aus_production, Bricks)
# Time plot of brick production.
brks |>
  autoplot() +
  theme_minimal()
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal plot of brick production.
brks |>
  gg_season() +
  theme_minimal()
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Subseries plot of brick production; x-axis labels rotated 90 degrees.
brks |>
  gg_subseries() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# Lag plot of brick production drawn with points.
brks |>
  gg_lag(geom = "point") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# Autocorrelation plot of brick production.
brks |>
  ACF() |>
  autoplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Response variable not specified, automatically selected `var = Bricks`
Observations from plots below:
- The early 1860s and mid-1880s were bad years for hares.
# Keep only the Hare column of pelt.
hr <- select(pelt, Hare)
# Time plot; y-axis labels are scaled by 1e-3 and suffixed with "K".
hr |>
  autoplot() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
Note: The pelt data is by year, so there is no season. Cannot use gg_season().
# Subseries plot of the Hare series, y-axis labelled in "K".
hr |>
  gg_subseries() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
## Plot variable not specified, automatically selected `y = Hare`
# Lag plot of the Hare series drawn with points.
hr |>
  gg_lag(geom = "point") +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = Hare`
# Autocorrelation plot of the Hare series.
hr |>
  ACF() |>
  autoplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Response variable not specified, automatically selected `var = Hare`
Observations from plots below:
- Concessional (co-pay and safety net) are generally rising
- General (co-pay and safety net) are flat to decreasing
- Both Concessional and General safety net series have similar seasonal
patterns
# The exercise asks about the H02 *Cost* series. Without a selection the
# plotting helpers default to Scripts, so keep only Cost here (select() on a
# tsibble retains the index and key columns). This mirrors how the Bricks and
# Hare series are prepared.
# NOTE: recorded messages below that mention `Scripts` predate this change.
H02 <- PBS |>
  filter(ATC2 == "H02") |>
  select(Cost)
# Time plot of the H02 series; y-axis labels are scaled by 1e-3 and suffixed
# with "K".
H02 |>
  autoplot() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
## Plot variable not specified, automatically selected `.vars = Scripts`
# Seasonal plot of the H02 series, y-axis labelled in "K".
H02 |>
  gg_season() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal()
## Plot variable not specified, automatically selected `y = Scripts`
# Subseries plot of the H02 series; x-axis labels rotated 90 degrees.
H02 |>
  gg_subseries() +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = Scripts`
# These two plots were previously commented out because gg_lag() raised an
# error: it accepts only a single time series, and H02 appears to hold several
# (one per Concession/Type combination). Summing Cost across them yields one
# series that both gg_lag() and ACF() can use.
H02_total <- H02 |>
  summarise(Cost = sum(Cost))

gg_lag(H02_total, Cost, geom = "point") +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

ACF(H02_total, Cost) |>
  autoplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
Observations from plots below:
- Generally rising until about 2008, then dips, then rises again
- Peaks around June
brl <- us_gasoline
# Time plot of the gasoline series.
brl |>
  autoplot() +
  theme_minimal()
## Plot variable not specified, automatically selected `.vars = Barrels`
# Seasonal plot of the gasoline series.
brl |>
  gg_season() +
  theme_minimal()
## Plot variable not specified, automatically selected `y = Barrels`
# Subseries plot of the gasoline series; x-axis labels rotated 90 degrees.
brl |>
  gg_subseries() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = Barrels`
# Lag plot of the gasoline series drawn with points.
brl |>
  gg_lag(geom = "point") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Plot variable not specified, automatically selected `y = Barrels`
# Autocorrelation plot of the gasoline series.
brl |>
  ACF() |>
  autoplot() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
## Response variable not specified, automatically selected `var = Barrels`