library(fpp3)
library(tidyverse)
library(USgas)
library(readxl)
DATA624_HW1_CameronGray
DATA624: Homework 1
2.1
aus_production
: 1956 Q1 to 2010 Q2
pelt
: 1845 to 1935
gafa_stock
: 2014-01-02 to 2018-12-31
vic_elec
: 2012-01-01 AEDT to 2014-12-31 23:30:00 AEDT
min(aus_production$Quarter)
<yearquarter[1]>
[1] "1956 Q1"
# Year starts on: January
max(aus_production$Quarter)
<yearquarter[1]>
[1] "2010 Q2"
# Year starts on: January
min(pelt$Year)
[1] 1845
max(pelt$Year)
[1] 1935
min(gafa_stock$Date)
[1] "2014-01-02"
max(gafa_stock$Date)
[1] "2018-12-31"
min(vic_elec$Time)
[1] "2012-01-01 AEDT"
max(vic_elec$Time)
[1] "2014-12-31 23:30:00 AEDT"
c('Quarter', 'Bricks')] |>
aus_production[,autoplot()
c('Year', 'Lynx')] |>
pelt[,autoplot()
|>
gafa_stock autoplot(vars(Close))
c('Time', 'Demand')] |>
vic_elec[,autoplot() +
labs(title='Electricity Demand', subtitle = '(Victoria, Australia)') +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
xlab('Time (30 min)') +
ylab('Electricity Demand (MWh)')
2.2
|> filter(Close == max(Close, na.rm=TRUE), .by = Symbol) gafa_stock
# A tsibble: 4 x 8 [!]
# Key: Symbol [4]
Symbol Date Open High Low Close Adj_Close Volume
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
2.3
<- readr::read_csv("tute1.csv")
tute1 # View(tute1)
<- tute1 |>
mytimeseries mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
|>
mytimeseries pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
|>
mytimeseries pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
# facet_grid(name ~ ., scales = "free_y")
2.4
<- tsibble(us_total, index = year, key = state)
ts_us_total
print(ts_us_total)
# A tsibble: 1,266 x 3 [1Y]
# Key: state [53]
year state y
<int> <chr> <int>
1 1997 Alabama 324158
2 1998 Alabama 329134
3 1999 Alabama 337270
4 2000 Alabama 353614
5 2001 Alabama 332693
6 2002 Alabama 379343
7 2003 Alabama 350345
8 2004 Alabama 382367
9 2005 Alabama 353156
10 2006 Alabama 391093
# ℹ 1,256 more rows
|>
ts_us_total filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
"Connectivut", "Rhode Island")) |>
ggplot(aes(x=year, y=y, color = state)) +
geom_line() +
facet_wrap('state', scales = "free_y")
2.5
The Region and Purpose with the highest average of trips is Sydney/Visiting.
<- read_excel('tourism.xlsx')
tourism_excel
<- tourism_excel |>
ts_tourism_excel mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter, key = c(Region, State, Purpose)) |>
print()
# A tsibble: 24,320 x 5 [1Q]
# Key: Region, State, Purpose [304]
Quarter Region State Purpose Trips
<qtr> <chr> <chr> <chr> <dbl>
1 1998 Q1 Adelaide South Australia Business 135.
2 1998 Q2 Adelaide South Australia Business 110.
3 1998 Q3 Adelaide South Australia Business 166.
4 1998 Q4 Adelaide South Australia Business 127.
5 1999 Q1 Adelaide South Australia Business 137.
6 1999 Q2 Adelaide South Australia Business 200.
7 1999 Q3 Adelaide South Australia Business 169.
8 1999 Q4 Adelaide South Australia Business 134.
9 2000 Q1 Adelaide South Australia Business 154.
10 2000 Q2 Adelaide South Australia Business 169.
# ℹ 24,310 more rows
<- tsibble::tourism |>
tourism_tsibble print()
# A tsibble: 24,320 x 5 [1Q]
# Key: Region, State, Purpose [304]
Quarter Region State Purpose Trips
<qtr> <chr> <chr> <chr> <dbl>
1 1998 Q1 Adelaide South Australia Business 135.
2 1998 Q2 Adelaide South Australia Business 110.
3 1998 Q3 Adelaide South Australia Business 166.
4 1998 Q4 Adelaide South Australia Business 127.
5 1999 Q1 Adelaide South Australia Business 137.
6 1999 Q2 Adelaide South Australia Business 200.
7 1999 Q3 Adelaide South Australia Business 169.
8 1999 Q4 Adelaide South Australia Business 134.
9 2000 Q1 Adelaide South Australia Business 154.
10 2000 Q2 Adelaide South Australia Business 169.
# ℹ 24,310 more rows
|>
tourism_excel group_by(Region, Purpose) |>
summarise(mean = mean(Trips)) |>
arrange(desc(mean)) |>
# head(1) |>
print()
# A tibble: 304 × 3
# Groups: Region [76]
Region Purpose mean
<chr> <chr> <dbl>
1 Sydney Visiting 747.
2 Melbourne Visiting 619.
3 Sydney Business 602.
4 North Coast NSW Holiday 588.
5 Sydney Holiday 550.
6 Gold Coast Holiday 528.
7 Melbourne Holiday 507.
8 South Coast Holiday 495.
9 Brisbane Visiting 493.
10 Melbourne Business 478.
# ℹ 294 more rows
|>
ts_tourism_excel group_by(State) |>
summarise(Trips = sum(Trips)) |>
print()
# A tsibble: 640 x 3 [1Q]
# Key: State [8]
State Quarter Trips
<chr> <qtr> <dbl>
1 ACT 1998 Q1 551.
2 ACT 1998 Q2 416.
3 ACT 1998 Q3 436.
4 ACT 1998 Q4 450.
5 ACT 1999 Q1 379.
6 ACT 1999 Q2 558.
7 ACT 1999 Q3 449.
8 ACT 1999 Q4 595.
9 ACT 2000 Q1 600.
10 ACT 2000 Q2 557.
# ℹ 630 more rows
2.8
“Total Private” Employed
from us_employment
The most applicable trend in this data can be found in the autoplot graph where it is clear that the trend is in the upward growth and not really in the seasonality or cyclicity. Based on all the other graphs there is barely any other cyclic trends. The gg_season
plot has very similar lines for each year, gg_subseries
the plots are almost identical month to month, gg_lag
all the lags appear the same and the ACF
has a very small downward trend. In general none of these have a noticeable trend which makes sense for employment since that tends to stay pretty consistent throughout the year with seasonal occupations canceling each other out. There don’t seem to be any unusual years in the data apart from the occasion dip in growth but that’s how employment tends to work, it waxes and wanes over time.
<- us_employment |>
total_private filter(Title == "Total Private") |>
print()
# A tsibble: 969 x 4 [1M]
# Key: Series_ID [1]
Month Series_ID Title Employed
<mth> <chr> <chr> <dbl>
1 1939 Jan CEU0500000001 Total Private 25338
2 1939 Feb CEU0500000001 Total Private 25447
3 1939 Mar CEU0500000001 Total Private 25833
4 1939 Apr CEU0500000001 Total Private 25801
5 1939 May CEU0500000001 Total Private 26113
6 1939 Jun CEU0500000001 Total Private 26485
7 1939 Jul CEU0500000001 Total Private 26481
8 1939 Aug CEU0500000001 Total Private 26848
9 1939 Sep CEU0500000001 Total Private 27468
10 1939 Oct CEU0500000001 Total Private 27830
# ℹ 959 more rows
|>
total_private autoplot()
|>
total_private gg_season()
|>
total_private gg_subseries()
|>
total_private gg_lag(geom = 'point')
|>
total_private ACF(Employed) |>
autoplot()
Bricks
from aus_production
While there was a general increase in the number of bricks produced there is also noticeable cyclic and seasonal trends. You can see a bulge toward the middle of the month in the gg_season
plot which indicates an increase in brick production for Q2 and Q3. The gg_subseries
plot clearly shows how much more is produced in Q2 and Q3 while also showing that from year to year the pattern stays largely the same since they all tend to have the same shape. The ACF
graph shows that there is indeed a cycle to it as the graph tends downward with a cyclic pattern as the lag is increased. There don’t seem to be any years that stand out other than the growth period at the beginning but after that the cycle seemed pretty stable with a bit of growth and decline over the years. There seemed to be a peak of production around 1980 but then a more stable amount produced before and after.
<- aus_production |>
bricks select(Quarter, Bricks) |>
print()
# A tsibble: 218 x 2 [1Q]
Quarter Bricks
<qtr> <dbl>
1 1956 Q1 189
2 1956 Q2 204
3 1956 Q3 208
4 1956 Q4 197
5 1957 Q1 187
6 1957 Q2 214
7 1957 Q3 227
8 1957 Q4 222
9 1958 Q1 199
10 1958 Q2 229
# ℹ 208 more rows
|>
bricks autoplot()
|>
bricks gg_season()
|>
bricks gg_subseries()
|>
bricks gg_lag(geom = 'point')
|>
bricks ACF(Bricks) |>
autoplot()
Hare
from pelt
Since this data is yearly data there isn’t as much to discern seasonally since it isn’t granular enough to get anything seasonal out of it. But there does seem to be a weirdly cyclic trend to the data despite this. gg-season
and gg_subseries
as mentioned before don’t work with this data since the data is annual so their graphs either won’t generate or don’t provide any useful information. ACF
on the other hand shows a very clear picture with it’s almost sine wave pattern. The data from year to year is extremely cyclic with a clear pattern, this most likely has an underlying cause that would require more insight into the data set to understand. Maybe there is a cyclic pattern to rabbit populations or prices for pelts changed or maybe there were legal years to hunt for pelts and others were not, there is most likely something driving this pattern but hard to discern from the data provided alone. There are a couple of abnormal peaks in ~1864 and ~1885 but other than that the data seems to follow the trend.
<- pelt |>
hare select(Year, Hare) |>
print()
# A tsibble: 91 x 2 [1Y]
Year Hare
<dbl> <dbl>
1 1845 19580
2 1846 19600
3 1847 19610
4 1848 11990
5 1849 28040
6 1850 58000
7 1851 74600
8 1852 75090
9 1853 88480
10 1854 61280
# ℹ 81 more rows
|>
hare autoplot()
# gg_season() doesn't work with this data set because it is yearly data
# hare |>
# gg_season()
|>
hare gg_subseries()
|>
hare gg_lag(geom = 'point')
|>
hare ACF(Hare) |>
autoplot()
“H02” Cost
from PBS
This data is very interesting. Generally from year to year there is either an upward or downward trend by type but it is very consistent but the cycles are extremely consistent by type. This is well illustrated by the gg_season
, gg_subseries
, and the ACF
plots. In the gg_season
and gg_subseries
plot you can clearly see that there is a seasonality to the data, especially in the “safety net” data where it increases throughout the year (likely due to people hitting their co-payment threshold as described in the data help). The “concessional co-payments” seem to follow the inverse of these by starting low increasing toward the middle and then decreasing again back down toward the end of the year. The ACF
graph clearly shows that this is highly cyclic with it’s sine pattern for the types described previously. There don’t seem to be any strange years in this data.
<- PBS |>
cost filter(ATC2 == "H02") |>
print()
# A tsibble: 816 x 9 [1M]
# Key: Concession, Type, ATC1, ATC2 [4]
Month Concession Type ATC1 ATC1_desc ATC2 ATC2_desc Scripts Cost
<mth> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1991 Jul Concessional Co-paym… H Systemic… H02 CORTICOS… 63261 317384
2 1991 Aug Concessional Co-paym… H Systemic… H02 CORTICOS… 53528 269891
3 1991 Sep Concessional Co-paym… H Systemic… H02 CORTICOS… 52822 269703
4 1991 Oct Concessional Co-paym… H Systemic… H02 CORTICOS… 54016 280418
5 1991 Nov Concessional Co-paym… H Systemic… H02 CORTICOS… 49281 268070
6 1991 Dec Concessional Co-paym… H Systemic… H02 CORTICOS… 51798 277139
7 1992 Jan Concessional Co-paym… H Systemic… H02 CORTICOS… 42436 221772
8 1992 Feb Concessional Co-paym… H Systemic… H02 CORTICOS… 52913 272345
9 1992 Mar Concessional Co-paym… H Systemic… H02 CORTICOS… 62908 325700
10 1992 Apr Concessional Co-paym… H Systemic… H02 CORTICOS… 68499 349271
# ℹ 806 more rows
|>
cost autoplot()
|>
cost gg_season()
|>
cost gg_subseries()
# cost |>
# # this plot has trouble with the different categories in the other columns
# select(Month, Cost) |>
# gg_lag(geom = 'point')
|>
cost ACF(Cost) |>
autoplot()
Barrels
from us_gasoline
This data is quite granular for the year which provides good detail but to the point of creating a lot of noise, making it hard to read the data (if I were doing my own analysis I would probably bin by month or quarter to make the graphs easier to read but I will answer the questions as they are asked for this assignment). There is very slight trend visible in the gg_subseries
plot where there is a small increase toward the middle of the year, which is the summer time for the northern hemisphere, which makes sense since demand tends to be high for gasoline since a lot of people are driving more to spend more time outside and go on vacation. It follows naturally that the supply will increase to meet this demand. Other than that there is a slight decrease on the ACF
graph pointing to a trend but it is most likely a small one and the same one that can be seen in the gg_subseries
. There is clearly general growth from the beginning of the chart to the end with a bit of decline and stablization in there but that is most likely from political and economic forces that I am too young to remember.
|>
us_gasoline print()
# A tsibble: 1,355 x 2 [1W]
Week Barrels
<week> <dbl>
1 1991 W06 6.62
2 1991 W07 6.43
3 1991 W08 6.58
4 1991 W09 7.22
5 1991 W10 6.88
6 1991 W11 6.95
7 1991 W12 7.33
8 1991 W13 6.78
9 1991 W14 7.50
10 1991 W15 6.92
# ℹ 1,345 more rows
|>
us_gasoline autoplot()
|>
us_gasoline gg_season()
|>
us_gasoline gg_subseries()
|>
us_gasoline gg_lag(geom = 'point')
|>
us_gasoline ACF(Barrels) |>
autoplot()