HDupre_HW1_DATA624

2.1

GAFA

help(gafa_stock)
head(gafa_stock)

## # A tsibble: 6 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close    Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
## 1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
## 2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
## 3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
## 4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
## 5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
## 6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200

autoplot(gafa_stock, Adj_Close) +
  labs(title = 'GAFA Adjusted Closing Price',
       y = 'Price ($)',
       x = 'Year')

The interval on the GAFA data appears to be daily but the [!] symbol when we call gafa_stock indicates that the tsibble object has no set interval.

PBS

head(PBS)

## # A tsibble: 6 x 9 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [1]
##      Month Concession   Type       ATC1  ATC1_desc ATC2  ATC2_desc Scripts  Cost
##      <mth> <chr>        <chr>      <chr> <chr>     <chr> <chr>       <dbl> <dbl>
## 1 1991 Jul Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   18228 67877
## 2 1991 Aug Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   15327 57011
## 3 1991 Sep Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   14775 55020
## 4 1991 Oct Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   15380 57222
## 5 1991 Nov Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   14371 52120
## 6 1991 Dec Concessional Co-paymen… A     Alimenta… A01   STOMATOL…   15028 54299

help(PBS)
autoplot(PBS, Scripts) +
  labs(title = 'Number of Prescriptions',
       y = 'Total',
       x = 'Year') +
  theme(legend.position='none')

The interval on the PBS data is monthly, as stated in the help documentation.

vic_elec

head(vic_elec)

## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE

help(vic_elec)
autoplot(vic_elec, Demand) +
  labs(title = 'Victoria Electricity Demand',
       y = 'Demand',
       x = 'Date')

The interval on the vic_elec data is half-hourly.

pelt

head(pelt)

## # A tsibble: 6 x 3 [1Y]
##    Year  Hare  Lynx
##   <dbl> <dbl> <dbl>
## 1  1845 19580 30090
## 2  1846 19600 45150
## 3  1847 19610 49150
## 4  1848 11990 39520
## 5  1849 28040 21230
## 6  1850 58000  8420

help(pelt)
autoplot(pelt, Hare) +
  labs(title = 'Count of Hare Pelts',
       y = 'Pelts',
       x = 'Year')

The interval for the pelt trading record data is yearly, as indicated by the [1Y] when calling the tsibble object.

2.2

my_gafa <- gafa_stock

my_gafa %>%
  group_by(Symbol) %>%
  filter(Close == max(Close))

## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

By first grouping by each stock’s ticker symbol and then filtering for the maximum Close value, we can find the dates of the highest closing price for each.

2.3

tute1 <- read.csv('https://raw.githubusercontent.com/hdupre/DATA624/main/HW1/tute1.csv')

mytimeseries <- tute1 %>%
  mutate(Quarter = yearmonth(Quarter)) %>%
  as_tsibble(index = Quarter)


mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()

2.4

usgas_ts <- USgas::us_total %>%
  as_tsibble(key = c(state), index = year)

usgas_ts %>%
  filter(state == c('Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island')) %>%
  mutate(y = y/1000) %>%
  autoplot(y) +
    labs(title = 'New England Natural Gas Consumption',
        y = "Million Cubic Feet ('000)",
        x = 'Year')

2.5

library(readxl)

tourism <- read_excel('tourism.xlsx')

tourism_ts <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(key = c(Region, State, Purpose),
             index = Quarter)


tourism %>%
  select(Region, Purpose, Trips) %>%
  group_by(Region, Purpose) %>%
  summarise(trip_average = mean(Trips)) %>%
  arrange(desc(trip_average))

## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.

## # A tibble: 304 × 3
## # Groups:   Region [76]
##    Region          Purpose  trip_average
##    <chr>           <chr>           <dbl>
##  1 Sydney          Visiting         747.
##  2 Melbourne       Visiting         619.
##  3 Sydney          Business         602.
##  4 North Coast NSW Holiday          588.
##  5 Sydney          Holiday          550.
##  6 Gold Coast      Holiday          528.
##  7 Melbourne       Holiday          507.
##  8 South Coast     Holiday          495.
##  9 Brisbane        Visiting         493.
## 10 Melbourne       Business         478.
## # … with 294 more rows

# Visiting Sydney had the highest average trips at ~747.27

library(stringr)

# Concatenate Region and Purpose
tourism$RegionPurpose <- str_c(tourism$Region,' ',tourism$Purpose)


tourism_ts_state <- tourism %>%
  group_by(State, Quarter) %>%
  mutate(total_trips = sum(Trips)) %>%
  select(Quarter,RegionPurpose,State,total_trips) %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(key = c(RegionPurpose,State),
             index = Quarter)


tourism_ts_state

## # A tsibble: 24,320 x 4 [1Q]
## # Key:       RegionPurpose, State [304]
## # Groups:    State @ Quarter [640]
##    Quarter RegionPurpose     State           total_trips
##      <qtr> <chr>             <chr>                 <dbl>
##  1 1998 Q1 Adelaide Business South Australia       1735.
##  2 1998 Q2 Adelaide Business South Australia       1395.
##  3 1998 Q3 Adelaide Business South Australia       1213.
##  4 1998 Q4 Adelaide Business South Australia       1453.
##  5 1999 Q1 Adelaide Business South Australia       1541.
##  6 1999 Q2 Adelaide Business South Australia       1636.
##  7 1999 Q3 Adelaide Business South Australia       1283.
##  8 1999 Q4 Adelaide Business South Australia       1387.
##  9 2000 Q1 Adelaide Business South Australia       1833.
## 10 2000 Q2 Adelaide Business South Australia       1415.
## # … with 24,310 more rows

The question says to sum total trips by State, but without grouping by Quarter as well there’s no point in having a tsibble – all the values regardless of RegionPurpose and Quarter would be the same for any State. Even in its current state, the RegionPurpose column is creating duplicate values without contributing more granular understanding.

2.8

set.seed(81023948)

my_aus_retail <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

my_aus_retail

## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State    Industry           `Series ID`    Month Turnover
##    <chr>    <chr>              <chr>          <mth>    <dbl>
##  1 Tasmania Clothing retailing A3349371A   1982 Apr      6.7
##  2 Tasmania Clothing retailing A3349371A   1982 May      7.4
##  3 Tasmania Clothing retailing A3349371A   1982 Jun      6.7
##  4 Tasmania Clothing retailing A3349371A   1982 Jul      7.1
##  5 Tasmania Clothing retailing A3349371A   1982 Aug      5.8
##  6 Tasmania Clothing retailing A3349371A   1982 Sep      5.8
##  7 Tasmania Clothing retailing A3349371A   1982 Oct      5.3
##  8 Tasmania Clothing retailing A3349371A   1982 Nov      7.1
##  9 Tasmania Clothing retailing A3349371A   1982 Dec     11.1
## 10 Tasmania Clothing retailing A3349371A   1983 Jan      5.3
## # … with 431 more rows

The selected series is the clothing retailing industry in the state of Tasmania.

autoplot(my_aus_retail, Turnover)

There seems to be an overall upward trend though this is made a little more murky with a large spike and subsequent drop in turnover beginning around 2014. Spikes are also visible in yearly intervals which indicates seasonality.

gg_season(my_aus_retail,Turnover)

#### The seasonal plot makes it more clear that turnover tends to sharply increase from November to December.

gg_subseries(my_aus_retail,Turnover)

December clearly has the highest turnover with a much higher mean (as indicated by the blue line). This plot also shows that turnover has increased significantly from 1982 to 2020, though there was a sharp overall drop during the ’90s.

gg_lag(my_aus_retail,Turnover, geom='point')

#### All lags have a positive relationship. Lags 1 and 2 further reinforce the seasonality of the data being driven by high turnover in December. These plots also made me go back and take a closer look at February/March as I previously hadn’t noticed that these months generally see the lowest turnover. The gg_season plot does reflect this.

my_aus_retail %>%
  ACF(Turnover) %>%
  autoplot()

All lags are positive with lag 1 and 12 being higher than the others – showring that peaks tend to be 12 months apart. We can also see a slight decrease in trend as lags increase.

Conclusions

I learned that December sees major spikes in turnover while turnover is the lowest in February/March – presumably because of the holidays and subsequent lull. Overall turnover has been increasing since 1983, though there was a generalized fall in the ’90s.