2.1

a) autoplots

# Opening price for every stock in gafa_stock.
gafa_stock %>%
  autoplot(Open)

# Cost for the PBS series whose ATC2 code is "A10".
autoplot(filter(PBS, ATC2 == "A10"), Cost)

# Temperature readings recorded in vic_elec.
vic_elec %>%
  autoplot(Temperature)

# Hare and Lynx pelt counts, stacked into a single long `value` column first.
autoplot(pivot_longer(pelt, c(Hare, Lynx)), value)

b)

# gafa_stock: historical stock prices (USD) and trading volume, 2014-2018,
# for GOOG, AMZN, FB and AAPL.
# Time interval: one (trading) day.
help("gafa_stock")

## starting httpd help server ... done

# PBS: monthly Australian Medicare prescription data
# (number of prescriptions and their cost in AUD).
# Time interval: one month.
help("PBS")


# vic_elec: electricity demand for Victoria, Australia
# (demand, temperature and a holiday indicator).
# Time interval: 30 minutes.
help("vic_elec")


# pelt: annual fur pelt trading records from 1845 to 1935
# (number of hare and lynx pelts traded).
# Time interval: one year.
help("pelt")

2.2

# Finds the row(s) of gafa_stock on which a given ticker reached its highest
# closing price.
#
# ticker: a stock symbol to match against gafa_stock$Symbol (e.g. "AAPL").
#
# Returns the matching rows of gafa_stock with all columns intact; more than
# one row comes back if the maximum close was reached on several days.
max_close_price <- function(ticker) {
  # The pipeline is the last expression of the function, so its value is
  # returned (visibly). Piping into return() and assigning the result to an
  # unused local made the return value depend on how the pipe evaluates
  # return(), and at best returned it invisibly.
  gafa_stock %>%
    filter(Symbol == ticker) %>%
    filter(Close == max(Close))
}

# Print the highest-close row for each distinct ticker found in gafa_stock.
tickers <- unique(gafa_stock$Symbol)

for (ticker in tickers) {
  print(max_close_price(ticker))
}

## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close  Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
## 1 AMZN   2018-09-04 2026. 2050.  2013 2040.     2040. 5721100
## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close  Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
## 1 GOOG   2018-07-26  1251 1270. 1249. 1268.     1268. 2405600

2.3

a) load tute1

# Download the tute1 CSV from GitHub into a plain data frame.
tute1 <- read.csv(
  file = "https://raw.githubusercontent.com/schoolkidrich/CUNY_MSDS/main/DATA_624/hw1/tute1.csv"
)

# Preview the first rows.
head(tute1)

##      Quarter  Sales AdBudget   GDP
## 1 1981-03-01 1020.2    659.2 251.8
## 2 1981-06-01  889.2    589.0 290.9
## 3 1981-09-01  795.0    512.5 290.8
## 4 1981-12-01 1003.9    614.1 292.4
## 5 1982-03-01 1057.7    647.2 279.1
## 6 1982-06-01  944.4    602.0 254.0

b) convert data to time series

# Convert the Quarter column to a year-month index and build a tsibble on it.
tute1.series <- as_tsibble(
  mutate(tute1, Quarter = yearmonth(Quarter)),
  index = Quarter
)

head(tute1.series)

## # A tsibble: 6 x 4 [3M]
##    Quarter Sales AdBudget   GDP
##      <mth> <dbl>    <dbl> <dbl>
## 1 1981 Mar 1020.     659.  252.
## 2 1981 Jun  889.     589   291.
## 3 1981 Sep  795      512.  291.
## 4 1981 Dec 1004.     614.  292.
## 5 1982 Mar 1058.     647.  279.
## 6 1982 Jun  944.     602   254

c) construct time series plot

When you don’t include facet_grid(), all the lines are plotted on a single graph.

# One line per measure (Sales, AdBudget, GDP), each in its own facet with an
# independent y axis.
ggplot(
  pivot_longer(tute1.series, c(Sales, AdBudget, GDP)),
  aes(x = Quarter, y = value, color = name)
) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

2.4

a) load USgas package

library(USgas)

## Warning: package 'USgas' was built under R version 4.0.5

b) create time series object from us_total with year as index and state as key

# Build a tsibble from us_total: `year` is the time index, `state` is the key.
us_total.series <- as_tsibble(us_total, key = state, index = year)

head(us_total.series)

## # A tsibble: 6 x 3 [1Y]
## # Key:       state [1]
##    year state        y
##   <int> <chr>    <int>
## 1  1997 Alabama 324158
## 2  1998 Alabama 329134
## 3  1999 Alabama 337270
## 4  2000 Alabama 353614
## 5  2001 Alabama 332693
## 6  2002 Alabama 379343

c) plot annual gas consumption for the New England area

# Annual gas consumption for the six New England states, one facet per state.
us_total.series %>%
  # %in% tests set membership. Comparing with `==` against a 6-element vector
  # recycles that vector row by row, so a row is kept only when its state
  # happens to line up with the recycled position and most matching
  # observations are silently dropped.
  filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts",
                      "Connecticut", "Rhode Island")) %>%
  ggplot(aes(x = year, y = y, color = state)) +
  geom_line() +
  # Spell out `scales`; `scale` only worked through partial argument matching.
  facet_grid(state ~ ., scales = "free") +
  labs(title = "Annual Gas Consumption for New England Area")

2.5

a) read tourism.xlsx into R

# Read the tourism workbook from the working directory and preview it.
tour <- readxl::read_excel(path = "tourism.xlsx")

head(tour)

## # A tibble: 6 x 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.

b) create time series object using tour data

# Turn Quarter into a year-quarter index and key the tsibble by
# Region and Purpose.
tour.series <- as_tsibble(
  mutate(tour, Quarter = yearquarter(Quarter)),
  key = c(Region, Purpose),
  index = Quarter
)

head(tour.series)

## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

c) find combination of Region and Purpose that has maximum number of trips on average

# Mean of Trips for every Region/Purpose pair, keeping only the pair(s) with
# the largest mean. The column keeps the name avg_trip_length although it
# holds the average number of trips.
tour %>%
  group_by(Region, Purpose) %>%
  summarise(avg_trip_length = mean(Trips), .groups = "drop") %>%
  filter(avg_trip_length == max(avg_trip_length))

## # A tibble: 1 x 3
##   Region Purpose  avg_trip_length
##   <chr>  <chr>              <dbl>
## 1 Sydney Visiting            747.

d) create new time series object which combines purposes and regions

# Total trips per State for each quarter, summed over all regions and purposes.
trips.state <- summarise(
  group_by(tour.series, State),
  total_trips = sum(Trips)
)

head(trips.state)

## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   State Quarter total_trips
##   <chr>   <qtr>       <dbl>
## 1 ACT   1998 Q1        551.
## 2 ACT   1998 Q2        416.
## 3 ACT   1998 Q3        436.
## 4 ACT   1998 Q4        450.
## 5 ACT   1999 Q1        379.
## 6 ACT   1999 Q2        558.

2.8

# Fix the RNG seed so the same series is drawn on every run.
set.seed(8009)

# Draw one Series ID at random and keep only the rows belonging to it.
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail$`Series ID`, 1)
)

head(myseries)

## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State         Industry                           `Series ID`    Month Turnover
##   <chr>         <chr>                              <chr>          <mth>    <dbl>
## 1 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 Apr     28.8
## 2 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 May     32.1
## 3 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 Jun     28.5
## 4 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 Jul     29  
## 5 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 Aug     25.3
## 6 Western Aust~ Clothing, footwear and personal a~ A3349825J   1982 Sep     26.9

Overall, turnover has steadily increased over time but has plateaued since ~2010.

# Time plot of Turnover for the sampled retail series.
autoplot(myseries, Turnover)

Turnover always seems to be highest during November–December. This has always been the case, but in recent years the spike in turnover has grown. In recent years there is also a dip in turnover around February and a smaller spike around May.

# Seasonal plot of Turnover for the sampled retail series.
gg_season(myseries, Turnover)

data 624 hw1

Richard

2/13/2022

2.1