---
title: "HW 1"
author: "Matt Mullis"
date: "09/07/22"
output: html_document
---

Load packages and data

library(fpp3)
library(readr)
library(lubridate)
library(tsibble)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggfortify)
library(tidyquant)
library(USgas)
library(readxl)
# install and load any package necessary

Questions

Exercise 1

library(readxl)
# Read the tute1 workbook: the first column is parsed as a date, the other
# three as numeric.
tute1 <- readxl::read_excel(
  "//Users//mattmullis//Downloads//tute1.xlsx",
  col_types = c("date", "numeric", "numeric", "numeric")
)

# The index column is named Quarter, so it is converted with yearquarter()
# (not yearmonth()) so that the tsibble carries a quarterly index.
# Note: `ts` also names stats::ts(); R still resolves ts(...) calls to the
# function, but the name is kept here only for continuity with the original.
ts <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

# One line plot per series.
ggplot(ts, aes(x = Quarter, y = Sales)) +
  geom_line()

ggplot(ts, aes(x = Quarter, y = AdBudget)) +
  geom_line()

ggplot(ts, aes(x = Quarter, y = GDP)) +
  geom_line()

# All series in one figure: reshape to long form and give each series its own
# panel with an independent y axis.
ts %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Exercise 2

library(USgas)

# States to keep in the plot.
new_england <- c(
  "New Hampshire", "Maine", "Vermont",
  "Massachusetts", "Connecticut", "Rhode Island"
)

# Annual totals as a tsibble: one series per state, indexed by year.
totals <- us_total %>%
  as_tsibble(index = year, key = state)

# Plot consumption (scaled by 1e3) for the selected states only.
totals %>%
  filter(state %in% new_england) %>%
  autoplot(y / 1e3) +
  labs(y = "Natural Gas Consumption (Thousands)", x = "Time")

Exercise 3

# Read the tourism workbook and convert Quarter to a yearquarter index.
tourism <- readxl::read_excel("//Users//mattmullis//Downloads//tourism.xlsx") %>% 
  mutate(Quarter = yearquarter(Quarter))
head(tourism)
## # A tibble: 6 × 5
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

# Each series is identified by Region, State and Purpose. Trips is the
# measured value, so it must not be part of the key.
tourism_ts <- as_tsibble(
  tourism,
  key = c(Region, State, Purpose),
  index = Quarter
)

# Region/Purpose combination with the highest average number of trips.
# Dropping to a plain tibble first lets summarise() average over all quarters.
tourism_ts %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(avg = mean(Trips), .groups = "drop") %>%
  filter(avg == max(avg))
## # A tibble: 1 × 3
##   Region Purpose    avg
##   <chr>  <chr>    <dbl>
## 1 Sydney Visiting  747.

# Total trips per State and Quarter (Regions and Purposes combined).
tourism_ts %>%
  group_by(State) %>%
  summarise(total = sum(Trips)) %>%
  ungroup()
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter total
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  551.
##  2 ACT   1998 Q2  416.
##  3 ACT   1998 Q3  436.
##  4 ACT   1998 Q4  450.
##  5 ACT   1999 Q1  379.
##  6 ACT   1999 Q2  558.
##  7 ACT   1999 Q3  449.
##  8 ACT   1999 Q4  595.
##  9 ACT   2000 Q1  600.
## 10 ACT   2000 Q2  557.
## # … with 630 more rows

Exercise 4

# The plotted variable is named explicitly so the calls do not depend on
# automatic variable selection (and no selection message is emitted).
autoplot(aus_arrivals, Arrivals)

gg_season(aus_arrivals, Arrivals)

gg_subseries(aus_arrivals, Arrivals)

# Things I notice: in the autoplot, Japanese visits decrease significantly
# after about 1995. Also, there is much more seasonality after about 2000. In
# the gg_season plot, the UK has a massive dip during Q1, stays constant to Q3,
# then rises a lot in Q4. Every other country is much more consistent. The
# subseries plot is a little harder to interpret, but there is a clear upward
# trend in all the graphs, although in some countries like Japan there is the
# dip in the mid 90s.

Exercise 5

# Pick one retail series at random (seeded so the choice is reproducible).
set.seed(12355555)
ttt <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Turnover is passed explicitly to every plot, matching the ACF() call below,
# so no "Plot variable not specified" message is produced.
autoplot(ttt, Turnover)

# I see that it is mostly trending upward, with a very seasonal pattern,
# because the sudden shocks are evenly spaced. It is a little cyclic starting
# around 2000.
gg_season(ttt, Turnover)

# This shows me the seasonal jump happens in November/December. Very strong
# seasonality, and this makes the trend look stronger.
gg_subseries(ttt, Turnover)

# Again, this shows that the winter months are when this data jumps. The graph
# is very jumbled for everything but seasonal data.
gg_lag(ttt, Turnover)

# The fourth plot is a data visualization tool.
ttt %>%
  ACF(Turnover) %>%
  autoplot()

# The fifth plot contains more vertical lines than any of the other plots. All
# the lines are pretty much the same height except 12 and 24.

Exercise 6

# Facebook rows of gafa_stock, keeping the Close column.
fb <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  select(Close)

# Mean and standard deviation of the closing price.
fb %>% pull(Close) %>% mean()
## [1] 120.4625
fb %>% pull(Close) %>% sd()
## [1] 41.32364

# Closing prices as a plain vector, and their first differences.
vecfb <- fb$Close
diffvec <- diff(vecfb)

# Summary statistics of the differenced series.
diffmean <- mean(diffvec)
print(diffmean)
## [1] 0.06076372
diffsd <- sd(diffvec)
print(diffsd)
## [1] 2.414555
diffskew <- skewness(diffvec)
print(diffskew)
## [1] -3.973192
diffkurt <- kurtosis(diffvec)
print(diffkurt)
## [1] 71.02921
# Moments of the closing-price vector `vecfb`, computed by hand.
# NOTE(review): these use the raw closing prices, whereas skewness() and
# kurtosis() above were applied to the differenced series (diffvec) -- confirm
# which series the exercise intends.
n_obs <- length(vecfb)

handmean <- sum(vecfb) / n_obs
print(handmean)
## [1] 120.4625

# Sample standard deviation (n - 1 denominator).
handsd <- sqrt(sum((vecfb - handmean)^2) / (n_obs - 1))
print(handsd)
## [1] 41.32364

# Third standardized moment.
handskew <- sum((vecfb - handmean)^3) / ((n_obs - 1) * handsd^3)
print(handskew)
## [1] 0.2278433

# Fourth standardized moment. The deviation (x - mean) is what gets raised to
# the 4th power; the earlier version raised only the mean, which is why it
# printed the impossible negative value -72.26996.
handkurt <- sum((vecfb - handmean)^4) / ((n_obs - 1) * handsd^4)
print(handkurt)
## (re-run to regenerate this output; the previously recorded value is invalid)

Exercise 7

# Read the SO price workbook, keep the date and adjusted close, and add a
# running row number.
SO_data <- readxl::read_excel("//Users//mattmullis//Downloads//SO data forecast hw 1.xlsx") %>% 
  select(Date, AdjClose) %>% 
  mutate(n = row_number())

# Rows whose date falls in June. month() returns a number, so it is compared
# with 6 rather than the string "6".
my_plot <- filter(SO_data, month(Date) == 6)

# Line plot of the June adjusted closing prices (a separate statement from the
# assignment above, so it starts at column 0).
ggplot(data = my_plot) +
  geom_line(mapping = aes(x = Date, y = AdjClose, group = 1))

#d
# Mean and variance of AdjClose for every calendar month, computed in a single
# grouped summary instead of twelve copy-pasted filter/mean/var stanzas.
# Month is the numeric month (1 = January, ..., 12 = December).
monthly_stats <- SO_data %>%
  mutate(Month = month(Date)) %>%
  group_by(Month) %>%
  summarise(
    mean_adj_close = mean(AdjClose),
    var_adj_close = var(AdjClose),
    .groups = "drop"
  )
monthly_stats
# Values recorded from the earlier month-by-month run:
##   Month  mean(AdjClose)  var(AdjClose)
##       1        66.23176      0.3253819
##       2        64.42741      3.130927
##       3        67.52217      4.374493
##       4        73.85194      1.27328
##       5        73.36229      1.161209
##       6        70.2457      12.23514
##       7        71.6892       2.508793
##       8        77.94644      1.585646
##       9        64.40742     36.58955
##      10        60.33739      0.3122135
##      11        60.54399      0.3000452
##      12        64.03926      3.783693