# title: "HW 1"
# author: "Matt Mullis"
# date: "09/07/22"
# output: html_document
library(fpp3)
library(readr)
library(lubridate)
library(tsibble)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggfortify)
library(tidyquant)
library(USgas)
library(readxl)
# install and load any package necessary
library(readxl)
# Read the tute1 workbook: one date column followed by three numeric columns.
tute1 <- readxl::read_excel(
  "//Users//mattmullis//Downloads//tute1.xlsx",
  col_types = c("date", "numeric", "numeric", "numeric")
)

# Convert to a tsibble indexed by Quarter.
# The index is built with yearquarter() instead of yearmonth() so that the
# index type matches the column's meaning and the tsibble gets a quarterly
# interval rather than a monthly-typed index.
# NOTE(review): assumes the observations are quarterly, as the column name
# suggests -- confirm against the workbook.
ts <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)
# One line chart per series in `ts`, each drawn against the Quarter index.
ggplot(ts, aes(x = Quarter, y = Sales)) +
  geom_line()

ggplot(ts, aes(x = Quarter, y = AdBudget)) +
  geom_line()

ggplot(ts, aes(x = Quarter, y = GDP)) +
  geom_line()
# Reshape every column except Quarter into name/value pairs, then draw one
# facet row per series, each with its own y-axis range.
ts %>%
  pivot_longer(cols = -Quarter) %>%
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
library(USgas)
# Turn us_total into a tsibble with one series per state, indexed by year.
totals <- as_tsibble(us_total, index = year, key = state)

# Plot consumption (y, divided by 1e3) for the six New England states.
totals %>%
  filter(
    state %in% c(
      "New Hampshire", "Maine", "Vermont",
      "Massachusetts", "Connecticut", "Rhode Island"
    )
  ) %>%
  autoplot(y / 1e3) +
  labs(x = "Time", y = "Natural Gas Consumption (Thousands)")
# Load the tourism workbook and store Quarter as a yearquarter value.
tourism <- mutate(
  readxl::read_excel("//Users//mattmullis//Downloads//tourism.xlsx"),
  Quarter = yearquarter(Quarter)
)

# Show the first six rows.
tourism %>% head()
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
# Build the tourism tsibble. The key identifies each series, so it lists only
# the identifying columns (Region, State, Purpose). Trips is the measured value
# and must not be part of the key; including it would split the data into one
# "series" per distinct trip count.
tourism_ts <- as_tsibble(
  tourism,
  key = c(Region, State, Purpose),
  index = Quarter
)
# Find the Region/Purpose combination with the highest average Trips.
# Work on a plain tibble so the result does not depend on the tsibble's
# key/index, collapse to one row per combination with summarise(), and drop
# the grouping via .groups (the original passed `tourism_ts` to ungroup() as a
# stray extra argument). slice_max() keeps ties, matching avg == max(avg).
tourism_ts %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(avg = mean(Trips), .groups = "drop") %>%
  slice_max(avg, n = 1)
## # A tibble: 1 × 3
## Region Purpose avg
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# Sum Trips within each State (and, because this is a tsibble, each Quarter),
# then remove any remaining grouping.
summarise(group_by(tourism_ts, State), total = sum(Trips)) %>%
  ungroup()
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter total
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # … with 630 more rows
# Time plot, seasonal plot and subseries plot of aus_arrivals; the plotted
# variable is left for each function to select automatically.
aus_arrivals %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Arrivals`
aus_arrivals %>% gg_season()
## Plot variable not specified, automatically selected `y = Arrivals`
aus_arrivals %>% gg_subseries()
## Plot variable not specified, automatically selected `y = Arrivals`
# Things I notice: in the autoplot, Japanese visits decrease significantly after
# about 1995. Also, there is much more seasonality after about 2000. In the
# gg_season plot, the UK has a massive dip during Q1, stays constant through Q3,
# then rises a lot in Q4. Every other country is much more consistent. The
# subseries plot is a little harder to interpret, but there is a clear upward
# trend in all of the graphs, although in some countries, like Japan, there is
# a dip in the mid-1990s.
# Fix the RNG seed, then keep the rows of aus_retail whose Series ID equals
# one ID drawn at random from the Series ID column.
set.seed(12355555)
ttt <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail[["Series ID"]], size = 1)
)
ttt %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
# I see that it's mostly trending upward, with a very seasonal pattern, because
# the sudden shocks are evenly spaced. It's a little cyclic starting around 2000.
ttt %>% gg_season()
## Plot variable not specified, automatically selected `y = Turnover`
# This shows me the seasonal jump happens in November/December. Very strong
# seasonality, and this makes the trend look stronger.
ttt %>% gg_subseries()
## Plot variable not specified, automatically selected `y = Turnover`
# Again, this shows that the winter months are when this data jumps. The graph
# is very jumbled for everything but seasonal data.
ttt %>% gg_lag()
## Plot variable not specified, automatically selected `y = Turnover`
# The fourth plot is a data visualization tool.
autoplot(ACF(ttt, Turnover))
# The fifth plot contains more vertical lines than any of the other plots. All
# the lines are pretty much the same height except 12 and 24.
# Keep the rows of gafa_stock with Symbol "FB" and select the Close column.
fb <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  select(Close)

# Mean and standard deviation of Close.
mean(fb[["Close"]])
## [1] 120.4625
sd(fb[["Close"]])
## [1] 41.32364
# Close as a plain vector, and its first differences.
vecfb <- fb %>% pull(Close)
diffvec <- diff(vecfb)

# Mean, standard deviation, skewness and kurtosis of the differences, each
# stored and then printed.
diffmean <- mean(diffvec)
print(diffmean)
## [1] 0.06076372
diffsd <- sd(diffvec)
print(diffsd)
## [1] 2.414555
diffskew <- skewness(diffvec)
print(diffskew)
## [1] -3.973192
diffkurt <- kurtosis(diffvec)
print(diffkurt)
## [1] 71.02921
# Moments of vecfb computed by hand.

# Mean: sum of the values divided by their count.
handmean <- sum(vecfb) / length(vecfb)
print(handmean)
## [1] 120.4625

# Sample standard deviation: root of the summed squared deviations over n - 1.
handsd <- sqrt(sum((vecfb - handmean)^2) / (length(vecfb) - 1))
print(handsd)
## [1] 41.32364

# Skewness: summed cubed deviations over (n - 1) * sd^3.
handskew <- sum((vecfb - handmean)^3) / ((length(vecfb) - 1) * handsd^3)
print(handskew)
## [1] 0.2278433

# Kurtosis: summed fourth-power deviations over (n - 1) * sd^4.
# The deviation (vecfb - handmean) must be raised to the fourth power as a
# whole; the earlier form subtracted handmean^4 from every value instead.
handkurt <- sum((vecfb - handmean)^4) / ((length(vecfb) - 1) * handsd^4)
print(handkurt)
## (output not re-recorded: the earlier value of -72.26996 came from the
## mis-parenthesised formula and is not a valid kurtosis; re-run to refresh)
# Load the SO workbook, keep only Date and AdjClose, and add a running row
# number column n.
SO_data <- mutate(
  select(
    readxl::read_excel("//Users//mattmullis//Downloads//SO data forecast hw 1.xlsx"),
    Date, AdjClose
  ),
  n = row_number()
)
# Rows of SO_data dated in June. month() yields a month number, so it is
# compared with the number 6 rather than the string "6" (which only worked
# through implicit numeric-to-character coercion).
my_plot <- SO_data %>%
  filter(month(Date) == 6)

# Line chart of AdjClose over Date for June; group = 1 draws a single line.
ggplot(my_plot, aes(x = Date, y = AdjClose, group = 1)) +
  geom_line()
#d
# Mean and variance of AdjClose for each calendar month.
# The twelve copy-pasted filter/mean/var blocks are replaced by two small
# helpers. Month numbers are compared as numbers (not strings such as "1"),
# and the Jan..Dec objects are still created so later code can use them.

# Rows of SO_data whose Date falls in calendar month `month_number`
# (1 = January, ..., 12 = December).
so_month <- function(month_number) {
  filter(SO_data, month(Date) == month_number)
}

# Print the mean, then the variance, of AdjClose for one month's rows and
# return those rows invisibly so the call can be assigned.
report_adj_close <- function(month_rows) {
  print(mean(month_rows$AdjClose))
  print(var(month_rows$AdjClose))
  invisible(month_rows)
}

Jan <- report_adj_close(so_month(1))
## [1] 66.23176
## [1] 0.3253819
Feb <- report_adj_close(so_month(2))
## [1] 64.42741
## [1] 3.130927
Mar <- report_adj_close(so_month(3))
## [1] 67.52217
## [1] 4.374493
Apr <- report_adj_close(so_month(4))
## [1] 73.85194
## [1] 1.27328
May <- report_adj_close(so_month(5))
## [1] 73.36229
## [1] 1.161209
Jun <- report_adj_close(so_month(6))
## [1] 70.2457
## [1] 12.23514
Jul <- report_adj_close(so_month(7))
## [1] 71.6892
## [1] 2.508793
Aug <- report_adj_close(so_month(8))
## [1] 77.94644
## [1] 1.585646
Sep <- report_adj_close(so_month(9))
## [1] 64.40742
## [1] 36.58955
Oct <- report_adj_close(so_month(10))
## [1] 60.33739
## [1] 0.3122135
Nov <- report_adj_close(so_month(11))
## [1] 60.54399
## [1] 0.3000452
Dec <- report_adj_close(so_month(12))
## [1] 64.03926
## [1] 3.783693