autoplot(gafa_stock, Open)

PBS %>%
  filter(ATC2 == "A10") %>%
  autoplot(Cost)

autoplot(vic_elec, Temperature)

pelt %>%
  pivot_longer(c(Hare, Lynx)) %>%
  autoplot(value)
# historical stock prices (USD) and volume for GOOG, AMZN, FB and AAPL between 2014 and 2018
# time interval is (trading) days
help(gafa_stock)
# monthly Australian Medicare prescription data (number of prescriptions and cost in AUD)
# time interval is monthly
help(PBS)
# electricity demand for Victoria, Australia (Demand, Temperature, Holiday indicator)
# time interval of 30 minutes
help(vic_elec)
# Annual fur pelt trading data from 1845 to 1935 (number of hare and lynx pelts traded)
# time interval is one year
help(pelt)
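As a quick cross-check (a sketch, assuming the fpp3 packages are already loaded), each tsibble's time interval can also be read off programmatically with tsibble::interval() instead of from the help pages:
interval(gafa_stock) # irregular trading days, shown as [!]
interval(PBS)        # 1M
interval(vic_elec)   # 30m
interval(pelt)       # 1Y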
# finds the row where the closing price is highest for a given ticker in gafa_stock
max_close_price = function(ticker){
  gafa_stock %>%
    filter(Symbol == ticker) %>%
    filter(Close == max(Close))
}
# prints row where closing price is highest for each unique ticker in gafa_stock
tickers = gafa_stock$Symbol %>%
  unique()
for (ticker in tickers){
  print(max_close_price(ticker))
}
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
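The same result can be obtained without an explicit loop. A sketch using dplyr grouping on the key, so that filter() is applied within each Symbol group:
gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close)) %>%
  ungroup()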
tute1 = read.csv('https://raw.githubusercontent.com/schoolkidrich/CUNY_MSDS/main/DATA_624/hw1/tute1.csv')
head(tute1)
## Quarter Sales AdBudget GDP
## 1 1981-03-01 1020.2 659.2 251.8
## 2 1981-06-01 889.2 589.0 290.9
## 3 1981-09-01 795.0 512.5 290.8
## 4 1981-12-01 1003.9 614.1 292.4
## 5 1982-03-01 1057.7 647.2 279.1
## 6 1982-06-01 944.4 602.0 254.0
tute1.series = tute1 %>%
  mutate(Quarter = yearmonth(Quarter)) %>%
  as_tsibble(index = Quarter)
head(tute1.series)
## # A tsibble: 6 x 4 [3M]
## Quarter Sales AdBudget GDP
## <mth> <dbl> <dbl> <dbl>
## 1 1981 Mar 1020. 659. 252.
## 2 1981 Jun 889. 589 291.
## 3 1981 Sep 795 512. 291.
## 4 1981 Dec 1004. 614. 292.
## 5 1982 Mar 1058. 647. 279.
## 6 1982 Jun 944. 602 254
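Note that yearmonth() produces a monthly index spaced three months apart, which is why the header reports [3M]. Since the data are quarterly, a yearquarter() index is arguably more natural; a sketch (not used for the plots below):
tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)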
When facet_grid() is not included, all three series are drawn on a single panel with a shared y-axis.
tute1.series %>%
  pivot_longer(c(Sales, AdBudget, GDP)) %>%
  ggplot(aes(x = Quarter, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
library(USgas)
us_total.series = us_total %>%
  as_tsibble(index = year, key = state)
head(us_total.series)
## # A tsibble: 6 x 3 [1Y]
## # Key: state [1]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
us_total.series %>%
  # %in% keeps every row whose state is in the list; == would recycle the vector and silently drop rows
  filter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island')) %>%
  ggplot(aes(x = year, y = y, color = state)) +
  geom_line() +
  facet_grid(state ~ ., scales = 'free_y') +
  labs(title = 'Annual Gas Consumption for the New England Area')
tour = readxl::read_excel('tourism.xlsx')
head(tour)
## # A tibble: 6 x 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
tour.series = tour %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter, key = c(Region, Purpose))
head(tour.series)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
# compute the average number of trips for each Region/Purpose combination,
# then keep the combination with the highest average
tour %>%
  group_by(Region, Purpose) %>%
  summarize(avg_trip_length = mean(Trips), .groups = "keep") %>%
  ungroup() %>%
  filter(avg_trip_length == max(avg_trip_length))
## # A tibble: 1 x 3
## Region Purpose avg_trip_length
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
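An equivalent sketch using dplyr::slice_max() instead of filtering on the maximum (assuming dplyr >= 1.0.0, where slice_max() is available):
tour %>%
  group_by(Region, Purpose) %>%
  summarize(avg_trip_length = mean(Trips), .groups = "drop") %>%
  slice_max(avg_trip_length, n = 1)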
trips.state = tour.series %>%
  group_by(State) %>%
  summarize(total_trips = sum(Trips))
head(trips.state)
## # A tsibble: 6 x 3 [1Q]
## # Key: State [1]
## State Quarter total_trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
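Since trips.state is still a tsibble keyed by State, its quarterly totals can be plotted directly; a sketch using fpp3's autoplot() method for tsibbles:
trips.state %>%
  autoplot(total_trips)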
set.seed(8009)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 Apr 28.8
## 2 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 May 32.1
## 3 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 Jun 28.5
## 4 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 Jul 29
## 5 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 Aug 25.3
## 6 Western Aust~ Clothing, footwear and personal a~ A3349825J 1982 Sep 26.9
Overall, turnover has steadily increased over time but has plateaued since around 2010.
myseries%>%
autoplot(Turnover)
Turnover is consistently highest in November and December. This pattern holds across the whole series, but the size of the year-end spike has grown in recent years. In recent years there is also a dip in turnover around February and a smaller spike around May.
myseries%>%
gg_season(Turnover)
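For another view of the November-December seasonality described above, a sketch using the subseries and autocorrelation plots from feasts (loaded with fpp3):
myseries %>%
  gg_subseries(Turnover)
myseries %>%
  ACF(Turnover) %>%
  autoplot()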