This assignment explores several time series datasets to uncover trends, seasonal patterns, and cycles. Through exploratory analysis and visualization, it highlights real-world fluctuations in production, stock prices, electricity demand, and animal populations, and it emphasizes the importance of proper data preparation for accurate insights.
# Check dataset structure
glimpse(aus_production)
## Rows: 218
## Columns: 7
## $ Quarter <qtr> 1956 Q1, 1956 Q2, 1956 Q3, 1956 Q4, 1957 Q1, 1957 Q2, 1957…
## $ Beer <dbl> 284, 213, 227, 308, 262, 228, 236, 320, 272, 233, 237, 313…
## $ Tobacco <dbl> 5225, 5178, 5297, 5681, 5577, 5651, 5317, 6152, 5758, 5641…
## $ Bricks <dbl> 189, 204, 208, 197, 187, 214, 227, 222, 199, 229, 249, 234…
## $ Cement <dbl> 465, 532, 561, 570, 529, 604, 603, 582, 554, 620, 646, 637…
## $ Electricity <dbl> 3923, 4436, 4806, 4418, 4339, 4811, 5259, 4735, 4608, 5196…
## $ Gas <dbl> 5, 6, 7, 6, 5, 7, 7, 6, 5, 7, 8, 6, 5, 7, 8, 6, 6, 8, 8, 7…
glimpse(pelt)
## Rows: 91
## Columns: 3
## $ Year <dbl> 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855,…
## $ Hare <dbl> 19580, 19600, 19610, 11990, 28040, 58000, 74600, 75090, 88480, 61…
## $ Lynx <dbl> 30090, 45150, 49150, 39520, 21230, 8420, 5560, 5080, 10170, 19600…
glimpse(gafa_stock)
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
glimpse(vic_elec)
## Rows: 52,608
## Columns: 5
## $ Time <dttm> 2012-01-01 00:00:00, 2012-01-01 00:30:00, 2012-01-01 01:0…
## $ Demand <dbl> 4382.825, 4263.366, 4048.966, 3877.563, 4036.230, 3865.597…
## $ Temperature <dbl> 21.40, 21.05, 20.70, 20.55, 20.40, 20.25, 20.10, 19.60, 19…
## $ Date <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-0…
## $ Holiday <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
# Generate time series plots
autoplot(aus_production, Bricks) +
ggtitle("Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
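This warning is expected: the final quarters of aus_production have no Bricks observations. A quick check, sketched below, lists the affected quarters.
# List the quarters with missing Bricks values (the source of the warning above)
aus_production %>%
  filter(is.na(Bricks)) %>%
  select(Quarter, Bricks)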
autoplot(pelt, Lynx) +
ggtitle("Lynx Pelts Data")
autoplot(gafa_stock, Close) +
ggtitle("Stock Closing Prices")
autoplot(vic_elec, Demand) +
ggtitle("Electricity Demand in Victoria") +
xlab("Year") +
ylab("Electricity Demand (MW)") +
theme_minimal()
gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close)) %>%
select(Date, Symbol, Close)
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Date Symbol Close
## <date> <chr> <dbl>
## 1 2018-10-03 AAPL 232.
## 2 2018-09-04 AMZN 2040.
## 3 2018-07-25 FB 218.
## 4 2018-07-26 GOOG 1268.
# Load CSV file
tute1 <- readr::read_csv("tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check column names
glimpse(tute1)
## Rows: 100
## Columns: 4
## $ Quarter <date> 1981-03-01, 1981-06-01, 1981-09-01, 1981-12-01, 1982-03-01, …
## $ Sales <dbl> 1020.2, 889.2, 795.0, 1003.9, 1057.7, 944.4, 778.5, 932.5, 99…
## $ AdBudget <dbl> 659.2, 589.0, 512.5, 614.1, 647.2, 602.0, 530.7, 608.4, 637.9…
## $ GDP <dbl> 251.8, 290.9, 290.8, 292.4, 279.1, 254.0, 295.6, 271.7, 259.6…
# Convert data to time series
tute1_ts <- tute1 %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter)
# Plot all three series
autoplot(tute1_ts, vars(Sales, AdBudget, GDP)) +
ggtitle("Quarterly Time Series: Sales, AdBudget, and GDP")
# Reshape data for facet_grid()
tute1_ts_long <- tute1_ts %>%
pivot_longer(cols = c(Sales, AdBudget, GDP), names_to = "name", values_to = "value")
# Verify structure
glimpse(tute1_ts_long)
## Rows: 300
## Columns: 3
## Key: name [3]
## $ Quarter <qtr> 1981 Q1, 1981 Q1, 1981 Q1, 1981 Q2, 1981 Q2, 1981 Q2, 1981 Q3,…
## $ name <chr> "Sales", "AdBudget", "GDP", "Sales", "AdBudget", "GDP", "Sales…
## $ value <dbl> 1020.2, 659.2, 251.8, 889.2, 589.0, 290.9, 795.0, 512.5, 290.8…
# Plot with facet_grid()
tute1_ts_long %>%
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y") +
labs(title = "Quarterly Trends: Sales, AdBudget, and GDP",
x = "Year", y = "Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
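For comparison, a minimal sketch of the same data without facet_grid(): all three series then share one y-axis, which compresses the smaller GDP movements relative to Sales and AdBudget.
# Same data on a single panel: one common y-axis hides the smaller GDP variation
tute1_ts_long %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  ggtitle("Sales, AdBudget, and GDP on a Common Scale")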
##### Exercise 2.4: US Natural Gas Consumption
# Convert the US natural gas data (us_total, from the USgas package) to a tsibble
us_gas_ts <- us_total %>%
as_tsibble(index = year, key = state)
# Check column names
glimpse(us_gas_ts)
## Rows: 1,266
## Columns: 3
## Key: state [53]
## $ year <int> 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007…
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"…
## $ y <int> 324158, 329134, 337270, 353614, 332693, 379343, 350345, 382367, …
# Filter for the six New England states and plot annual consumption
us_gas_ts %>%
filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")) %>%
ggplot(aes(x = year, y = y, colour = state)) + # consumption values are stored in column "y"
geom_line() +
ggtitle("Annual Natural Gas Consumption in New England")
# Load tourism data
tourism_data <- readxl::read_excel("tourism.xlsx")
library(lubridate) # Ensure lubridate is loaded
# Convert Quarter to yearquarter format
tourism_data <- tourism_data %>%
mutate(Quarter = yearquarter(ymd(Quarter))) # Convert character to proper year-quarter format
# Convert to tsibble
tourism_ts <- tourism_data %>%
as_tsibble(index = Quarter, key = c(Region, State, Purpose))
# Verify structure
glimpse(tourism_ts)
## Rows: 24,320
## Columns: 5
## Key: Region, State, Purpose [304]
## $ Quarter <qtr> 1998 Q1, 1998 Q2, 1998 Q3, 1998 Q4, 1999 Q1, 1999 Q2, 1999 Q3,…
## $ Region <chr> "Adelaide", "Adelaide", "Adelaide", "Adelaide", "Adelaide", "A…
## $ State <chr> "South Australia", "South Australia", "South Australia", "Sout…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips <dbl> 135.0777, 109.9873, 166.0347, 127.1605, 137.4485, 199.9126, 16…
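As a quick sanity check on the new tsibble, a single key combination can be plotted on its own; the choice of Adelaide business trips below is arbitrary (a minimal sketch).
# Plot one Region/Purpose series to confirm the tsibble structure
tourism_ts %>%
  filter(Region == "Adelaide", Purpose == "Business") %>%
  autoplot(Trips) +
  ggtitle("Quarterly Business Trips to Adelaide")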
##### Exercise 2.8: Analyzing Time Series Features
#### Total Private Employment
glimpse(us_employment)
## Rows: 143,412
## Columns: 4
## Key: Series_ID [148]
## $ Month <mth> 1939 Jan, 1939 Feb, 1939 Mar, 1939 Apr, 1939 May, 1939 Jun, …
## $ Series_ID <chr> "CEU0500000001", "CEU0500000001", "CEU0500000001", "CEU05000…
## $ Title <chr> "Total Private", "Total Private", "Total Private", "Total Pr…
## $ Employed <dbl> 25338, 25447, 25833, 25801, 26113, 26485, 26481, 26848, 2746…
# Filter only "Total Private" employment data before plotting
us_employment %>%
filter(Title == "Total Private") %>% # Select only relevant data
autoplot(Employed) +
ggtitle("Total Private Employment Over Time")
#### Bricks Production
autoplot(aus_production, Bricks) +
ggtitle("Bricks Production Over Time")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
#### Hare Population
autoplot(pelt, Hare) +
ggtitle("Hare Population Over Time")
#### H02 Cost From PBS
PBS %>%
filter(ATC2 == "H02") %>%
autoplot(Cost) +
ggtitle("H02 Cost Over Time")
#### US Gasoline Barrels
autoplot(us_gasoline, Barrels) +
ggtitle("US Gasoline Barrels Over Time")
### Conclusion

This assignment helped me understand time series analysis by exploring trends, seasonality, and cycles in different datasets. I observed upward trends in employment and stock prices, seasonal patterns in electricity demand and bricks production, and cyclic fluctuations in animal populations.

I also learned the importance of data preprocessing, such as handling duplicates and converting dates to the correct temporal index. Moving forward, I would explore forecasting techniques to predict future trends. Overall, this assignment gave me a better understanding of how time series data can be used to analyze real-world patterns and inform decision-making.
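As a pointer toward that next step, here is a minimal forecasting sketch using the fable package (loaded with fpp3): a seasonal naive model fitted to quarterly beer production and forecast two years ahead.
# Minimal forecasting sketch: seasonal naive forecasts of quarterly Beer production
aus_production %>%
  model(snaive = SNAIVE(Beer)) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_production) +
  ggtitle("Seasonal Naive Forecast of Beer Production")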