## Homework 7: Time Series Analysis

Loading the Data

# Load the data: convert the `co2` time series into a tsibble.
# For a `ts` input, as_tsibble() builds the index column from the series' own
# time attributes, so no `index` argument is supplied (one passed here would
# only be absorbed by `...` and have no effect).
co2_data <- as_tsibble(co2)

# Inspect the data: column names, types and the first few values.
glimpse(co2_data)
## Rows: 468
## Columns: 2
## $ index <mth> 1959 Jan, 1959 Feb, 1959 Mar, 1959 Apr, 1959 May, 1959 Jun, 1959…
## $ value <dbl> 315.42, 316.31, 316.50, 317.56, 318.13, 318.00, 316.39, 314.65, …

Data Preprocessing

# Count the missing values in each column (one named total per column).
vapply(co2_data, function(column) sum(is.na(column)), numeric(1))
## index value 
##     0     0
# Inspect the data again. The check above found no missing values, so no
# preprocessing was applied and the data is the same as when it was loaded.
glimpse(co2_data)
## Rows: 468
## Columns: 2
## $ index <mth> 1959 Jan, 1959 Feb, 1959 Mar, 1959 Apr, 1959 May, 1959 Jun, 1959…
## $ value <dbl> 315.42, 316.31, 316.50, 317.56, 318.13, 318.00, 316.39, 314.65, …

Building the Time Series Model

# Fix the random seed so that any randomised step is reproducible.
# NOTE(review): no random step is visible in this section; confirm whether
# the seed is actually needed.
set.seed(123)

# Fit an ARIMA model, letting auto.arima() select the model orders.
# NOTE(review): `co2_data$value` is a plain numeric column, so the monthly
# frequency of the series is not handed to auto.arima(). Presumably this is why
# the selected model has no seasonal component; consider fitting on a monthly
# `ts` instead (this would change the results reported below).
ts_model <- auto.arima(co2_data$value)

# Print the selected model: orders, coefficients, standard errors and
# information criteria.
print(ts_model)
## Series: co2_data$value 
## ARIMA(2,1,1) with drift 
## 
## Coefficients:
##          ar1      ar2      ma1   drift
##       1.5519  -0.8550  -0.9272  0.1055
## s.e.  0.0239   0.0237   0.0129  0.0070
## 
## sigma^2 = 0.3808:  log likelihood = -436.73
## AIC=883.47   AICc=883.6   BIC=904.2

Model Evaluation

# Project the fitted model 12 steps (months) beyond the end of the series.
forecast_values <- forecast(object = ts_model, h = 12)

# Summarise the fit of the model. Only the forecast object is supplied, so
# these are in-sample ("Training set") errors, not accuracy on held-out data.
accuracy_metrics <- accuracy(forecast_values)
print(accuracy_metrics)
##                       ME     RMSE       MAE          MPE     MAPE      MASE
## Training set -0.00250167 0.613772 0.4985692 -0.002107331 0.148101 0.4632363
##                    ACF1
## Training set -0.0192513

Visualization

# Convert the forecast to a data frame for plotting (one row per forecast
# step, with the point forecast and interval bounds as columns).
forecast_df <- as.data.frame(forecast_values)

# Date each forecast row with the months that immediately follow the last
# observation. Deriving the dates from the data (instead of hard-coding a start
# date) places the forecast right after the observed series, and keeping them
# as year-months matches the type of `co2_data$index` on the shared x axis.
forecast_df$date <- max(co2_data$index) + seq_len(nrow(forecast_df))

# Plot the observed series (blue) followed by the point forecast (red).
ggplot(co2_data, aes(x = index, y = value)) +
  geom_line(color = "blue") +
  geom_line(
    data = forecast_df,
    aes(x = date, y = `Point Forecast`),
    color = "red"
  ) +
  labs(
    title = "CO2 Time Series Forecast",
    x = "Date",
    y = "CO2 Concentrations"
  ) +
  theme_minimal()