Has forecasting accuracy improved over time?

Lorem ipsum dolor sit amet.

## Warning in geom_line(fill = "steelblue", width = 0.8): Ignoring unknown
## parameters: `fill` and `width`


# Working copy of the forecast data: coerce `actual_date` to Date, then
# derive an abbreviated month label (e.g. "Apr") used for grouping/faceting.
df <- final_df %>%
  mutate(actual_date = as.Date(actual_date)) %>%
  mutate(month = month(actual_date, label = TRUE, abbr = TRUE))

# Restrict to Washington (state == "WA") and add the signed 5-day forecast
# errors, computed as predicted minus actual: a positive value means the
# forecast was higher than the observed temperature.
df <- df %>%
  filter(state == "WA") %>%
  mutate(error_min_5day = min_pred_5day - minpresent) %>%
  mutate(error_max_5day = max_pred_5day - maxpresent)

# Mean absolute 5-day forecast error per state and month.
# NA errors are ignored in the means, but `n` counts every row in the group.
# summarise() only peels off the last grouping level (month), so the result
# is still grouped by state; ungroup() clears that.
summary_errors <- ungroup(
  summarise(
    group_by(df, state, month),
    MAE_min = mean(abs(error_min_5day), na.rm = TRUE),
    MAE_max = mean(abs(error_max_5day), na.rm = TRUE),
    n = n()
  )
)
## `summarise()` has grouped output by 'state'. You can override using the
## `.groups` argument.
# Bar chart of the monthly mean absolute error of the 5-day minimum
# temperature forecast; bars are coloured by state and dodged side by side.
ggplot(summary_errors, aes(month, MAE_min, fill = state)) +
  geom_col(position = "dodge") +
  labs(
    title = "Forecast Accuracy: 5-day Min Temp by State & Month",
    x = "Month",
    y = "Mean Absolute Error (Degrees)"
  ) +
  theme_minimal()

# Bar chart of the monthly mean absolute error of the 5-day maximum
# temperature forecast; bars are coloured by state and dodged side by side.
ggplot(summary_errors, aes(month, MAE_max, fill = state)) +
  geom_col(position = "dodge") +
  labs(
    title = "Forecast Accuracy: 5-day Max Temp by State & Month",
    x = "Month",
    y = "Mean Absolute Error (Degrees)"
  ) +
  theme_minimal()

# Predicted vs. observed minimum temperature, one panel per month.
# The dashed y = x reference line marks a perfect forecast; points off the
# line are forecast misses.
ggplot(df, aes(min_pred_5day, minpresent, color = state)) +
  geom_point(alpha = 0.5) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  facet_wrap(~month) +
  labs(
    title = "5-Day Forecast vs Actual Min Temp",
    x = "Predicted Min (5 day)",
    y = "Actual Min"
  ) +
  theme_minimal()

# Predicted vs. observed maximum temperature, one panel per month.
# The dashed y = x reference line marks a perfect forecast; points off the
# line are forecast misses.
ggplot(df, aes(max_pred_5day, maxpresent, color = state)) +
  geom_point(alpha = 0.5) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  facet_wrap(~month) +
  labs(
    title = "5-Day Forecast vs Actual Max Temp",
    x = "Predicted Max (5 day)",
    y = "Actual Max"
  ) +
  theme_minimal()

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.