library(readr)
## Warning: package 'readr' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Load the forecast history CSV. Its first header cell is blank, so readr
# assigns that column the placeholder name `...1`.
forecast_data <- readr::read_csv(
  file = "C:/Users/Tugg99/Downloads/forecast_history.csv"
)
## New names:
## • `` -> `...1`
## Rows: 14 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Median house price, Westpac: 4 year forecast, Joe Bloggs: 2 year fo...
## dbl (1): ...1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the character columns read from the CSV into numbers.
forecast_data <- mutate(
  forecast_data,
  # Drop comma separators before coercing; values that are still not numeric
  # after that become NA (with a coercion warning).
  `Median house price` = as.numeric(gsub(",", "", `Median house price`)),
  # Keep only digits, dots and minus signs, coerce, then divide by 100
  # (presumably percentages -> fractions; confirm against the raw file).
  across(
    contains("forecast"),
    function(raw) as.numeric(gsub("[^0-9.-]", "", raw)) / 100
  )
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Median house price = as.numeric(gsub(",", "", `Median house
##   price`))`.
## Caused by warning:
## ! NAs introduced by coercion
# Projected price per forecaster: the row's median price scaled by
# (1 + that forecaster's forecast fraction).
forecast_data <- mutate(
  forecast_data,
  Westpac_forecasted_price =
    (1 + `Westpac: 4 year forecast`) * `Median house price`,
  Joe_Bloggs_forecasted_price =
    (1 + `Joe Bloggs: 2 year forecast`) * `Median house price`,
  Harry_Spent_forecasted_price =
    (1 + `Harry Spent: 5 year forecast`) * `Median house price`
)

# Absolute forecast error = |projected price - price observed at the horizon|.
# Subtracting the same-row price instead reduces to price * |forecast|, which
# measures the size of the predicted change and never consults an outcome.
# Re-running with this comparison changes the MAE figures and the plot.
# NOTE(review): assumes the first column identifies the period (one row per
# year) and that each forecast is the cumulative change over the horizon named
# in its column header (4, 2 and 5 years) -- confirm against the data source.
period_values <- forecast_data[[1]]

# Price recorded `horizon` periods after each row's period. Matching on the
# period value (not row position) keeps this independent of row order and
# yields NA where the target period is not in the data.
actual_price_after <- function(prices, horizon) {
  prices[match(period_values + horizon, period_values)]
}

forecast_data <- forecast_data %>%
  mutate(
    Westpac_error = abs(
      Westpac_forecasted_price - actual_price_after(`Median house price`, 4)
    ),
    Joe_Bloggs_error = abs(
      Joe_Bloggs_forecasted_price - actual_price_after(`Median house price`, 2)
    ),
    Harry_Spent_error = abs(
      Harry_Spent_forecasted_price - actual_price_after(`Median house price`, 5)
    )
  )

# List the column names now present, including the derived columns.
names(forecast_data)
##  [1] "...1"                         "Median house price"          
##  [3] "Westpac: 4 year forecast"     "Joe Bloggs: 2 year forecast" 
##  [5] "Harry Spent: 5 year forecast" "Westpac_forecasted_price"    
##  [7] "Joe_Bloggs_forecasted_price"  "Harry_Spent_forecasted_price"
##  [9] "Westpac_error"                "Joe_Bloggs_error"            
## [11] "Harry_Spent_error"
# Mean absolute error per forecaster; rows whose error is NA are ignored.
mae_westpac <- mean(forecast_data[["Westpac_error"]], na.rm = TRUE)
mae_joe_bloggs <- mean(forecast_data[["Joe_Bloggs_error"]], na.rm = TRUE)
mae_harry_spent <- mean(forecast_data[["Harry_Spent_error"]], na.rm = TRUE)

# Report each figure as a single labelled string.
paste("MAE Westpac:", mae_westpac) %>% print()
## [1] "MAE Westpac: 132970"
paste("MAE Joe Bloggs:", mae_joe_bloggs) %>% print()
## [1] "MAE Joe Bloggs: 686207.692307692"
paste("MAE Harry Spent:", mae_harry_spent) %>% print()
## [1] "MAE Harry Spent: 326608.333333333"
# Reshape to long form for plotting: every column whose name contains "error"
# is stacked, with its name stored in `Forecaster` and its value in `Error`.
forecast_data_long <- pivot_longer(
  data = forecast_data,
  cols = contains("error"),
  names_to = "Forecaster",
  values_to = "Error"
)

# The first column supplies the x axis. readr named it `...1` because its CSV
# header cell is blank; point this at the real year column if the data changes.
year_column_name <- colnames(forecast_data)[1]

# Line chart of absolute error per forecaster. `.data[[name]]` looks the x
# column up by its string name; it replaces the deprecated `aes_string()` and
# works for non-syntactic names such as `...1`.
ggplot(
  forecast_data_long,
  aes(x = .data[[year_column_name]], y = Error, color = Forecaster)
) +
  geom_line() +
  labs(title = "Forecast Accuracy Over Time", x = "Year", y = "Absolute Error") +
  theme_minimal()
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).