library(readr)
## Warning: package 'readr' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Load the forecast history CSV into a tibble.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs where that exact location exists — consider a project-relative path.
forecast_data <- readr::read_csv(
  file = "C:/Users/Tugg99/Downloads/forecast_history.csv"
)
## New names:
## • `` -> `...1`
## Rows: 14 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Median house price, Westpac: 4 year forecast, Joe Bloggs: 2 year fo...
## dbl (1): ...1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the text columns read from the CSV into numeric columns.
# NOTE(review): the price conversion reports "NAs introduced by coercion" on
# this data, i.e. at least one price value still fails to parse after the
# commas are removed — inspect those cells before trusting later figures.
forecast_data <- mutate(
  forecast_data,
  # Prices: remove commas, then parse as numeric.
  `Median house price` = as.numeric(gsub(",", "", `Median house price`)),
  # Forecast columns: keep only digits, "." and "-", parse, then divide by
  # 100 (presumably converting percentages to fractions — confirm).
  across(
    contains("forecast"),
    function(raw_text) as.numeric(gsub("[^0-9.-]", "", raw_text)) / 100
  )
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Median house price = as.numeric(gsub(",", "", `Median house
## price`))`.
## Caused by warning:
## ! NAs introduced by coercion
# Scale each row's median price by (1 + forecast) to get the price level that
# each forecaster's figure implies for that row.
forecast_data <- mutate(
  forecast_data,
  Westpac_forecasted_price =
    (1 + `Westpac: 4 year forecast`) * `Median house price`,
  Joe_Bloggs_forecasted_price =
    (1 + `Joe Bloggs: 2 year forecast`) * `Median house price`,
  Harry_Spent_forecasted_price =
    (1 + `Harry Spent: 5 year forecast`) * `Median house price`
)
# Absolute gap between each forecasted price and the median price.
# NOTE(review): the forecasted price is built from the same row's median
# price, so this gap reduces to price * |forecast|. It reflects the size of
# the forecast rather than a miss against a later observed price — confirm
# this is the intended error definition before reading it as accuracy.
forecast_data <- mutate(
  forecast_data,
  Westpac_error = abs(`Median house price` - Westpac_forecasted_price),
  Joe_Bloggs_error = abs(`Median house price` - Joe_Bloggs_forecasted_price),
  Harry_Spent_error = abs(`Median house price` - Harry_Spent_forecasted_price)
)
# Show the column names after the derived columns have been added.
names(forecast_data)
## [1] "...1" "Median house price"
## [3] "Westpac: 4 year forecast" "Joe Bloggs: 2 year forecast"
## [5] "Harry Spent: 5 year forecast" "Westpac_forecasted_price"
## [7] "Joe_Bloggs_forecasted_price" "Harry_Spent_forecasted_price"
## [9] "Westpac_error" "Joe_Bloggs_error"
## [11] "Harry_Spent_error"
# Mean of each forecaster's absolute error column; rows where the error is
# missing are left out of the average.
mae_westpac <- mean(forecast_data[["Westpac_error"]], na.rm = TRUE)
mae_joe_bloggs <- mean(forecast_data[["Joe_Bloggs_error"]], na.rm = TRUE)
mae_harry_spent <- mean(forecast_data[["Harry_Spent_error"]], na.rm = TRUE)
# Report the three averages, one labelled line per forecaster.
print(paste0("MAE Westpac: ", mae_westpac))
## [1] "MAE Westpac: 132970"
print(paste0("MAE Joe Bloggs: ", mae_joe_bloggs))
## [1] "MAE Joe Bloggs: 686207.692307692"
print(paste0("MAE Harry Spent: ", mae_harry_spent))
## [1] "MAE Harry Spent: 326608.333333333"
# Reshape to long format: every column whose name contains "error" becomes
# rows, with the source column name in `Forecaster` and its value in `Error`.
forecast_data_long <- pivot_longer(
  forecast_data,
  cols = contains("error"),
  names_to = "Forecaster",
  values_to = "Error"
)
# Line chart of each forecaster's absolute error, one coloured line per
# forecaster.
#
# The first CSV column has no header, so read_csv() named it `...1`; it is
# looked up by position and used as the x axis. Update this lookup if the
# year column lives elsewhere in your data.
year_column_name <- colnames(forecast_data)[1]

# `aes_string()` is deprecated since ggplot2 3.0.0; a column whose name is
# held in a string is referenced through the `.data` pronoun inside `aes()`.
ggplot(
  forecast_data_long,
  aes(x = .data[[year_column_name]], y = Error, color = Forecaster)
) +
  geom_line() +
  labs(title = "Forecast Accuracy Over Time", x = "Year", y = "Absolute Error") +
  theme_minimal()
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).
