#Overview #This article (https://projects.fivethirtyeight.com/2023-nba-predictions/) presents 538's 2022-23 NBA predictions for win totals, along with team strength / Elo rankings.
#Findings and Recommendations #We can compare these projections with the actual outcomes of the 2022-23 season in order to test the accuracy of 538's model.
# Package setup ----
# Install each package only when it is missing, then attach it.
# requireNamespace() tests availability without attaching anything, and
# library() stops with an error if a package still cannot be loaded, whereas
# require() only returns FALSE and lets the script carry on.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages(
    "tidyverse",
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages(
    "readr",
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}
# The former unconditional install.packages(c("tidyverse", "readr")) call was
# removed: the guarded installs above already cover both packages, and the
# extra call had no repos argument and only produced a "packages are in use"
# warning when the report was rendered.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Load the game-level Elo data ----
# Location of the raw CSV (nba_elo_latest.csv) on GitHub.
github_url <- "https://raw.githubusercontent.com/DanielSLastname/Data607/main/nba_elo_latest.csv"
# guess_max = Inf makes readr inspect every row when guessing column types.
# With the default guess window, an earlier render of this script reported
# 1320 rows x 27 columns, guessed seven columns as logical (among them playoff
# and several carm-elo* columns), and warned about parsing issues.
# NOTE(review): presumably those columns are blank in the rows used for
# guessing and the values that appear later failed to parse as logical --
# confirm with problems(nba_data) if the warning persists.
nba_data <- read_csv(github_url, guess_max = Inf)
# Build the cleaned table ----
# Keep eight columns (date, season, both teams, both scores, and the two
# *_post Elo columns) and give them readable names. select() renames while it
# selects, so a separate rename() step is not needed.
nba_clean <- nba_data %>%
  select(
    Date = date,
    Season = season,
    Team_1 = team1,
    Team_2 = team2,
    Score_Team_1 = score1,
    Score_Team_2 = score2,
    Elo_Team_1 = elo1_post,
    Elo_Team_2 = elo2_post
  ) %>%
  # Make sure Date has class Date; this changes nothing when the column was
  # already parsed as a date on import.
  mutate(Date = as.Date(Date))
# Preview the cleaned data ----
# Each result is printed explicitly so that the preview also appears when the
# script is run through source(), which does not auto-print top-level values
# by default. The console output is the same as with auto-printing.
print("Cleaned Dataset Preview:")
## [1] "Cleaned Dataset Preview:"
# First six rows of the cleaned table.
print(head(nba_clean))
## # A tibble: 6 × 8
## Date Season Team_1 Team_2 Score_Team_1 Score_Team_2 Elo_Team_1
## <date> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2022-10-18 2023 BOS PHI 126 117 1662.
## 2 2022-10-18 2023 GSW LAL 123 109 1663.
## 3 2022-10-19 2023 IND WAS 107 114 1389.
## 4 2022-10-19 2023 DET ORL 113 109 1397.
## 5 2022-10-19 2023 ATL HOU 117 107 1538.
## 6 2022-10-19 2023 MEM NYK 115 112 1608.
## # ℹ 1 more variable: Elo_Team_2 <dbl>
# Per-column summary statistics of the cleaned table.
print(summary(nba_clean))
## Date Season Team_1 Team_2
## Min. :2022-10-18 Min. :2023 Length:1320 Length:1320
## 1st Qu.:2022-12-02 1st Qu.:2023 Class :character Class :character
## Median :2023-01-16 Median :2023 Mode :character Mode :character
## Mean :2023-01-19 Mean :2023
## 3rd Qu.:2023-03-08 3rd Qu.:2023
## Max. :2023-06-12 Max. :2023
## Score_Team_1 Score_Team_2 Elo_Team_1 Elo_Team_2
## Min. : 80.0 Min. : 79 Min. :1257 Min. :1271
## 1st Qu.:108.0 1st Qu.:105 1st Qu.:1461 1st Qu.:1460
## Median :116.0 Median :113 Median :1522 Median :1525
## Mean :115.6 Mean :113 Mean :1511 Mean :1513
## 3rd Qu.:124.0 3rd Qu.:121 3rd Qu.:1577 3rd Qu.:1578
## Max. :175.0 Max. :176 Max. :1705 Max. :1719
# Export ----
# Write the cleaned table to nba_cleaned.csv (a relative path, so it lands in
# the current working directory) and then print a confirmation message.
readr::write_csv(x = nba_clean, file = "nba_cleaned.csv")
print(x = "Cleaned dataset saved as 'nba_cleaned.csv'.")
## [1] "Cleaned dataset saved as 'nba_cleaned.csv'."