assignment--1-Daniel-Hanasab.knit

#Overview #This article (https://projects.fivethirtyeight.com/2023-nba-predictions/) presents the 538 2022-23 NBA predictions for win totals, as well as team strength / Elo rankings.

#Findings and Recommendations #We can take these projections and compare them with the actual outcomes of the 2022-2023 season in order to test the accuracy of 538's model.

# Make sure the tidyverse is available: try to load it, and install it from
# the CRAN cloud mirror only when loading fails.
if (!require(tidyverse)) {
  install.packages(
    "tidyverse",
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Install readr only when it is missing. requireNamespace() checks that the
# package is available without attaching it; attaching is done by library()
# below, which stops with an error if a package cannot be loaded.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages(
    "readr",
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}

# Packages are installed only through guarded checks. An unconditional
# install.packages() call is deliberately avoided here: reinstalling packages
# that are already in use only produces a warning, and a call without `repos`
# depends on a CRAN mirror having been configured for the session.
library(tidyverse)
library(readr)


# Location of the raw CSV file in the GitHub repository.
github_url <- "https://raw.githubusercontent.com/DanielSLastname/Data607/main/nba_elo_latest.csv"

# Download and parse the CSV; column types are guessed by readr.
# NOTE(review): the parsing warning recorded for this step probably comes from
# columns whose type was guessed as logical -- confirm with problems(nba_data).
nba_data <- github_url %>%
  read_csv()

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 1320 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): team1, team2
## dbl  (17): season, neutral, elo1_pre, elo2_pre, elo_prob1, elo_prob2, elo1_p...
## lgl   (7): playoff, carm-elo1_pre, carm-elo2_pre, carm-elo_prob1, carm-elo_p...
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Build the cleaned table in a single pipeline:
#   1. keep only the eight columns of interest, renaming them on the way
#      (select() accepts new_name = old_name pairs and keeps the given order);
#   2. coerce Date to class Date.
nba_clean <- nba_data %>%
  select(
    Date         = date,
    Season       = season,
    Team_1       = team1,
    Team_2       = team2,
    Score_Team_1 = score1,
    Score_Team_2 = score2,
    Elo_Team_1   = elo1_post,
    Elo_Team_2   = elo2_post
  ) %>%
  mutate(Date = as.Date(Date))

# Announce the preview, then display the first six rows of the cleaned table.
print("Cleaned Dataset Preview:")

## [1] "Cleaned Dataset Preview:"

head(nba_clean, n = 6)

## # A tibble: 6 × 8
##   Date       Season Team_1 Team_2 Score_Team_1 Score_Team_2 Elo_Team_1
##   <date>      <dbl> <chr>  <chr>         <dbl>        <dbl>      <dbl>
## 1 2022-10-18   2023 BOS    PHI             126          117      1662.
## 2 2022-10-18   2023 GSW    LAL             123          109      1663.
## 3 2022-10-19   2023 IND    WAS             107          114      1389.
## 4 2022-10-19   2023 DET    ORL             113          109      1397.
## 5 2022-10-19   2023 ATL    HOU             117          107      1538.
## 6 2022-10-19   2023 MEM    NYK             115          112      1608.
## # ℹ 1 more variable: Elo_Team_2 <dbl>

# Per-column summary statistics of the cleaned table.
nba_clean %>%
  summary()

##       Date                Season        Team_1             Team_2         
##  Min.   :2022-10-18   Min.   :2023   Length:1320        Length:1320       
##  1st Qu.:2022-12-02   1st Qu.:2023   Class :character   Class :character  
##  Median :2023-01-16   Median :2023   Mode  :character   Mode  :character  
##  Mean   :2023-01-19   Mean   :2023                                        
##  3rd Qu.:2023-03-08   3rd Qu.:2023                                        
##  Max.   :2023-06-12   Max.   :2023                                        
##   Score_Team_1    Score_Team_2   Elo_Team_1     Elo_Team_2  
##  Min.   : 80.0   Min.   : 79   Min.   :1257   Min.   :1271  
##  1st Qu.:108.0   1st Qu.:105   1st Qu.:1461   1st Qu.:1460  
##  Median :116.0   Median :113   Median :1522   Median :1525  
##  Mean   :115.6   Mean   :113   Mean   :1511   Mean   :1513  
##  3rd Qu.:124.0   3rd Qu.:121   3rd Qu.:1577   3rd Qu.:1578  
##  Max.   :175.0   Max.   :176   Max.   :1705   Max.   :1719

# Write the cleaned table to a CSV file in the current working directory,
# then report that the file was saved.
write_csv(x = nba_clean, file = "nba_cleaned.csv")

print("Cleaned dataset saved as 'nba_cleaned.csv'.")

## [1] "Cleaned dataset saved as 'nba_cleaned.csv'."