# excel file
mydata <- read_excel("../00_data/mydata.xlsx") %>%
janitor::clean_names()
mydata
## # A tibble: 56 × 9
## year winner score runner_up third_place fourth_place location
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 2025 <NA> 0 <NA> <NA> <NA> SanAnto…
## 2 2024 UConn 75.6 Purdue *Alabama *NCState Phoenix
## 3 2023 UConn 76.6 SanDiegoSt. *Miami(FL) *FloridaAtlantic Houston
## 4 2022 Kansas 72.7 NorthCarolina *Villanova *Duke NewOrle…
## 5 2021 Baylor 86.7 Gonzaga *Houston *UCLA Indiana…
## 6 2020 <NA> 0 <NA> <NA> <NA> <NA>
## 7 2019 Virginia 85.8 TexasTech *Auburn *MichiganSt. Minneap…
## 8 2018 Villanova 79.6 Michigan *Kansas *LoyolaChicago SanAnto…
## 9 2017 NorthCarolina 71.6 Gonzaga *Oregon *SouthCarolina Phoenix
## 10 2016 Villanova 77.7 NorthCarolina *Oklahoma *Syracuse Houston
## # ℹ 46 more rows
## # ℹ 2 more variables: most_outstanding_player <chr>, winning_coach <chr>
data_clean <- mydata %>%
select(winner, year, runner_up, score) %>%
slice(2:26)
# set.seed(1234)
# data_clean <- mydata %>%
# select(winner, year, runner_up) %>%
# slice(2:10)
#
# data_clean
mydata
data_clean %>%
pivot_wider(names_from = winner,
values_from = year)
## # A tibble: 25 × 17
## runner_up score UConn Kansas Baylor `NA` Virginia Villanova NorthCarolina
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Purdue 75.6 2024 NA NA NA NA NA NA
## 2 SanDiegoSt. 76.6 2023 NA NA NA NA NA NA
## 3 NorthCaroli… 72.7 NA 2022 NA NA NA NA NA
## 4 Gonzaga 86.7 NA NA 2021 NA NA NA NA
## 5 <NA> 0 NA NA NA 2020 NA NA NA
## 6 TexasTech 85.8 NA NA NA NA 2019 NA NA
## 7 Michigan 79.6 NA NA NA NA NA 2018 NA
## 8 Gonzaga 71.6 NA NA NA NA NA NA 2017
## 9 NorthCaroli… 77.7 NA NA NA NA NA 2016 NA
## 10 Wisconsin 68.6 NA NA NA NA NA NA NA
## # ℹ 15 more rows
## # ℹ 8 more variables: Duke <dbl>, `†Louisville` <dbl>, Kentucky <dbl>,
## # Uconn <dbl>, Florida <dbl>, Syracuse <dbl>, Maryland <dbl>,
## # MichiganSt. <dbl>
data_clean <- data_clean %>%
separate(col = score, into = c("winner_score", "runner_up_score"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [5].
data_clean %>%
unite(col = "score", c(winner_score:runner_up_score), sep = ".", )
## # A tibble: 25 × 4
## winner year runner_up score
## <chr> <dbl> <chr> <chr>
## 1 UConn 2024 Purdue 75.6
## 2 UConn 2023 SanDiegoSt. 76.59
## 3 Kansas 2022 NorthCarolina 72.69
## 4 Baylor 2021 Gonzaga 86.7
## 5 <NA> 2020 <NA> 0.NA
## 6 Virginia 2019 TexasTech 85.77
## 7 Villanova 2018 Michigan 79.62
## 8 NorthCarolina 2017 Gonzaga 71.65
## 9 Villanova 2016 NorthCarolina 77.74
## 10 Duke 2015 Wisconsin 68.63
## # ℹ 15 more rows