# Load the tournament workbook, run it through janitor's column-name
# cleaner, and show the resulting table.
mydata <- janitor::clean_names(read_excel("../00_data/mydata.xlsx"))
mydata
## # A tibble: 56 × 9
## year winner score runner_up third_place fourth_place location
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 2025 <NA> 0 <NA> <NA> <NA> SanAnto…
## 2 2024 UConn 75.6 Purdue *Alabama *NCState Phoenix
## 3 2023 UConn 76.6 SanDiegoSt. *Miami(FL) *FloridaAtlantic Houston
## 4 2022 Kansas 72.7 NorthCarolina *Villanova *Duke NewOrle…
## 5 2021 Baylor 86.7 Gonzaga *Houston *UCLA Indiana…
## 6 2020 <NA> 0 <NA> <NA> <NA> <NA>
## 7 2019 Virginia 85.8 TexasTech *Auburn *MichiganSt. Minneap…
## 8 2018 Villanova 79.6 Michigan *Kansas *LoyolaChicago SanAnto…
## 9 2017 NorthCarolina 71.6 Gonzaga *Oregon *SouthCarolina Phoenix
## 10 2016 Villanova 77.7 NorthCarolina *Oklahoma *Syracuse Houston
## # ℹ 46 more rows
## # ℹ 2 more variables: most_outstanding_player <chr>, winning_coach <chr>
# Working subset: rows 2 through 26 of mydata, restricted to the four
# columns winner, year, runner_up and score (in that order).
data_clean <- mydata %>%
  slice(2:26) %>%
  select(winner, year, runner_up, score)
# Print the full winner column as a plain character vector.
mydata[["winner"]]
## [1] NA "UConn" "UConn" "Kansas"
## [5] "Baylor" NA "Virginia" "Villanova"
## [9] "NorthCarolina" "Villanova" "Duke" "UConn"
## [13] "†Louisville" "Kentucky" "Uconn" "Duke"
## [17] "NorthCarolina" "Kansas" "Florida" "Florida"
## [21] "NorthCarolina" "UConn" "Syracuse" "Maryland"
## [25] "Duke" "MichiganSt." "UConn" "Kentucky"
## [29] "Arizona" "Kentucky" "UCLA" "Arkansas"
## [33] "NorthCarolina" "Duke" "Duke" "UNLV"
## [37] "Michigan" "Kansas" "Indiana" "Louisville"
## [41] "Villanova" "Georgetown" "NCState" "NorthCarolina"
## [45] "Indiana" "Louisville" "MichiganSt." "Kentucky"
## [49] "Marquette" "Indiana" "UCLA" "NCState"
## [53] "UCLA" "UCLA" "UCLA" "UCLA"
# Flag, element by element, whether the winner is UConn. The match
# ignores case because the data also contains the spelling "Uconn";
# a missing winner stays NA in the result.
str_detect(data_clean$winner, regex("UConn", ignore_case = TRUE))
## [1] TRUE TRUE FALSE FALSE NA FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [13] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [25] FALSE
# Count UConn titles in data_clean.
# na.rm = TRUE is needed because a missing winner produces NA, which
# would otherwise turn the whole sum into NA. The match ignores case so
# the "Uconn" spelling present in the data is counted as well.
sum(
  str_detect(data_clean$winner, regex("UConn", ignore_case = TRUE)),
  na.rm = TRUE
)
## [1] 5
# Count UConn titles across the full table. Missing winners are dropped
# from the sum, and the match ignores case so that the "Uconn" spelling
# in the data is not left out of the total.
mydata %>%
  summarise(
    num_UConn = sum(
      str_detect(winner, regex("UConn", ignore_case = TRUE)),
      na.rm = TRUE
    )
  )
## # A tibble: 1 × 1
## num_UConn
## <int>
## 1 6
# List the rows whose winner is not UConn: extract the team name where
# it occurs, then keep only rows where nothing was extracted (this also
# keeps rows with a missing winner). The extraction ignores case so the
# "Uconn" spelling in the data is recognised as UConn rather than being
# listed among the non-UConn rows.
mydata %>%
  mutate(
    col_UConn = str_extract(winner, regex("UConn", ignore_case = TRUE))
  ) %>%
  select(winner, col_UConn) %>%
  filter(is.na(col_UConn))
## # A tibble: 50 × 2
## winner col_UConn
## <chr> <chr>
## 1 <NA> <NA>
## 2 Kansas <NA>
## 3 Baylor <NA>
## 4 <NA> <NA>
## 5 Virginia <NA>
## 6 Villanova <NA>
## 7 NorthCarolina <NA>
## 8 Villanova <NA>
## 9 Duke <NA>
## 10 †Louisville <NA>
## # ℹ 40 more rows
# Build a copy of the winner column with UConn swapped for Duke and show
# it next to the original. The pattern ignores case so the "Uconn"
# spelling in the data is replaced too; other winners, and missing
# values, pass through unchanged.
mydata %>%
  mutate(
    col_Duke = str_replace(
      winner,
      regex("UConn", ignore_case = TRUE),
      "Duke"
    )
  ) %>%
  select(winner, col_Duke)
## # A tibble: 56 × 2
## winner col_Duke
## <chr> <chr>
## 1 <NA> <NA>
## 2 UConn Duke
## 3 UConn Duke
## 4 Kansas Kansas
## 5 Baylor Baylor
## 6 <NA> <NA>
## 7 Virginia Virginia
## 8 Villanova Villanova
## 9 NorthCarolina NorthCarolina
## 10 Villanova Villanova
## # ℹ 46 more rows