# Import the workbook and standardise its column names with janitor
# (the printed header below shows names such as `runner_up`).
mydata <- janitor::clean_names(read_excel("../00_data/mydata.xlsx"))

# Print the tibble to inspect what was imported.
mydata
## # A tibble: 54 × 9
## year winner score runner_up third_place fourth_place location
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 2024 UConn 75.6 Purdue *Alabama *NCState Phoenix
## 2 2023 UConn 76.6 SanDiegoSt. *Miami(FL) *FloridaAtlant… Houston
## 3 2022 Kansas 72.7 NorthCarolina *Villanova *Duke NewOrle…
## 4 2021 Baylor 86.7 Gonzaga *Houston *UCLA Indiana…
## 5 2019 Virginia 85.8 TexasTech *Auburn *MichiganSt. Minneap…
## 6 2018 Villanova 79.6 Michigan *Kansas *LoyolaChicago SanAnto…
## 7 2017 NorthCarolina 71.6 Gonzaga *Oregon *SouthCarolina Phoenix
## 8 2016 Villanova 77.7 NorthCarolina *Oklahoma *Syracuse Houston
## 9 2015 Duke 68.6 Wisconsin *MichiganSt. *Kentucky Indiana…
## 10 2014 UConn 60.5 Kentucky *Florida *Wisconsin Arlingt…
## # ℹ 44 more rows
## # ℹ 2 more variables: most_outstanding_player <chr>, winning_coach <chr>
# Keep the first 26 rows of the table, then narrow it to the columns that the
# factor exercises below work with.
data_clean <- mydata %>%
  slice(seq_len(26)) %>%
  select(year, winner, score, runner_up, location)
# Small sample of team names; not referenced by the code visible here.
x1 <- c("UConn", "Kansas", "Baylor", "Virginia")

# Explicit level order for `winner`. Each entry has to match the data value
# character-for-character, because factor() converts any value that is absent
# from `levels` into NA.
winner_levels <- c(
  "UConn", "Kansas", "Baylor", "Virginia", "Villanova", "NorthCarolina",
  "Duke", "†Louisville", "Kentucky",
  # Previously "Uconn " (trailing space), which matched no row and silently
  # produced an NA winner; the data spells it "Uconn".
  "Uconn",
  "Florida", "Syracuse", "Maryland", "MichiganSt."
)

# Same rows as `data_clean`, with `winner` stored as a factor in the order above.
data_rclean <- data_clean %>%
  mutate(winner = factor(winner, levels = winner_levels))
Make two bar charts here: one before ordering the factor levels and another after ordering them.
# Mean score for each distinct `winner` value (missing scores are ignored).
data_summary <- summarise(
  group_by(data_clean, winner),
  score = mean(score, na.rm = TRUE)
)

# Print the per-team means.
data_summary
## # A tibble: 14 × 2
## winner score
## <chr> <dbl>
## 1 Baylor 86.7
## 2 Duke 71.0
## 3 Florida 79.2
## 4 Kansas 74.2
## 5 Kentucky 73.1
## 6 Maryland 64.5
## 7 MichiganSt. 89.8
## 8 NorthCarolina 79.0
## 9 Syracuse 81.8
## 10 UConn 74.6
## 11 Uconn 53.4
## 12 Villanova 78.7
## 13 Virginia 85.8
## 14 †Louisville 82.8
# Bar chart of mean score per winner, with teams in their default order.
# geom_col() is used (instead of points) because the exercise asks for bar
# charts and the bar length should equal the value already in `score`.
ggplot(data_summary, aes(score, winner)) +
  geom_col()

# Same bar chart after reordering the teams by their mean score, so the bars
# run from lowest to highest. The axis label is set explicitly; otherwise it
# would show the fct_reorder() expression.
ggplot(data_summary, aes(score, fct_reorder(winner, score))) +
  geom_col() +
  labs(y = "winner")
Show examples of three functions for modifying factor levels (recoding, collapsing, and lumping):
# Recode: rename every winner level by appending a short suffix, then count
# the rows per renamed level. In the lookup vector the names are the new
# labels and the values are the existing levels; `!!!` splices the pairs into
# fct_recode() as individual arguments.
data_clean %>%
  mutate(
    winner = fct_recode(
      winner,
      !!!c(
        "Baylor.B"         = "Baylor",
        "Duke.D"           = "Duke",
        "Florida.F"        = "Florida",
        "Kansas.KA"        = "Kansas",
        "Kentucky.KE"      = "Kentucky",
        "Maryland.M"       = "Maryland",
        "MichiganSt..MS"   = "MichiganSt.",
        "NorthCarolina.NC" = "NorthCarolina",
        "Syracuse.S"       = "Syracuse",
        "UConn.U"          = "UConn",
        "Uconn.u"          = "Uconn",
        "Villanova.VN"     = "Villanova",
        "Virginia.VG"      = "Virginia",
        "†Louisville.L"    = "†Louisville"
      )
    )
  ) %>%
  count(winner)
## # A tibble: 14 × 2
## winner n
## <fct> <int>
## 1 †Louisville.L 1
## 2 Baylor.B 1
## 3 Duke.D 3
## 4 Florida.F 2
## 5 Kansas.KA 2
## 6 Kentucky.KE 2
## 7 Maryland.M 1
## 8 MichiganSt..MS 1
## 9 NorthCarolina.NC 3
## 10 Syracuse.S 1
## 11 Uconn.u 1
## 12 UConn.U 5
## 13 Villanova.VN 2
## 14 Virginia.VG 1
# Collapse: keep UConn as its own level (relabelled "UConn.U") and merge every
# other listed team into a single "Other" level, then count rows per level.
data_clean %>%
  mutate(
    winner = fct_collapse(
      winner,
      "UConn.U" = "UConn",
      "Other" = c(
        "†Louisville",
        "Virginia",
        "Villanova",
        "Uconn",
        "Syracuse",
        "NorthCarolina",
        "MichiganSt.",
        "Maryland",
        "Kentucky",
        "Kansas",
        "Florida",
        "Duke",
        "Baylor"
      )
    )
  ) %>%
  count(winner)
## # A tibble: 2 × 2
## winner n
## <fct> <int>
## 1 Other 21
## 2 UConn.U 5
# Lump: keep the 10 most frequent winners and fold the rest into "Other",
# then list the counts from most to least frequent.
# fct_lump_n() is the function fct_lump() hands off to when only `n` is given.
# NOTE(review): the output below still lists all 14 teams, so n = 10 lumps
# nothing for this data (presumably the single-win teams tie for the cut-off);
# consider a smaller n if the goal is to show an "Other" level.
data_clean %>%
  mutate(winner = fct_lump_n(winner, n = 10)) %>%
  count(winner, sort = TRUE)
## # A tibble: 14 × 2
## winner n
## <fct> <int>
## 1 UConn 5
## 2 Duke 3
## 3 NorthCarolina 3
## 4 Florida 2
## 5 Kansas 2
## 6 Kentucky 2
## 7 Villanova 2
## 8 †Louisville 1
## 9 Baylor 1
## 10 Maryland 1
## 11 MichiganSt. 1
## 12 Syracuse 1
## 13 Uconn 1
## 14 Virginia 1