# Load team-results.csv from the TidyTuesday repository (2024-03-26 release).
# Column types are left to readr's guesser.
team_results <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-03-26/team-results.csv"
)
## Rows: 236 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TEAM, F4PERCENT, CHAMPPERCENT
## dbl (17): TEAMID, PAKE, PAKERANK, PASE, PASERANK, GAMES, W, L, WINPERCENT, R...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load public-picks.csv from the TidyTuesday repository (2024-03-26 release).
# Column types are left to readr's guesser.
public_picks <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-03-26/public-picks.csv"
)
## Rows: 64 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): TEAM, R64, R32, S16, E8, F4, FINALS
## dbl (2): YEAR, TEAMNO
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Teams whose name contains "St." or "State".
# The verbs are namespace-qualified because no library() call is visible in
# this script; a bare `filter()` would otherwise resolve to stats::filter().
state_teams <- team_results |>
  dplyr::filter(stringr::str_detect(TEAM, "St\\.|State"))

# Show each matching team next to its championship percentage.
state_teams |>
  dplyr::select(TEAM, CHAMPPERCENT)
## # A tibble: 46 × 2
## TEAM CHAMPPERCENT
## <chr> <chr>
## 1 Arizona St. 1.60%
## 2 Boise St. 0.20%
## 3 Cal St. Bakersfield 0.00%
## 4 Cal St. Fullerton 0.00%
## 5 Cal St. Northridge 0.00%
## 6 Cleveland St. 0.10%
## 7 Colorado St. 0.30%
## 8 East Tennessee St. 0.00%
## 9 Florida St. 4.60%
## 10 Fresno St. 0.00%
## # ℹ 36 more rows
# Teams whose names begin with "North" or "South". The pattern is anchored at
# the start of the string only, so longer words such as "Northwestern" match
# as well.
# The verbs are namespace-qualified because no library() call is visible in
# this script; a bare `filter()` would otherwise resolve to stats::filter().
north_south_teams <- public_picks |>
  dplyr::filter(stringr::str_detect(TEAM, "^(North|South)"))

# Show the matching teams with their R64 and FINALS pick percentages.
north_south_teams |>
  dplyr::select(TEAM, R64, FINALS)
## # A tibble: 5 × 3
## TEAM R64 FINALS
## <chr> <chr> <chr>
## 1 North Carolina 97.55% 12.10%
## 2 North Carolina St. 43.55% 0.07%
## 3 Northwestern 43.89% 0.08%
## 4 South Carolina 50.79% 0.14%
## 5 South Dakota St. 4.02% 0.04%
# Extract the numeric part of the championship percentage: take the first run
# of digits and dots in CHAMPPERCENT (e.g. "15.40%") and convert it to a
# double, stored in the new column `champ_percent_num`.
# The verbs are namespace-qualified because no library() call is visible in
# this script.
team_results_numeric <- team_results |>
  dplyr::mutate(
    champ_percent_num = CHAMPPERCENT |>
      stringr::str_extract("[0-9.]+") |>
      as.numeric()
  )

# Preview the original string column beside the parsed numeric column.
team_results_numeric |>
  dplyr::select(TEAM, CHAMPPERCENT, champ_percent_num) |>
  head()
## # A tibble: 6 × 3
## TEAM CHAMPPERCENT champ_percent_num
## <chr> <chr> <dbl>
## 1 Abilene Christian 0.00% 0
## 2 Akron 0.00% 0
## 3 Alabama 15.40% 15.4
## 4 Albany 0.00% 0
## 5 American 0.00% 0
## 6 Arizona 36.10% 36.1
# Extract the first word of each team name: everything from the start of TEAM
# up to (but not including) the first space, stored in `first_word`.
# The verbs are namespace-qualified because no library() call is visible in
# this script.
public_picks_words <- public_picks |>
  dplyr::mutate(
    first_word = stringr::str_extract(TEAM, "^[^ ]+")
  )

# Preview the team name beside its extracted first word.
public_picks_words |>
  dplyr::select(TEAM, first_word) |>
  head()
## # A tibble: 6 × 2
## TEAM first_word
## <chr> <chr>
## 1 Akron Akron
## 2 Alabama Alabama
## 3 Arizona Arizona
## 4 Auburn Auburn
## 5 Baylor Baylor
## 6 BYU BYU
# Remove % signs from the probability columns: for every column from R64
# through FINALS, drop the first "%" in each value and convert the remainder
# to numeric. The columns are overwritten in place.
# The verbs are namespace-qualified because no library() call is visible in
# this script.
public_picks_clean <- public_picks |>
  dplyr::mutate(
    dplyr::across(
      R64:FINALS,
      \(x) as.numeric(stringr::str_replace(x, "%", ""))
    )
  )

# Preview two of the converted columns.
public_picks_clean |>
  dplyr::select(TEAM, R64, FINALS) |>
  head()
## # A tibble: 6 × 3
## TEAM R64 FINALS
## <chr> <dbl> <dbl>
## 1 Akron 7.16 0.04
## 2 Alabama 87.1 1.1
## 3 Arizona 95.2 4.06
## 4 Auburn 90.2 1.34
## 5 Baylor 92.3 1.83
## 6 BYU 75.2 0.32
# Replace every literal "St." in the team names with "State".
# NOTE(review): the pattern matches "St." anywhere in the name, so a name where
# "St." abbreviates "Saint" would also be rewritten -- confirm that no such
# names occur in this data.
# The verbs are namespace-qualified because no library() call is visible in
# this script.
public_picks_standardized <- public_picks |>
  dplyr::mutate(
    TEAM = stringr::str_replace_all(TEAM, "St\\.", "State")
  )

# Preview the rewritten team names.
public_picks_standardized |>
  dplyr::select(TEAM) |>
  head()
## # A tibble: 6 × 1
## TEAM
## <chr>
## 1 Akron
## 2 Alabama
## 3 Arizona
## 4 Auburn
## 5 Baylor
## 6 BYU