Import your data

# Read the team-results table of the TidyTuesday 2024-03-26 dataset straight
# from the project's GitHub repository.
team_results <- readr::read_csv(
  file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-03-26/team-results.csv'
)
## Rows: 236 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): TEAM, F4PERCENT, CHAMPPERCENT
## dbl (17): TEAMID, PAKE, PAKERANK, PASE, PASERANK, GAMES, W, L, WINPERCENT, R...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the public-picks table of the same TidyTuesday 2024-03-26 dataset from
# the project's GitHub repository.
public_picks <- readr::read_csv(
  file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-03-26/public-picks.csv'
)
## Rows: 64 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): TEAM, R64, R32, S16, E8, F4, FINALS
## dbl (2): YEAR, TEAMNO
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 14

Tools

Detect matches

# Keep the rows whose TEAM contains a literal "St." or the text "State"
# anywhere in the name.
# NOTE(review): "St\\." would also match a "St." that abbreviates "Saint",
# should the data contain such a name — confirm that is intended.
state_teams <- filter(team_results, str_detect(TEAM, "St\\.|State"))

# Show each matched team next to its championship percentage.
select(state_teams, TEAM, CHAMPPERCENT)
## # A tibble: 46 × 2
##    TEAM                CHAMPPERCENT
##    <chr>               <chr>       
##  1 Arizona St.         1.60%       
##  2 Boise St.           0.20%       
##  3 Cal St. Bakersfield 0.00%       
##  4 Cal St. Fullerton   0.00%       
##  5 Cal St. Northridge  0.00%       
##  6 Cleveland St.       0.10%       
##  7 Colorado St.        0.30%       
##  8 East Tennessee St.  0.00%       
##  9 Florida St.         4.60%       
## 10 Fresno St.          0.00%       
## # ℹ 36 more rows
# Select the teams whose TEAM string starts with the prefix "North" or "South".
# Only the prefix is tested, so a name such as "Northwestern" qualifies too.
north_south_teams <- filter(
  public_picks,
  str_detect(TEAM, "^(North|South)")
)

# Show the R64 and FINALS columns for those teams.
select(north_south_teams, TEAM, R64, FINALS)
## # A tibble: 5 × 3
##   TEAM               R64    FINALS
##   <chr>              <chr>  <chr> 
## 1 North Carolina     97.55% 12.10%
## 2 North Carolina St. 43.55% 0.07% 
## 3 Northwestern       43.89% 0.08% 
## 4 South Carolina     50.79% 0.14% 
## 5 South Dakota St.   4.02%  0.04%

Extract matches

# Take the first run of digits and dots in CHAMPPERCENT (which leaves the "%"
# behind) and convert that text to a number.
team_results_numeric <- team_results |>
  mutate(champ_percent_num = as.numeric(str_extract(CHAMPPERCENT, "[0-9.]+")))

# Preview the original string beside the parsed numeric value.
team_results_numeric |>
  select(TEAM, CHAMPPERCENT, champ_percent_num) |>
  head(n = 6)
## # A tibble: 6 × 3
##   TEAM              CHAMPPERCENT champ_percent_num
##   <chr>             <chr>                    <dbl>
## 1 Abilene Christian 0.00%                      0  
## 2 Akron             0.00%                      0  
## 3 Alabama           15.40%                    15.4
## 4 Albany            0.00%                      0  
## 5 American          0.00%                      0  
## 6 Arizona           36.10%                    36.1
# Capture everything before the first space in TEAM, i.e. its first word
# (the whole name when it contains no space).
public_picks_words <- mutate(
  public_picks,
  first_word = str_extract(TEAM, "^[^ ]+")
)

# Preview the team names beside their extracted first word.
public_picks_words |>
  select(TEAM, first_word) |>
  head(n = 6)
## # A tibble: 6 × 2
##   TEAM    first_word
##   <chr>   <chr>     
## 1 Akron   Akron     
## 2 Alabama Alabama   
## 3 Arizona Arizona   
## 4 Auburn  Auburn    
## 5 Baylor  Baylor    
## 6 BYU     BYU

Replace matches

# Convert the columns R64 through FINALS from strings such as "97.55%" to
# numbers: drop the "%" sign, then parse what remains.
public_picks_clean <- public_picks |>
  mutate(
    across(
      R64:FINALS,
      \(pct) as.numeric(str_replace(pct, "%", ""))
    )
  )

# Preview two of the converted columns.
public_picks_clean |>
  select(TEAM, R64, FINALS) |>
  head(n = 6)
## # A tibble: 6 × 3
##   TEAM      R64 FINALS
##   <chr>   <dbl>  <dbl>
## 1 Akron    7.16   0.04
## 2 Alabama 87.1    1.1 
## 3 Arizona 95.2    4.06
## 4 Auburn  90.2    1.34
## 5 Baylor  92.3    1.83
## 6 BYU     75.2    0.32
# Expand every "St." abbreviation in TEAM to "State".
# NOTE(review): this also rewrites a "St." that abbreviates "Saint", should
# such a name occur in the data — confirm against the team list.
public_picks_standardized <- mutate(
  public_picks,
  TEAM = str_replace_all(TEAM, pattern = "St\\.", replacement = "State")
)

# Preview the standardized team names.
head(select(public_picks_standardized, TEAM))
## # A tibble: 6 × 1
##   TEAM   
##   <chr>  
## 1 Akron  
## 2 Alabama
## 3 Arizona
## 4 Auburn 
## 5 Baylor 
## 6 BYU