Import your data

# Read the workbook into `data`, then print the resulting tibble.
data <- readxl::read_excel(path = "../00_data/myData.xlsx")
data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Chapter 14

Tools

Detect matches

# Print the TEAM column as a plain character vector.
data[["TEAM"]]
##   [1] "Abilene Christian"      "Akron"                  "Alabama"               
##   [4] "Albany"                 "American"               "Arizona"               
##   [7] "Arizona St."            "Arkansas"               "Arkansas Pine Bluff"   
##  [10] "Auburn"                 "Austin Peay"            "Baylor"                
##  [13] "Belmont"                "Binghamton"             "Boise St."             
##  [16] "Boston College"         "Boston University"      "Bradley"               
##  [19] "Bucknell"               "Buffalo"                "Butler"                
##  [22] "BYU"                    "Cal Poly"               "Cal St. Bakersfield"   
##  [25] "Cal St. Fullerton"      "Cal St. Northridge"     "California"            
##  [28] "Chattanooga"            "Cincinnati"             "Clemson"               
##  [31] "Cleveland St."          "Coastal Carolina"       "Colgate"               
##  [34] "College of Charleston"  "Colorado"               "Colorado St."          
##  [37] "Connecticut"            "Cornell"                "Creighton"             
##  [40] "Davidson"               "Dayton"                 "Delaware"              
##  [43] "Detroit"                "Drake"                  "Drexel"                
##  [46] "Duke"                   "East Tennessee St."     "Eastern Kentucky"      
##  [49] "Eastern Washington"     "Fairleigh Dickinson"    "Florida"               
##  [52] "Florida Atlantic"       "Florida Gulf Coast"     "Florida St."           
##  [55] "Fresno St."             "Furman"                 "Gardner Webb"          
##  [58] "George Mason"           "George Washington"      "Georgetown"            
##  [61] "Georgia"                "Georgia St."            "Georgia Tech"          
##  [64] "Gonzaga"                "Grand Canyon"           "Green Bay"             
##  [67] "Hampton"                "Hartford"               "Harvard"               
##  [70] "Hawaii"                 "Holy Cross"             "Houston"               
##  [73] "Howard"                 "Illinois"               "Indiana"               
##  [76] "Indiana St."            "Iona"                   "Iowa"                  
##  [79] "Iowa St."               "Jacksonville St."       "James Madison"         
##  [82] "Kansas"                 "Kansas St."             "Kennesaw St."          
##  [85] "Kent St."               "Kentucky"               "La Salle"              
##  [88] "Lafayette"              "Lehigh"                 "Liberty"               
##  [91] "Lipscomb"               "Little Rock"            "LIU Brooklyn"          
##  [94] "Long Beach St."         "Longwood"               "Louisiana Lafayette"   
##  [97] "Louisville"             "Loyola Chicago"         "Loyola MD"             
## [100] "LSU"                    "Manhattan"              "Marquette"             
## [103] "Marshall"               "Maryland"               "Massachusetts"         
## [106] "Memphis"                "Mercer"                 "Miami FL"              
## [109] "Michigan"               "Michigan St."           "Middle Tennessee"      
## [112] "Milwaukee"              "Minnesota"              "Mississippi"           
## [115] "Mississippi St."        "Mississippi Valley St." "Missouri"              
## [118] "Montana"                "Montana St."            "Morehead St."          
## [121] "Morgan St."             "Mount St. Mary's"       "Murray St."            
## [124] "Nebraska"               "Nevada"                 "New Mexico"            
## [127] "New Mexico St."         "Norfolk St."            "North Carolina"        
## [130] "North Carolina A&T"     "North Carolina Central" "North Carolina St."    
## [133] "North Dakota"           "North Dakota St."       "North Texas"           
## [136] "Northeastern"           "Northern Colorado"      "Northern Iowa"         
## [139] "Northern Kentucky"      "Northwestern"           "Northwestern St."      
## [142] "Notre Dame"             "Oakland"                "Ohio"                  
## [145] "Ohio St."               "Oklahoma"               "Oklahoma St."          
## [148] "Old Dominion"           "Oral Roberts"           "Oregon"                
## [151] "Oregon St."             "Pacific"                "Penn"                  
## [154] "Penn St."               "Pittsburgh"             "Portland St."          
## [157] "Princeton"              "Providence"             "Purdue"                
## [160] "Radford"                "Rhode Island"           "Richmond"              
## [163] "Robert Morris"          "Rutgers"                "Saint Joseph's"        
## [166] "Saint Louis"            "Saint Mary's"           "Saint Peter's"         
## [169] "Sam Houston St."        "San Diego"              "San Diego St."         
## [172] "San Francisco"          "Seton Hall"             "Siena"                 
## [175] "SMU"                    "South Alabama"          "South Carolina"        
## [178] "South Dakota St."       "South Florida"          "Southern"              
## [181] "Southern Miss"          "St. Bonaventure"        "St. John's"            
## [184] "Stanford"               "Stephen F. Austin"      "Stony Brook"           
## [187] "Syracuse"               "TCU"                    "Temple"                
## [190] "Tennessee"              "Texas"                  "Texas A&M"             
## [193] "Texas A&M Corpus Chris" "Texas Southern"         "Texas Tech"            
## [196] "Troy"                   "Tulsa"                  "UAB"                   
## [199] "UC Davis"               "UC Irvine"              "UC Santa Barbara"      
## [202] "UCF"                    "UCLA"                   "UMBC"                  
## [205] "UNC Asheville"          "UNC Greensboro"         "UNC Wilmington"        
## [208] "UNLV"                   "USC"                    "UT Arlington"          
## [211] "Utah"                   "Utah St."               "UTEP"                  
## [214] "UTSA"                   "Valparaiso"             "Vanderbilt"            
## [217] "VCU"                    "Vermont"                "Villanova"             
## [220] "Virginia"               "Virginia Tech"          "Wake Forest"           
## [223] "Washington"             "Washington St."         "Weber St."             
## [226] "West Virginia"          "Western Kentucky"       "Western Michigan"      
## [229] "Wichita St."            "Winthrop"               "Wisconsin"             
## [232] "Wofford"                "Wright St."             "Wyoming"               
## [235] "Xavier"                 "Yale"
# One TRUE/FALSE per team: does the name contain an uppercase "W"?
str_detect(string = data[["TEAM"]], pattern = "W")
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [229]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
# Count the matches: summing a logical vector counts its TRUE values.
data[["TEAM"]] %>%
    str_detect(pattern = "W") %>%
    sum()
## [1] 17
# Same count computed inside a dplyr summary, returned as a one-row tibble.
summarise(data, num_W = sum(str_detect(string = TEAM, pattern = "W")))
## # A tibble: 1 × 1
##   num_W
##   <int>
## 1    17

Extract matches

# Keep only the team name, pull the first "W" out of it into col_W
# (NA when the name has no "W"), and drop the teams without a match.
data %>%
    select(TEAM) %>%
    mutate(col_W = str_extract(string = TEAM, pattern = "W")) %>%
    filter(!is.na(col_W))
## # A tibble: 17 × 2
##    TEAM               col_W
##    <chr>              <chr>
##  1 Eastern Washington W    
##  2 Gardner Webb       W    
##  3 George Washington  W    
##  4 UNC Wilmington     W    
##  5 Wake Forest        W    
##  6 Washington         W    
##  7 Washington St.     W    
##  8 Weber St.          W    
##  9 West Virginia      W    
## 10 Western Kentucky   W    
## 11 Western Michigan   W    
## 12 Wichita St.        W    
## 13 Winthrop           W    
## 14 Wisconsin          W    
## 15 Wofford            W    
## 16 Wright St.         W    
## 17 Wyoming            W

Replacing matches

# Keep only the team name and add col_A, a copy of the name in which the
# first "W" (if any) is swapped for "A"; names without a "W" are unchanged.
data %>%
    select(TEAM) %>%
    mutate(col_A = str_replace(string = TEAM, pattern = "W", replacement = "A"))
## # A tibble: 236 × 2
##    TEAM                col_A              
##    <chr>               <chr>              
##  1 Abilene Christian   Abilene Christian  
##  2 Akron               Akron              
##  3 Alabama             Alabama            
##  4 Albany              Albany             
##  5 American            American           
##  6 Arizona             Arizona            
##  7 Arizona St.         Arizona St.        
##  8 Arkansas            Arkansas           
##  9 Arkansas Pine Bluff Arkansas Pine Bluff
## 10 Auburn              Auburn             
## # ℹ 226 more rows