Import data

# Load the Excel workbook into `data`, then display the resulting tibble.
data <- read_excel(path = "myData.xlsx")
data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, `2` <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Chapter 14

Tools

Make Data Small

# Build a small working table: keep four columns and draw 10 rows at random.
# No seed is set, so every run produces a different sample.
data_small <- data %>%
  select(TEAMID, PAKERANK, PASE, PAKE) %>%
  slice_sample(n = 10) # current replacement for the superseded sample_n()
data_small
## # A tibble: 10 × 4
##    TEAMID PAKERANK  PASE  PAKE
##     <dbl>    <dbl> <dbl> <dbl>
##  1     29       91  -0.1  -0.1
##  2    225      225  -5    -3.4
##  3    238      175   0.2  -0.8
##  4     80      118  -0.2  -0.2
##  5     14       41  -0.3   0.9
##  6     10       11   3.5   4.3
##  7    150      160  -0.4  -0.5
##  8    121       76   0     0  
##  9     55       11   5.9   4.3
## 10    192      197  -1.7  -1.3

Detect matches

# Count the teams whose PAKE value, written as text, ends in the digit 8.
# PAKE is numeric, so it is converted to character explicitly before the regex
# is applied; the summary is named so the result gets a readable column name
# instead of the full expression text.
data %>%
  summarise(n_ending_in_8 = sum(str_detect(as.character(PAKE), "8$")))
## # A tibble: 1 × 1
##   n_ending_in_8
##           <int>
## 1             9
# Apply the same end-of-string test to the 10-row sample: flag each PAKE value
# once, then show the flags, how many are TRUE, and the share that are TRUE.
ends_in_8 <- str_detect(data_small[["PAKE"]], "8$")
ends_in_8
##  [1] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sum(ends_in_8)
## [1] 1
mean(ends_in_8)
## [1] 0.1

Extract matches

# Values to search for, collapsed into a single regex alternation.
# The vector has its own name on purpose: assigning it to `data` would replace
# the tibble imported at the top of the document.
match_values <- c("12", "8", "7", "2", "1", "0")
data_match <- str_c(match_values, collapse = "|")
data_match
## [1] "12|8|7|2|1|0"

Replacing matches

# Mask digits in PAKE to contrast the two replacement functions:
# str_replace() changes only the first match in each string, while
# str_replace_all() changes every match.
# PAKE is numeric, so it is converted to text first, and the new PAKE column is
# derived from PAKE's own values. The pattern targets digits ("[0-9]"); a
# letter class such as "[A-Z]" would never match these numeric strings.
data_small %>%
  mutate(PAKE = str_replace(as.character(PAKE), "[0-9]", "-"))
## # A tibble: 10 × 4
##    TEAMID PAKERANK  PASE PAKE 
##     <dbl>    <dbl> <dbl> <chr>
##  1     29       91  -0.1 --.1 
##  2    225      225  -5   --.4 
##  3    238      175   0.2 --.8 
##  4     80      118  -0.2 --.2 
##  5     14       41  -0.3 -.9  
##  6     10       11   3.5 -.3  
##  7    150      160  -0.4 --.5 
##  8    121       76   0   -    
##  9     55       11   5.9 -.3  
## 10    192      197  -1.7 --.3
data_small %>%
  mutate(PAKE = str_replace_all(as.character(PAKE), "[0-9]", "-"))
## # A tibble: 10 × 4
##    TEAMID PAKERANK  PASE PAKE 
##     <dbl>    <dbl> <dbl> <chr>
##  1     29       91  -0.1 --.- 
##  2    225      225  -5   --.- 
##  3    238      175   0.2 --.- 
##  4     80      118  -0.2 --.- 
##  5     14       41  -0.3 -.-  
##  6     10       11   3.5 -.-  
##  7    150      160  -0.4 --.- 
##  8    121       76   0   -    
##  9     55       11   5.9 -.-  
## 10    192      197  -1.7 --.-