Import data
# Load the workbook into a tibble and display its first rows.
data <- read_excel(path = "myData.xlsx")
print(data)
## # A tibble: 236 × 20
## TEAMID TEAM PAKE PAKERANK PASE PASERANK GAMES W L WINPERCENT R64
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Abil… 0.7 45 0.7 52 3 1 2 0.333 2
## 2 2 Akron -0.9 179 -1.1 187 4 0 4 0 4
## 3 3 Alab… -2.1 211 -2.9 220 10 5 5 0.5 5
## 4 4 Alba… -0.4 147 -0.3 138 3 0 3 0 3
## 5 6 Amer… -0.5 160 -0.4 150 3 0 3 0 3
## 6 8 Ariz… -1.7 206 -2.5 216 28 17 11 0.607 11
## 7 9 Ariz… -2 209 -1.9 206 5 1 4 0.2 4
## 8 10 Arka… 4.3 11 3.5 16 18 11 7 0.611 7
## 9 11 Arka… 0 76 0 78 1 0 1 0 1
## 10 12 Aubu… 0.6 53 1.4 30 11 7 4 0.636 4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## # CHAMP <dbl>, `2` <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>
Chapter 14
Tools
Make Data Small
# Keep the four columns used in the examples below and draw 10 random rows.
# slice_sample() replaces the superseded sample_n(). The draw is not seeded,
# so the rows shown change between runs unless set.seed() is called first.
data_small <- data %>%
  select(TEAMID, PAKERANK, PASE, PAKE) %>%
  slice_sample(n = 10)
data_small
## # A tibble: 10 × 4
## TEAMID PAKERANK PASE PAKE
## <dbl> <dbl> <dbl> <dbl>
## 1 29 91 -0.1 -0.1
## 2 225 225 -5 -3.4
## 3 238 175 0.2 -0.8
## 4 80 118 -0.2 -0.2
## 5 14 41 -0.3 0.9
## 6 10 11 3.5 4.3
## 7 150 160 -0.4 -0.5
## 8 121 76 0 0
## 9 55 11 5.9 4.3
## 10 192 197 -1.7 -1.3
Detect matches
# Count the teams whose PAKE value, written out as text, ends in the digit 8.
# PAKE is numeric, so it is converted to character explicitly before the regex
# test, and the result is named so the summary column has a readable header.
data %>%
  summarise(n_pake_ends_in_8 = sum(str_detect(as.character(PAKE), "8$")))
## # A tibble: 1 × 1
## n_pake_ends_in_8
## <int>
## 1 9
# Test each sampled PAKE value once, then reuse the logical vector for the
# per-row flags, the number of matches, and the proportion of matches.
pake_ends_in_8 <- str_detect(data_small[["PAKE"]], "8$")
pake_ends_in_8
## [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sum(pake_ends_in_8)
## [1] 1
mean(pake_ends_in_8)
## [1] 0.1
Replacing matches
# Replace the first uppercase letter in each PASE value (as text) with "-" and
# store the result in PAKE, which therefore becomes a character column.
# NOTE(review): none of the sampled PASE values contains an uppercase letter,
# so the printed PAKE is just PASE as text; confirm the intended column/pattern.
data_small %>%
  mutate(PAKE = str_replace(PASE, "[A-Z]", "-"))
## # A tibble: 10 × 4
## TEAMID PAKERANK PASE PAKE
## <dbl> <dbl> <dbl> <chr>
## 1 29 91 -0.1 -0.1
## 2 225 225 -5 -5
## 3 238 175 0.2 0.2
## 4 80 118 -0.2 -0.2
## 5 14 41 -0.3 -0.3
## 6 10 11 3.5 3.5
## 7 150 160 -0.4 -0.4
## 8 121 76 0 0
## 9 55 11 5.9 5.9
## 10 192 197 -1.7 -1.7
# Replace every uppercase letter in each PASE value (as text) with "-" and
# store the result in PAKE, which therefore becomes a character column.
# NOTE(review): none of the sampled PASE values contains an uppercase letter,
# so the printed PAKE is just PASE as text; confirm the intended column/pattern.
data_small %>%
  mutate(PAKE = str_replace_all(PASE, "[A-Z]", "-"))
## # A tibble: 10 × 4
## TEAMID PAKERANK PASE PAKE
## <dbl> <dbl> <dbl> <chr>
## 1 29 91 -0.1 -0.1
## 2 225 225 -5 -5
## 3 238 175 0.2 0.2
## 4 80 118 -0.2 -0.2
## 5 14 41 -0.3 -0.3
## 6 10 11 3.5 3.5
## 7 150 160 -0.4 -0.4
## 8 121 76 0 0
## 9 55 11 5.9 5.9
## 10 192 197 -1.7 -1.7