Week 8: Apply it to your data 7

Import your data

# Read the raw CSV into a tibble; column types are guessed by readr.
data <- readr::read_csv(file = "../00_data/myData.csv")

## New names:
## Rows: 236 Columns: 21
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): TEAM, F4PERCENT, CHAMPPERCENT dbl (18): ...1, TEAMID, PAKE, PAKERANK,
## PASE, PASERANK, GAMES, W, L, WINPERC...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Fix the RNG seed so the random sample below is reproducible.
set.seed(1234)

# Keep three columns, then draw 5 random rows to get a small working table.
data_small <- data %>%
  select(TEAMID, PASERANK, WINPERCENT) %>%
  sample_n(size = 5)

data_small

## # A tibble: 5 × 3
##   TEAMID PASERANK WINPERCENT
##    <dbl>    <dbl>      <dbl>
## 1     31      172      0    
## 2     84      119      0    
## 3    157        8      0.652
## 4    106      138      0    
## 5    116       30      0.5

Pivoting

long to wide form

# Spread PASERANK across one column per TEAMID; the remaining column,
# WINPERCENT, identifies the rows of the widened table.
data_small %>%
  pivot_wider(
    names_from = TEAMID,
    values_from = PASERANK
  )

## # A tibble: 3 × 6
##   WINPERCENT  `31`  `84` `157` `106` `116`
##        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1      0       172   119    NA   138    NA
## 2      0.652    NA    NA     8    NA    NA
## 3      0.5      NA    NA    NA    NA    30

# Show the full data set with its 10th row removed (negative index = drop).
slice(data, -10)

## # A tibble: 235 × 21
##     ...1 TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT
##    <dbl>  <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl>
##  1     1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333
##  2     2      2 Akron  -0.9      179  -1.1      187     4     0     4      0    
##  3     3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5  
##  4     4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0    
##  5     5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0    
##  6     6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607
##  7     7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2  
##  8     8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611
##  9     9     11 Arka…   0         76   0         78     1     0     1      0    
## 10    11     13 Aust…  -0.1       91  -0.1      103     2     0     2      0    
## # ℹ 225 more rows
## # ℹ 10 more variables: R64 <dbl>, R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>,
## #   F2 <dbl>, CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <chr>, CHAMPPERCENT <chr>

# Sort the sample by PASERANK, breaking ties by WINPERCENT (both ascending).
# No slice(-10) here: data_small has only 5 rows, and slice() silently
# ignores indices beyond the number of rows, so dropping row 10 is a no-op.
data_small %>%
  arrange(PASERANK, WINPERCENT)

## # A tibble: 5 × 3
##   TEAMID PASERANK WINPERCENT
##    <dbl>    <dbl>      <dbl>
## 1    157        8      0.652
## 2    116       30      0.5  
## 3     84      119      0    
## 4    106      138      0    
## 5     31      172      0

# Print data_small again; the pipelines above did not reassign it.
data_small

## # A tibble: 5 × 3
##   TEAMID PASERANK WINPERCENT
##    <dbl>    <dbl>      <dbl>
## 1     31      172      0    
## 2     84      119      0    
## 3    157        8      0.652
## 4    106      138      0    
## 5    116       30      0.5

wide to long form

# Build a wide table to reshape: one column per PASERANK value, with
# WINPERCENT as the cell contents and TEAMID identifying each row.
data_wide <- pivot_wider(
  data_small,
  names_from = PASERANK,
  values_from = WINPERCENT
)

data_wide

## # A tibble: 5 × 6
##   TEAMID `172` `119`    `8` `138`  `30`
##    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     31     0    NA NA        NA  NA  
## 2     84    NA     0 NA        NA  NA  
## 3    157    NA    NA  0.652    NA  NA  
## 4    106    NA    NA NA         0  NA  
## 5    116    NA    NA NA        NA   0.5

# Gather every column except TEAMID back into PASERANK / WINPERCENT pairs.
# Selecting with `!TEAMID` instead of the literal range `172`:`30` keeps
# this working when a different sample yields different column names.
# values_drop_na = TRUE discards the NA cells that the widening introduced.
# The former column names come back as text, so PASERANK is character here.
data_wide %>%
  pivot_longer(
    cols = !TEAMID,
    names_to = "PASERANK",
    values_to = "WINPERCENT",
    values_drop_na = TRUE
  )

## # A tibble: 5 × 3
##   TEAMID PASERANK WINPERCENT
##    <dbl> <chr>         <dbl>
## 1     31 172           0    
## 2     84 119           0    
## 3    157 8             0.652
## 4    106 138           0    
## 5    116 30            0.5

Separating and Uniting

Unite two columns

# Paste PASERANK and WINPERCENT into one "/"-separated text column, newName;
# unite() removes the two source columns by default.
data_united <- data_small %>%
  unite(col = "newName", PASERANK, WINPERCENT, sep = "/")

data_united

## # A tibble: 5 × 2
##   TEAMID newName
##    <dbl> <chr>  
## 1     31 172/0  
## 2     84 119/0  
## 3    157 8/0.652
## 4    106 138/0  
## 5    116 30/0.5

Separate a column

# Split newName at "/" back into two columns. Both come back as character
# because separate() does not convert types unless asked to.
separate(
  data_united,
  col = newName,
  into = c("PASERANK", "WINPERCENT"),
  sep = "/"
)

## # A tibble: 5 × 3
##   TEAMID PASERANK WINPERCENT
##    <dbl> <chr>    <chr>     
## 1     31 172      0         
## 2     84 119      0         
## 3    157 8        0.652     
## 4    106 138      0         
## 5    116 30       0.5

Week 8: Apply it to your data 7

Zachary Dolan

2022-10-05

Import your data

Pivoting

long to wide form

wide to long form

Separating and Uniting

Unite two columns

Separate a column

Missing Values