Import data

data <- read_excel("myData.xlsx")
data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, `2` <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Tidy data

data_small <- data %>%
    select(TEAM, PAKERANK, W)
data_small
## # A tibble: 236 × 3
##    TEAM                PAKERANK     W
##    <chr>                  <dbl> <dbl>
##  1 Abilene Christian         45     1
##  2 Akron                    179     0
##  3 Alabama                  211     5
##  4 Albany                   147     0
##  5 American                 160     0
##  6 Arizona                  206    17
##  7 Arizona St.              209     1
##  8 Arkansas                  11    11
##  9 Arkansas Pine Bluff       76     0
## 10 Auburn                    53     7
## # ℹ 226 more rows

long to wide form

data_small
## # A tibble: 236 × 3
##    TEAM                PAKERANK     W
##    <chr>                  <dbl> <dbl>
##  1 Abilene Christian         45     1
##  2 Akron                    179     0
##  3 Alabama                  211     5
##  4 Albany                   147     0
##  5 American                 160     0
##  6 Arizona                  206    17
##  7 Arizona St.              209     1
##  8 Arkansas                  11    11
##  9 Arkansas Pine Bluff       76     0
## 10 Auburn                    53     7
## # ℹ 226 more rows
data_long <- data_small %>%
    
    pivot_longer(cols = c(2,3), 
                 names_to = "Name", 
                 values_to = "Rank_Wins")

data_long
## # A tibble: 472 × 3
##    TEAM              Name     Rank_Wins
##    <chr>             <chr>        <dbl>
##  1 Abilene Christian PAKERANK        45
##  2 Abilene Christian W                1
##  3 Akron             PAKERANK       179
##  4 Akron             W                0
##  5 Alabama           PAKERANK       211
##  6 Alabama           W                5
##  7 Albany            PAKERANK       147
##  8 Albany            W                0
##  9 American          PAKERANK       160
## 10 American          W                0
## # ℹ 462 more rows

wide to long form

data_long %>%
    
    pivot_wider(names_from = TEAM, 
                values_from = Rank_Wins)
## # A tibble: 2 × 237
##   Name   `Abilene Christian` Akron Alabama Albany American Arizona `Arizona St.`
##   <chr>                <dbl> <dbl>   <dbl>  <dbl>    <dbl>   <dbl>         <dbl>
## 1 PAKER…                  45   179     211    147      160     206           209
## 2 W                        1     0       5      0        0      17             1
## # ℹ 229 more variables: Arkansas <dbl>, `Arkansas Pine Bluff` <dbl>,
## #   Auburn <dbl>, `Austin Peay` <dbl>, Baylor <dbl>, Belmont <dbl>,
## #   Binghamton <dbl>, `Boise St.` <dbl>, `Boston College` <dbl>,
## #   `Boston University` <dbl>, Bradley <dbl>, Bucknell <dbl>, Buffalo <dbl>,
## #   Butler <dbl>, BYU <dbl>, `Cal Poly` <dbl>, `Cal St. Bakersfield` <dbl>,
## #   `Cal St. Fullerton` <dbl>, `Cal St. Northridge` <dbl>, California <dbl>,
## #   Chattanooga <dbl>, Cincinnati <dbl>, Clemson <dbl>, …

Separating and Uniting

Separate a column

data_long
## # A tibble: 472 × 3
##    TEAM              Name     Rank_Wins
##    <chr>             <chr>        <dbl>
##  1 Abilene Christian PAKERANK        45
##  2 Abilene Christian W                1
##  3 Akron             PAKERANK       179
##  4 Akron             W                0
##  5 Alabama           PAKERANK       211
##  6 Alabama           W                5
##  7 Albany            PAKERANK       147
##  8 Albany            W                0
##  9 American          PAKERANK       160
## 10 American          W                0
## # ℹ 462 more rows
data_long_sep <- data_long %>%
    
    separate(col = Rank_Wins, into = c("PAKERANK", "W"))

data_long_sep
## # A tibble: 472 × 4
##    TEAM              Name     PAKERANK W    
##    <chr>             <chr>    <chr>    <chr>
##  1 Abilene Christian PAKERANK 45       <NA> 
##  2 Abilene Christian W        1        <NA> 
##  3 Akron             PAKERANK 179      <NA> 
##  4 Akron             W        0        <NA> 
##  5 Alabama           PAKERANK 211      <NA> 
##  6 Alabama           W        5        <NA> 
##  7 Albany            PAKERANK 147      <NA> 
##  8 Albany            W        0        <NA> 
##  9 American          PAKERANK 160      <NA> 
## 10 American          W        0        <NA> 
## # ℹ 462 more rows

Unite two columns

data_long_sep %>%
    
    unite(col = "W", c(TEAM,PAKERANK), sep = "/", )
## # A tibble: 472 × 2
##    W                    Name    
##    <chr>                <chr>   
##  1 Abilene Christian/45 PAKERANK
##  2 Abilene Christian/1  W       
##  3 Akron/179            PAKERANK
##  4 Akron/0              W       
##  5 Alabama/211          PAKERANK
##  6 Alabama/5            W       
##  7 Albany/147           PAKERANK
##  8 Albany/0             W       
##  9 American/160         PAKERANK
## 10 American/0           W       
## # ℹ 462 more rows

Missing Values

data_small %>%
    
    pivot_wider(names_from = TEAM, values_from = PAKERANK)
## # A tibble: 31 × 237
##        W `Abilene Christian` Akron Alabama Albany American Arizona `Arizona St.`
##    <dbl>               <dbl> <dbl>   <dbl>  <dbl>    <dbl>   <dbl>         <dbl>
##  1     1                  45    NA      NA     NA       NA      NA           209
##  2     0                  NA   179      NA    147      160      NA            NA
##  3     5                  NA    NA     211     NA       NA      NA            NA
##  4    17                  NA    NA      NA     NA       NA     206            NA
##  5    11                  NA    NA      NA     NA       NA      NA            NA
##  6     7                  NA    NA      NA     NA       NA      NA            NA
##  7    19                  NA    NA      NA     NA       NA      NA            NA
##  8     2                  NA    NA      NA     NA       NA      NA            NA
##  9     3                  NA    NA      NA     NA       NA      NA            NA
## 10     6                  NA    NA      NA     NA       NA      NA            NA
## # ℹ 21 more rows
## # ℹ 229 more variables: Arkansas <dbl>, `Arkansas Pine Bluff` <dbl>,
## #   Auburn <dbl>, `Austin Peay` <dbl>, Baylor <dbl>, Belmont <dbl>,
## #   Binghamton <dbl>, `Boise St.` <dbl>, `Boston College` <dbl>,
## #   `Boston University` <dbl>, Bradley <dbl>, Bucknell <dbl>, Buffalo <dbl>,
## #   Butler <dbl>, BYU <dbl>, `Cal Poly` <dbl>, `Cal St. Bakersfield` <dbl>,
## #   `Cal St. Fullerton` <dbl>, `Cal St. Northridge` <dbl>, California <dbl>, …
data_small %>%
    
    pivot_wider(names_from = TEAM, values_from = W)
## # A tibble: 75 × 237
##    PAKERANK `Abilene Christian` Akron Alabama Albany American Arizona
##       <dbl>               <dbl> <dbl>   <dbl>  <dbl>    <dbl>   <dbl>
##  1       45                   1    NA      NA     NA       NA      NA
##  2      179                  NA     0      NA     NA       NA      NA
##  3      211                  NA    NA       5     NA       NA      NA
##  4      147                  NA    NA      NA      0       NA      NA
##  5      160                  NA    NA      NA     NA        0      NA
##  6      206                  NA    NA      NA     NA       NA      17
##  7      209                  NA    NA      NA     NA       NA      NA
##  8       11                  NA    NA      NA     NA       NA      NA
##  9       76                  NA    NA      NA     NA       NA      NA
## 10       53                  NA    NA      NA     NA       NA      NA
## # ℹ 65 more rows
## # ℹ 230 more variables: `Arizona St.` <dbl>, Arkansas <dbl>,
## #   `Arkansas Pine Bluff` <dbl>, Auburn <dbl>, `Austin Peay` <dbl>,
## #   Baylor <dbl>, Belmont <dbl>, Binghamton <dbl>, `Boise St.` <dbl>,
## #   `Boston College` <dbl>, `Boston University` <dbl>, Bradley <dbl>,
## #   Bucknell <dbl>, Buffalo <dbl>, Butler <dbl>, BYU <dbl>, `Cal Poly` <dbl>,
## #   `Cal St. Bakersfield` <dbl>, `Cal St. Fullerton` <dbl>, …
data_small %>%
    
    complete(TEAM, PAKERANK)
## # A tibble: 17,700 × 3
##    TEAM              PAKERANK     W
##    <chr>                <dbl> <dbl>
##  1 Abilene Christian        1    NA
##  2 Abilene Christian        2    NA
##  3 Abilene Christian        3    NA
##  4 Abilene Christian        4    NA
##  5 Abilene Christian        6    NA
##  6 Abilene Christian        7    NA
##  7 Abilene Christian        8    NA
##  8 Abilene Christian        9    NA
##  9 Abilene Christian       10    NA
## 10 Abilene Christian       11    NA
## # ℹ 17,690 more rows