Import data

# excel file
data <- read_excel("../00_data/myData.xlsx")
data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Apply the following dplyr verbs to your data

Filter rows

filter(data, GAMES > 10) 
## # A tibble: 49 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  2     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  3     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
##  4     14 Bayl…   0.9       41  -0.3      138    29    19    10      0.655    11
##  5     24 Butl…   7          4   8.7        4    26    17     9      0.654     9
##  6     32 Cinc…  -3.6      228  -3.5      224    15     6     9      0.4       9
##  7     40 Conn…   8.6        2  10.6        1    29    23     6      0.793     9
##  8     43 Crei…   0.7       45   0.9       41    17     9     8      0.529     8
##  9     50 Duke    2         24   0         78    46    34    12      0.739    14
## 10     55 Flor…   4.3       11   5.9        7    28    19     9      0.679     9
## # ℹ 39 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>
filter(data, W > 5)
## # A tibble: 47 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  2     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  3     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
##  4     14 Bayl…   0.9       41  -0.3      138    29    19    10      0.655    11
##  5     24 Butl…   7          4   8.7        4    26    17     9      0.654     9
##  6     32 Cinc…  -3.6      228  -3.5      224    15     6     9      0.4       9
##  7     40 Conn…   8.6        2  10.6        1    29    23     6      0.793     9
##  8     43 Crei…   0.7       45   0.9       41    17     9     8      0.529     8
##  9     50 Duke    2         24   0         78    46    34    12      0.739    14
## 10     55 Flor…   4.3       11   5.9        7    28    19     9      0.679     9
## # ℹ 37 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Arrange rows

arrange(data, desc(PASERANK), desc(PAKERANK)) 
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1     64 Geor…  -6.2      235  -8.5      236    10     3     7      0.3       7
##  2    229 Virg…  -6.7      236  -7.3      235    21    13     8      0.619     9
##  3    225 Vand…  -3.4      225  -5        234     6     1     5      0.167     5
##  4    200 Texas  -5.5      234  -4.6      232    22    10    12      0.455    12
##  5    162 Pitt…  -3.5      227  -4.6      232    16     8     8      0.5       8
##  6    167 Purd…  -4.4      232  -4.4      231    29    16    13      0.552    13
##  7    154 Okla…  -4.1      229  -4.2      230     9     2     7      0.222     7
##  8     78 Illi…  -3.3      224  -4.1      229    10     4     6      0.4       6
##  9    131 New …  -2.7      220  -3.8      228     6     2     4      0.333     4
## 10    221 Utah…  -4.4      232  -3.6      226     6     0     6      0         6
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>

Select columns

select(data, GAMES:PAKE)
## # A tibble: 236 × 5
##    GAMES PASERANK  PASE PAKERANK  PAKE
##    <dbl>    <dbl> <dbl>    <dbl> <dbl>
##  1     3       52   0.7       45   0.7
##  2     4      187  -1.1      179  -0.9
##  3    10      220  -2.9      211  -2.1
##  4     3      138  -0.3      147  -0.4
##  5     3      150  -0.4      160  -0.5
##  6    28      216  -2.5      206  -1.7
##  7     5      206  -1.9      209  -2  
##  8    18       16   3.5       11   4.3
##  9     1       78   0         76   0  
## 10    11       30   1.4       53   0.6
## # ℹ 226 more rows
select(data, R64, R32, E8, F4, F2)
## # A tibble: 236 × 5
##      R64   R32    E8    F4    F2
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     2     1     0     0     0
##  2     4     0     0     0     0
##  3     5     3     0     0     0
##  4     3     0     0     0     0
##  5     3     0     0     0     0
##  6    11     7     3     0     0
##  7     4     1     0     0     0
##  8     7     6     2     0     0
##  9     1     0     0     0     0
## 10     4     4     1     1     0
## # ℹ 226 more rows

Add columns

#cumsum()
select(data, GAMES) %>%
    
    mutate(GAMES_cumsum = cumsum(GAMES))
## # A tibble: 236 × 2
##    GAMES GAMES_cumsum
##    <dbl>        <dbl>
##  1     3            3
##  2     4            7
##  3    10           17
##  4     3           20
##  5     3           23
##  6    28           51
##  7     5           56
##  8    18           74
##  9     1           75
## 10    11           86
## # ℹ 226 more rows
#lag()
select(data, PASERANK) %>%
    
    mutate(PASERANK_lag1 = lag(PASERANK))
## # A tibble: 236 × 2
##    PASERANK PASERANK_lag1
##       <dbl>         <dbl>
##  1       52            NA
##  2      187            52
##  3      220           187
##  4      138           220
##  5      150           138
##  6      216           150
##  7      206           216
##  8       16           206
##  9       78            16
## 10       30            78
## # ℹ 226 more rows

Summarize by groups

data
## # A tibble: 236 × 20
##    TEAMID TEAM   PAKE PAKERANK  PASE PASERANK GAMES     W     L WINPERCENT   R64
##     <dbl> <chr> <dbl>    <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>      <dbl> <dbl>
##  1      1 Abil…   0.7       45   0.7       52     3     1     2      0.333     2
##  2      2 Akron  -0.9      179  -1.1      187     4     0     4      0         4
##  3      3 Alab…  -2.1      211  -2.9      220    10     5     5      0.5       5
##  4      4 Alba…  -0.4      147  -0.3      138     3     0     3      0         3
##  5      6 Amer…  -0.5      160  -0.4      150     3     0     3      0         3
##  6      8 Ariz…  -1.7      206  -2.5      216    28    17    11      0.607    11
##  7      9 Ariz…  -2        209  -1.9      206     5     1     4      0.2       4
##  8     10 Arka…   4.3       11   3.5       16    18    11     7      0.611     7
##  9     11 Arka…   0         76   0         78     1     0     1      0         1
## 10     12 Aubu…   0.6       53   1.4       30    11     7     4      0.636     4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## #   CHAMP <dbl>, TOP2 <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>
summarise(data, WINPERCENT = mean(CHAMP, na.rm = TRUE))
## # A tibble: 1 × 1
##   WINPERCENT
##        <dbl>
## 1     0.0636
data %>%
   
    # Group by GAMES
     group_by(GAMES) %>%
    
    # Calculate average WINPERCENT
    summarise(WINPERCENT = mean(WINPERCENT, na.rm = TRUE)) %>%
  
    # Sort it
    arrange(WINPERCENT)
## # A tibble: 38 × 2
##    GAMES WINPERCENT
##    <dbl>      <dbl>
##  1     1      0    
##  2     2      0.141
##  3     3      0.195
##  4     4      0.219
##  5     6      0.244
##  6     7      0.322
##  7    10      0.362
##  8     8      0.375
##  9    13      0.385
## 10     5      0.388
## # ℹ 28 more rows