Import data

# excel file
data <- read_excel("C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.xlsx")
data
## # A tibble: 81,525 × 28
##    name            team  game_year game_week rush_att rush_yds rush_avg rush_tds
##    <chr>           <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 Duce Staley     PHI        2000         1       26      201      7.7        1
##  2 Lamar Smith     MIA        2000         1       27      145      5.4        1
##  3 Tiki Barber     NYG        2000         1       13      144     11.1        2
##  4 Stephen Davis   WAS        2000         1       23      133      5.8        1
##  5 Edgerrin James  IND        2000         1       28      124      4.4        1
##  6 Priest Holmes   BAL        2000         1       27      119      4.4        0
##  7 Curtis Martin   NYJ        2000         1       30      110      3.7        1
##  8 Robert Smith    MIN        2000         1       14      109      7.8        0
##  9 Tim Biakabutuka CAR        2000         1       15       88      5.9        0
## 10 Cade McNown     CHI        2000         1       10       87      8.7        1
## # ℹ 81,515 more rows
## # ℹ 20 more variables: rush_fumbles <dbl>, rec <chr>, rec_yds <chr>,
## #   rec_avg <chr>, rec_tds <chr>, rec_fumbles <chr>, pass_att <chr>,
## #   pass_yds <chr>, pass_tds <chr>, int <chr>, sck <chr>, pass_fumbles <chr>,
## #   rate <chr>, position <chr>, total_yards <dbl>, `total tds` <dbl>,
## #   ...25 <lgl>, ...26 <chr>, ...27 <chr>, ...28 <chr>

Apply the following dplyr verbs to your data

Filter rows

filter(data, position == "QB, WR/TE, RB", game_year == 2000)
## # A tibble: 0 × 28
## # ℹ 28 variables: name <chr>, team <chr>, game_year <dbl>, game_week <dbl>,
## #   rush_att <dbl>, rush_yds <dbl>, rush_avg <dbl>, rush_tds <dbl>,
## #   rush_fumbles <dbl>, rec <chr>, rec_yds <chr>, rec_avg <chr>, rec_tds <chr>,
## #   rec_fumbles <chr>, pass_att <chr>, pass_yds <chr>, pass_tds <chr>,
## #   int <chr>, sck <chr>, pass_fumbles <chr>, rate <chr>, position <chr>,
## #   total_yards <dbl>, total tds <dbl>, ...25 <lgl>, ...26 <chr>, ...27 <chr>,
## #   ...28 <chr>

Arrange rows

arrange(data, desc(total_yards))
## # A tibble: 81,525 × 28
##    name            team  game_year game_week rush_att rush_yds rush_avg rush_tds
##    <chr>           <chr>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 Matt Schaub     HOU        2012        11       NA       NA     NA         NA
##  2 Derek Carr      OAK        2016         8        1       13     13          0
##  3 Ben Roethlisbe… PIT        2014         8        1       -1     -1          0
##  4 Tom Brady       NE         2011         1        1        3      3          0
##  5 Matt Ryan       ATL        2016         4        4       14      3.5        0
##  6 Tony Romo       DAL        2013         5        1        7      7          0
##  7 Ben Roethlisbe… PIT        2017        14        1        5      5          0
##  8 Ben Roethlisbe… PIT        2009        15        1        7      7          0
##  9 Drew Brees      NO         2006        11       NA       NA     NA         NA
## 10 Eli Manning     NYG        2012         2        3       -2     -0.7        0
## # ℹ 81,515 more rows
## # ℹ 20 more variables: rush_fumbles <dbl>, rec <chr>, rec_yds <chr>,
## #   rec_avg <chr>, rec_tds <chr>, rec_fumbles <chr>, pass_att <chr>,
## #   pass_yds <chr>, pass_tds <chr>, int <chr>, sck <chr>, pass_fumbles <chr>,
## #   rate <chr>, position <chr>, total_yards <dbl>, `total tds` <dbl>,
## #   ...25 <lgl>, ...26 <chr>, ...27 <chr>, ...28 <chr>

Select columns

select(data, position, rush_yds, rec_yds, pass_yds)
## # A tibble: 81,525 × 4
##    position rush_yds rec_yds pass_yds
##    <chr>       <dbl> <chr>   <chr>   
##  1 RB            201 61      NA      
##  2 RB            145 12      NA      
##  3 RB            144 25      NA      
##  4 RB            133 37      NA      
##  5 RB            124 40      NA      
##  6 RB            119 32      NA      
##  7 RB            110 34      NA      
##  8 RB            109 3       NA      
##  9 RB             88 20      NA      
## 10 QB             87 NA      290     
## # ℹ 81,515 more rows

Add columns

## Add columns
data <- data %>%
  mutate(total_yards = as.numeric(total_yards)) %>%
  group_by(position) %>%
  mutate(yards_by_position = sum(total_yards, na.rm = TRUE))

Summarize by groups

data %>%
    # ensure yards_by_position is numeric 
    mutate(yards_by_position = as.numeric(yards_by_position)) %>%
    
    # group by position 
    group_by(position) %>%
    
    # calculate average total yards
    summarise(avg_total_yards = mean(yards_by_position, na.rm = TRUE)) %>%
    
    # sort it
    arrange(desc(avg_total_yards))
## # A tibble: 3 × 2
##   position avg_total_yards
##   <chr>              <dbl>
## 1 QB               2108084
## 2 WR/TE            1895896
## 3 RB               1021022