Import data
# excel file
data <- read_excel("C:/Users/ejp14/OneDrive/Desktop/PSU_DAT3000_IntroToDA/01_module4/Data/myData.xlsx")
data
## # A tibble: 81,525 × 28
## name team game_year game_week rush_att rush_yds rush_avg rush_tds
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Duce Staley PHI 2000 1 26 201 7.7 1
## 2 Lamar Smith MIA 2000 1 27 145 5.4 1
## 3 Tiki Barber NYG 2000 1 13 144 11.1 2
## 4 Stephen Davis WAS 2000 1 23 133 5.8 1
## 5 Edgerrin James IND 2000 1 28 124 4.4 1
## 6 Priest Holmes BAL 2000 1 27 119 4.4 0
## 7 Curtis Martin NYJ 2000 1 30 110 3.7 1
## 8 Robert Smith MIN 2000 1 14 109 7.8 0
## 9 Tim Biakabutuka CAR 2000 1 15 88 5.9 0
## 10 Cade McNown CHI 2000 1 10 87 8.7 1
## # ℹ 81,515 more rows
## # ℹ 20 more variables: rush_fumbles <dbl>, rec <chr>, rec_yds <chr>,
## # rec_avg <chr>, rec_tds <chr>, rec_fumbles <chr>, pass_att <chr>,
## # pass_yds <chr>, pass_tds <chr>, int <chr>, sck <chr>, pass_fumbles <chr>,
## # rate <chr>, position <chr>, total_yards <dbl>, `total tds` <dbl>,
## # ...25 <lgl>, ...26 <chr>, ...27 <chr>, ...28 <chr>
Apply the following dplyr verbs to your data
Filter rows
filter(data, position == "QB, WR/TE, RB", game_year == 2000)
## # A tibble: 0 × 28
## # ℹ 28 variables: name <chr>, team <chr>, game_year <dbl>, game_week <dbl>,
## # rush_att <dbl>, rush_yds <dbl>, rush_avg <dbl>, rush_tds <dbl>,
## # rush_fumbles <dbl>, rec <chr>, rec_yds <chr>, rec_avg <chr>, rec_tds <chr>,
## # rec_fumbles <chr>, pass_att <chr>, pass_yds <chr>, pass_tds <chr>,
## # int <chr>, sck <chr>, pass_fumbles <chr>, rate <chr>, position <chr>,
## # total_yards <dbl>, total tds <dbl>, ...25 <lgl>, ...26 <chr>, ...27 <chr>,
## # ...28 <chr>
Arrange rows
arrange(data, desc(total_yards))
## # A tibble: 81,525 × 28
## name team game_year game_week rush_att rush_yds rush_avg rush_tds
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Matt Schaub HOU 2012 11 NA NA NA NA
## 2 Derek Carr OAK 2016 8 1 13 13 0
## 3 Ben Roethlisbe… PIT 2014 8 1 -1 -1 0
## 4 Tom Brady NE 2011 1 1 3 3 0
## 5 Matt Ryan ATL 2016 4 4 14 3.5 0
## 6 Tony Romo DAL 2013 5 1 7 7 0
## 7 Ben Roethlisbe… PIT 2017 14 1 5 5 0
## 8 Ben Roethlisbe… PIT 2009 15 1 7 7 0
## 9 Drew Brees NO 2006 11 NA NA NA NA
## 10 Eli Manning NYG 2012 2 3 -2 -0.7 0
## # ℹ 81,515 more rows
## # ℹ 20 more variables: rush_fumbles <dbl>, rec <chr>, rec_yds <chr>,
## # rec_avg <chr>, rec_tds <chr>, rec_fumbles <chr>, pass_att <chr>,
## # pass_yds <chr>, pass_tds <chr>, int <chr>, sck <chr>, pass_fumbles <chr>,
## # rate <chr>, position <chr>, total_yards <dbl>, `total tds` <dbl>,
## # ...25 <lgl>, ...26 <chr>, ...27 <chr>, ...28 <chr>
Select columns
select(data, position, rush_yds, rec_yds, pass_yds)
## # A tibble: 81,525 × 4
## position rush_yds rec_yds pass_yds
## <chr> <dbl> <chr> <chr>
## 1 RB 201 61 NA
## 2 RB 145 12 NA
## 3 RB 144 25 NA
## 4 RB 133 37 NA
## 5 RB 124 40 NA
## 6 RB 119 32 NA
## 7 RB 110 34 NA
## 8 RB 109 3 NA
## 9 RB 88 20 NA
## 10 QB 87 NA 290
## # ℹ 81,515 more rows
Add columns
## Add columns
data <- data %>%
mutate(total_yards = as.numeric(total_yards)) %>%
group_by(position) %>%
mutate(yards_by_position = sum(total_yards, na.rm = TRUE))
Summarize by groups
data %>%
# ensure yards_by_position is numeric
mutate(yards_by_position = as.numeric(yards_by_position)) %>%
# group by position
group_by(position) %>%
# calculate average total yards
summarise(avg_total_yards = mean(yards_by_position, na.rm = TRUE)) %>%
# sort it
arrange(desc(avg_total_yards))
## # A tibble: 3 × 2
## position avg_total_yards
## <chr> <dbl>
## 1 QB 2108084
## 2 WR/TE 1895896
## 3 RB 1021022