Import data

# excel file
data <- read_excel("../00_data/myData_charts.xlsx")
data
## # A tibble: 10,879 × 8
##    team          team_name  year   total   home   away  week weekly_attendance
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 San Francisco 49ers      2000 1057954 541964 515990     1 54626            
##  2 San Francisco 49ers      2000 1057954 541964 515990     2 66879            
##  3 San Francisco 49ers      2000 1057954 541964 515990     3 65945            
##  4 San Francisco 49ers      2000 1057954 541964 515990     4 64127            
##  5 San Francisco 49ers      2000 1057954 541964 515990     5 66985            
##  6 San Francisco 49ers      2000 1057954 541964 515990     6 68344            
##  7 San Francisco 49ers      2000 1057954 541964 515990     7 59870            
##  8 San Francisco 49ers      2000 1057954 541964 515990     8 73169            
##  9 San Francisco 49ers      2000 1057954 541964 515990     9 68109            
## 10 San Francisco 49ers      2000 1057954 541964 515990    10 64900            
## # … with 10,869 more rows

Apply the following dplyr verbs to your data

Filter rows

filter(data, team == "team")
## # A tibble: 0 × 8
## # … with 8 variables: team <chr>, team_name <chr>, year <dbl>, total <dbl>,
## #   home <dbl>, away <dbl>, week <dbl>, weekly_attendance <chr>
data
## # A tibble: 10,879 × 8
##    team          team_name  year   total   home   away  week weekly_attendance
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 San Francisco 49ers      2000 1057954 541964 515990     1 54626            
##  2 San Francisco 49ers      2000 1057954 541964 515990     2 66879            
##  3 San Francisco 49ers      2000 1057954 541964 515990     3 65945            
##  4 San Francisco 49ers      2000 1057954 541964 515990     4 64127            
##  5 San Francisco 49ers      2000 1057954 541964 515990     5 66985            
##  6 San Francisco 49ers      2000 1057954 541964 515990     6 68344            
##  7 San Francisco 49ers      2000 1057954 541964 515990     7 59870            
##  8 San Francisco 49ers      2000 1057954 541964 515990     8 73169            
##  9 San Francisco 49ers      2000 1057954 541964 515990     9 68109            
## 10 San Francisco 49ers      2000 1057954 541964 515990    10 64900            
## # … with 10,869 more rows

Arrange rows

arrange(data, desc(total))
## # A tibble: 10,879 × 8
##    team   team_name  year   total   home   away  week weekly_attendance
##    <chr>  <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Dallas Cowboys    2016 1322087 740318 581769     1 92867            
##  2 Dallas Cowboys    2016 1322087 740318 581769     2 80612            
##  3 Dallas Cowboys    2016 1322087 740318 581769     3 90554            
##  4 Dallas Cowboys    2016 1322087 740318 581769     4 70178            
##  5 Dallas Cowboys    2016 1322087 740318 581769     5 91653            
##  6 Dallas Cowboys    2016 1322087 740318 581769     6 78481            
##  7 Dallas Cowboys    2016 1322087 740318 581769     7 NA               
##  8 Dallas Cowboys    2016 1322087 740318 581769     8 93103            
##  9 Dallas Cowboys    2016 1322087 740318 581769     9 67431            
## 10 Dallas Cowboys    2016 1322087 740318 581769    10 67737            
## # … with 10,869 more rows
data
## # A tibble: 10,879 × 8
##    team          team_name  year   total   home   away  week weekly_attendance
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 San Francisco 49ers      2000 1057954 541964 515990     1 54626            
##  2 San Francisco 49ers      2000 1057954 541964 515990     2 66879            
##  3 San Francisco 49ers      2000 1057954 541964 515990     3 65945            
##  4 San Francisco 49ers      2000 1057954 541964 515990     4 64127            
##  5 San Francisco 49ers      2000 1057954 541964 515990     5 66985            
##  6 San Francisco 49ers      2000 1057954 541964 515990     6 68344            
##  7 San Francisco 49ers      2000 1057954 541964 515990     7 59870            
##  8 San Francisco 49ers      2000 1057954 541964 515990     8 73169            
##  9 San Francisco 49ers      2000 1057954 541964 515990     9 68109            
## 10 San Francisco 49ers      2000 1057954 541964 515990    10 64900            
## # … with 10,869 more rows

Select columns

select(data, year, total, team_name)
## # A tibble: 10,879 × 3
##     year   total team_name
##    <dbl>   <dbl> <chr>    
##  1  2000 1057954 49ers    
##  2  2000 1057954 49ers    
##  3  2000 1057954 49ers    
##  4  2000 1057954 49ers    
##  5  2000 1057954 49ers    
##  6  2000 1057954 49ers    
##  7  2000 1057954 49ers    
##  8  2000 1057954 49ers    
##  9  2000 1057954 49ers    
## 10  2000 1057954 49ers    
## # … with 10,869 more rows
data
## # A tibble: 10,879 × 8
##    team          team_name  year   total   home   away  week weekly_attendance
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 San Francisco 49ers      2000 1057954 541964 515990     1 54626            
##  2 San Francisco 49ers      2000 1057954 541964 515990     2 66879            
##  3 San Francisco 49ers      2000 1057954 541964 515990     3 65945            
##  4 San Francisco 49ers      2000 1057954 541964 515990     4 64127            
##  5 San Francisco 49ers      2000 1057954 541964 515990     5 66985            
##  6 San Francisco 49ers      2000 1057954 541964 515990     6 68344            
##  7 San Francisco 49ers      2000 1057954 541964 515990     7 59870            
##  8 San Francisco 49ers      2000 1057954 541964 515990     8 73169            
##  9 San Francisco 49ers      2000 1057954 541964 515990     9 68109            
## 10 San Francisco 49ers      2000 1057954 541964 515990    10 64900            
## # … with 10,869 more rows

Add columns

mutated_data <- select(data,
                       team:weekly_attendance)
mutate(mutated_data,
       home_away_difference = home - away)
## # A tibble: 10,879 × 9
##    team          team_name  year   total   home   away  week weekly_at…¹ home_…²
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>         <dbl>
##  1 San Francisco 49ers      2000 1057954 541964 515990     1 54626         25974
##  2 San Francisco 49ers      2000 1057954 541964 515990     2 66879         25974
##  3 San Francisco 49ers      2000 1057954 541964 515990     3 65945         25974
##  4 San Francisco 49ers      2000 1057954 541964 515990     4 64127         25974
##  5 San Francisco 49ers      2000 1057954 541964 515990     5 66985         25974
##  6 San Francisco 49ers      2000 1057954 541964 515990     6 68344         25974
##  7 San Francisco 49ers      2000 1057954 541964 515990     7 59870         25974
##  8 San Francisco 49ers      2000 1057954 541964 515990     8 73169         25974
##  9 San Francisco 49ers      2000 1057954 541964 515990     9 68109         25974
## 10 San Francisco 49ers      2000 1057954 541964 515990    10 64900         25974
## # … with 10,869 more rows, and abbreviated variable names ¹​weekly_attendance,
## #   ²​home_away_difference

Summarize by groups

by_team <- group_by(mutated_data, team_name, year)
summarise(by_team, total = mean(total, na.rm = TRUE))
## # A tibble: 671 × 3
## # Groups:   team_name [65]
##    team_name  year   total
##    <chr>     <dbl>   <dbl>
##  1 49ers      2000 1057954
##  2 49ers      2001 1061811
##  3 49ers      2002 1058751
##  4 49ers      2003 1050337
##  5 49ers      2004 1024975
##  6 49ers      2005 1113073
##  7 49ers      2006 1086093
##  8 49ers      2007 1097841
##  9 49ers      2008 1076358
## 10 49ers      2009 1075599
## # … with 661 more rows