Import data

# excel file
data <- read_excel("../00_data/nfl_attendance.xlsx")
data
## # A tibble: 10,846 × 8
##    team    team_name  year  total   home   away  week weekly_attendance
##    <chr>   <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Arizona Cardinals  2000 893926 387475 506451     1 77434            
##  2 Arizona Cardinals  2000 893926 387475 506451     2 66009            
##  3 Arizona Cardinals  2000 893926 387475 506451     3 NA               
##  4 Arizona Cardinals  2000 893926 387475 506451     4 71801            
##  5 Arizona Cardinals  2000 893926 387475 506451     5 66985            
##  6 Arizona Cardinals  2000 893926 387475 506451     6 44296            
##  7 Arizona Cardinals  2000 893926 387475 506451     7 38293            
##  8 Arizona Cardinals  2000 893926 387475 506451     8 62981            
##  9 Arizona Cardinals  2000 893926 387475 506451     9 35286            
## 10 Arizona Cardinals  2000 893926 387475 506451    10 52244            
## # ℹ 10,836 more rows

Apply the following dplyr verbs to your data

Filter rows

#Shows data from 2002 only

filter(data, year == "2002")
## # A tibble: 544 × 8
##    team    team_name  year  total   home   away  week weekly_attendance
##    <chr>   <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Arizona Cardinals  2002 898877 327272 571605     1 85140            
##  2 Arizona Cardinals  2002 898877 327272 571605     2 63104            
##  3 Arizona Cardinals  2002 898877 327272 571605     3 28980            
##  4 Arizona Cardinals  2002 898877 327272 571605     4 30014            
##  5 Arizona Cardinals  2002 898877 327272 571605     5 72286            
##  6 Arizona Cardinals  2002 898877 327272 571605     6 NA               
##  7 Arizona Cardinals  2002 898877 327272 571605     7 59702            
##  8 Arizona Cardinals  2002 898877 327272 571605     8 67173            
##  9 Arizona Cardinals  2002 898877 327272 571605     9 47819            
## 10 Arizona Cardinals  2002 898877 327272 571605    10 29252            
## # ℹ 534 more rows

Arrange rows

#Rank by weekly attendance in desc order

arrange(data, desc(weekly_attendance))
## # A tibble: 10,846 × 8
##    team       team_name  year   total   home   away  week weekly_attendance
##    <chr>      <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Arizona    Cardinals  2000  893926 387475 506451     3 NA               
##  2 Atlanta    Falcons    2000  964579 422814 541765    15 NA               
##  3 Baltimore  Ravens     2000 1062373 551695 510678    14 NA               
##  4 Buffalo    Bills      2000 1098587 560695 537892     4 NA               
##  5 Carolina   Panthers   2000 1095192 583489 511703     4 NA               
##  6 Chicago    Bears      2000 1080684 535552 545132     9 NA               
##  7 Cincinnati Bengals    2000  967434 469992 497442     1 NA               
##  8 Cleveland  Browns     2000 1057139 581544 475595    17 NA               
##  9 Dallas     Cowboys    2000 1075470 504360 571110     6 NA               
## 10 Denver     Broncos    2000 1140030 604042 535988     9 NA               
## # ℹ 10,836 more rows
#Rank by weekly attendance in asc order

arrange(data, weekly_attendance)
## # A tibble: 10,846 × 8
##    team          team_name  year   total   home   away  week weekly_attendance
##    <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Dallas        Cowboys    2009 1307231 718055 589176    17 100621           
##  2 Philadelphia  Eagles     2009 1144895 553152 591743    17 100621           
##  3 Arizona       Cardinals  2005  920848 401035 519813     4 103467           
##  4 San Francisco 49ers      2005 1113073 523426 589647     4 103467           
##  5 Dallas        Cowboys    2009 1307231 718055 589176     2 105121           
##  6 New York      Giants     2009 1223927 629615 594312     2 105121           
##  7 Arizona       Cardinals  2003  804401 288499 515902     2 23127            
##  8 Seattle       Seahawks   2003 1005938 512150 493788     2 23127            
##  9 Arizona       Cardinals  2003  804401 288499 515902    15 23217            
## 10 Carolina      Panthers   2003 1078988 582566 496422    15 23217            
## # ℹ 10,836 more rows

Select columns

#Select only the team name, the year, and total attendance

select(data, team_name, year, total)
## # A tibble: 10,846 × 3
##    team_name  year  total
##    <chr>     <dbl>  <dbl>
##  1 Cardinals  2000 893926
##  2 Cardinals  2000 893926
##  3 Cardinals  2000 893926
##  4 Cardinals  2000 893926
##  5 Cardinals  2000 893926
##  6 Cardinals  2000 893926
##  7 Cardinals  2000 893926
##  8 Cardinals  2000 893926
##  9 Cardinals  2000 893926
## 10 Cardinals  2000 893926
## # ℹ 10,836 more rows

Add columns

#Add a variable to show the difference in away and home attendance

mutate(data, 
       difference = away - home)
## # A tibble: 10,846 × 9
##    team  team_name  year  total   home   away  week weekly_attendance difference
##    <chr> <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <chr>                  <dbl>
##  1 Ariz… Cardinals  2000 893926 387475 506451     1 77434                 118976
##  2 Ariz… Cardinals  2000 893926 387475 506451     2 66009                 118976
##  3 Ariz… Cardinals  2000 893926 387475 506451     3 NA                    118976
##  4 Ariz… Cardinals  2000 893926 387475 506451     4 71801                 118976
##  5 Ariz… Cardinals  2000 893926 387475 506451     5 66985                 118976
##  6 Ariz… Cardinals  2000 893926 387475 506451     6 44296                 118976
##  7 Ariz… Cardinals  2000 893926 387475 506451     7 38293                 118976
##  8 Ariz… Cardinals  2000 893926 387475 506451     8 62981                 118976
##  9 Ariz… Cardinals  2000 893926 387475 506451     9 35286                 118976
## 10 Ariz… Cardinals  2000 893926 387475 506451    10 52244                 118976
## # ℹ 10,836 more rows

Summarize by groups

data
## # A tibble: 10,846 × 8
##    team    team_name  year  total   home   away  week weekly_attendance
##    <chr>   <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <chr>            
##  1 Arizona Cardinals  2000 893926 387475 506451     1 77434            
##  2 Arizona Cardinals  2000 893926 387475 506451     2 66009            
##  3 Arizona Cardinals  2000 893926 387475 506451     3 NA               
##  4 Arizona Cardinals  2000 893926 387475 506451     4 71801            
##  5 Arizona Cardinals  2000 893926 387475 506451     5 66985            
##  6 Arizona Cardinals  2000 893926 387475 506451     6 44296            
##  7 Arizona Cardinals  2000 893926 387475 506451     7 38293            
##  8 Arizona Cardinals  2000 893926 387475 506451     8 62981            
##  9 Arizona Cardinals  2000 893926 387475 506451     9 35286            
## 10 Arizona Cardinals  2000 893926 387475 506451    10 52244            
## # ℹ 10,836 more rows
#average total attendance

summarise(data, avg_attendance = mean(total))
## # A tibble: 1 × 1
##   avg_attendance
##            <dbl>
## 1       1080910.
data %>%
    group_by(team_name) %>%
    summarise(avg_attendance = mean(total))
## # A tibble: 32 × 2
##    team_name  avg_attendance
##    <chr>               <dbl>
##  1 49ers            1082398.
##  2 Bears            1042070.
##  3 Bengals          1009985.
##  4 Bills            1083997.
##  5 Broncos          1139041.
##  6 Browns           1077983.
##  7 Buccaneers       1034902.
##  8 Cardinals         983232.
##  9 Chargers         1005781.
## 10 Chiefs           1123300.
## # ℹ 22 more rows