Import data
# excel file
data <- read_excel("../00_data/myData_charts.xlsx")
data
## # A tibble: 10,879 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 San Francisco 49ers 2000 1057954 541964 515990 1 54626
## 2 San Francisco 49ers 2000 1057954 541964 515990 2 66879
## 3 San Francisco 49ers 2000 1057954 541964 515990 3 65945
## 4 San Francisco 49ers 2000 1057954 541964 515990 4 64127
## 5 San Francisco 49ers 2000 1057954 541964 515990 5 66985
## 6 San Francisco 49ers 2000 1057954 541964 515990 6 68344
## 7 San Francisco 49ers 2000 1057954 541964 515990 7 59870
## 8 San Francisco 49ers 2000 1057954 541964 515990 8 73169
## 9 San Francisco 49ers 2000 1057954 541964 515990 9 68109
## 10 San Francisco 49ers 2000 1057954 541964 515990 10 64900
## # … with 10,869 more rows
Apply the following dplyr verbs to your data
Filter rows
filter(data, team == "team")
## # A tibble: 0 × 8
## # … with 8 variables: team <chr>, team_name <chr>, year <dbl>, total <dbl>,
## # home <dbl>, away <dbl>, week <dbl>, weekly_attendance <chr>
data
## # A tibble: 10,879 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 San Francisco 49ers 2000 1057954 541964 515990 1 54626
## 2 San Francisco 49ers 2000 1057954 541964 515990 2 66879
## 3 San Francisco 49ers 2000 1057954 541964 515990 3 65945
## 4 San Francisco 49ers 2000 1057954 541964 515990 4 64127
## 5 San Francisco 49ers 2000 1057954 541964 515990 5 66985
## 6 San Francisco 49ers 2000 1057954 541964 515990 6 68344
## 7 San Francisco 49ers 2000 1057954 541964 515990 7 59870
## 8 San Francisco 49ers 2000 1057954 541964 515990 8 73169
## 9 San Francisco 49ers 2000 1057954 541964 515990 9 68109
## 10 San Francisco 49ers 2000 1057954 541964 515990 10 64900
## # … with 10,869 more rows
Arrange rows
arrange(data, desc(total))
## # A tibble: 10,879 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Dallas Cowboys 2016 1322087 740318 581769 1 92867
## 2 Dallas Cowboys 2016 1322087 740318 581769 2 80612
## 3 Dallas Cowboys 2016 1322087 740318 581769 3 90554
## 4 Dallas Cowboys 2016 1322087 740318 581769 4 70178
## 5 Dallas Cowboys 2016 1322087 740318 581769 5 91653
## 6 Dallas Cowboys 2016 1322087 740318 581769 6 78481
## 7 Dallas Cowboys 2016 1322087 740318 581769 7 NA
## 8 Dallas Cowboys 2016 1322087 740318 581769 8 93103
## 9 Dallas Cowboys 2016 1322087 740318 581769 9 67431
## 10 Dallas Cowboys 2016 1322087 740318 581769 10 67737
## # … with 10,869 more rows
data
## # A tibble: 10,879 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 San Francisco 49ers 2000 1057954 541964 515990 1 54626
## 2 San Francisco 49ers 2000 1057954 541964 515990 2 66879
## 3 San Francisco 49ers 2000 1057954 541964 515990 3 65945
## 4 San Francisco 49ers 2000 1057954 541964 515990 4 64127
## 5 San Francisco 49ers 2000 1057954 541964 515990 5 66985
## 6 San Francisco 49ers 2000 1057954 541964 515990 6 68344
## 7 San Francisco 49ers 2000 1057954 541964 515990 7 59870
## 8 San Francisco 49ers 2000 1057954 541964 515990 8 73169
## 9 San Francisco 49ers 2000 1057954 541964 515990 9 68109
## 10 San Francisco 49ers 2000 1057954 541964 515990 10 64900
## # … with 10,869 more rows
Select columns
select(data, year, total, team_name)
## # A tibble: 10,879 × 3
## year total team_name
## <dbl> <dbl> <chr>
## 1 2000 1057954 49ers
## 2 2000 1057954 49ers
## 3 2000 1057954 49ers
## 4 2000 1057954 49ers
## 5 2000 1057954 49ers
## 6 2000 1057954 49ers
## 7 2000 1057954 49ers
## 8 2000 1057954 49ers
## 9 2000 1057954 49ers
## 10 2000 1057954 49ers
## # … with 10,869 more rows
data
## # A tibble: 10,879 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 San Francisco 49ers 2000 1057954 541964 515990 1 54626
## 2 San Francisco 49ers 2000 1057954 541964 515990 2 66879
## 3 San Francisco 49ers 2000 1057954 541964 515990 3 65945
## 4 San Francisco 49ers 2000 1057954 541964 515990 4 64127
## 5 San Francisco 49ers 2000 1057954 541964 515990 5 66985
## 6 San Francisco 49ers 2000 1057954 541964 515990 6 68344
## 7 San Francisco 49ers 2000 1057954 541964 515990 7 59870
## 8 San Francisco 49ers 2000 1057954 541964 515990 8 73169
## 9 San Francisco 49ers 2000 1057954 541964 515990 9 68109
## 10 San Francisco 49ers 2000 1057954 541964 515990 10 64900
## # … with 10,869 more rows
Add columns
mutated_data <- select(data,
team:weekly_attendance)
mutate(mutated_data,
home_away_difference = home - away)
## # A tibble: 10,879 × 9
## team team_name year total home away week weekly_at…¹ home_…²
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 San Francisco 49ers 2000 1057954 541964 515990 1 54626 25974
## 2 San Francisco 49ers 2000 1057954 541964 515990 2 66879 25974
## 3 San Francisco 49ers 2000 1057954 541964 515990 3 65945 25974
## 4 San Francisco 49ers 2000 1057954 541964 515990 4 64127 25974
## 5 San Francisco 49ers 2000 1057954 541964 515990 5 66985 25974
## 6 San Francisco 49ers 2000 1057954 541964 515990 6 68344 25974
## 7 San Francisco 49ers 2000 1057954 541964 515990 7 59870 25974
## 8 San Francisco 49ers 2000 1057954 541964 515990 8 73169 25974
## 9 San Francisco 49ers 2000 1057954 541964 515990 9 68109 25974
## 10 San Francisco 49ers 2000 1057954 541964 515990 10 64900 25974
## # … with 10,869 more rows, and abbreviated variable names ¹weekly_attendance,
## # ²home_away_difference
Summarize by groups
by_team <- group_by(mutated_data, team_name, year)
summarise(by_team, total = mean(total, na.rm = TRUE))
## # A tibble: 671 × 3
## # Groups: team_name [65]
## team_name year total
## <chr> <dbl> <dbl>
## 1 49ers 2000 1057954
## 2 49ers 2001 1061811
## 3 49ers 2002 1058751
## 4 49ers 2003 1050337
## 5 49ers 2004 1024975
## 6 49ers 2005 1113073
## 7 49ers 2006 1086093
## 8 49ers 2007 1097841
## 9 49ers 2008 1076358
## 10 49ers 2009 1075599
## # … with 661 more rows