Import your data
# excel file
attendance <- read_excel("../00_data/nfl_attendance.xlsx")
attendance
## # A tibble: 10,846 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Arizona Cardinals 2000 893926 387475 506451 1 77434
## 2 Arizona Cardinals 2000 893926 387475 506451 2 66009
## 3 Arizona Cardinals 2000 893926 387475 506451 3 NA
## 4 Arizona Cardinals 2000 893926 387475 506451 4 71801
## 5 Arizona Cardinals 2000 893926 387475 506451 5 66985
## 6 Arizona Cardinals 2000 893926 387475 506451 6 44296
## 7 Arizona Cardinals 2000 893926 387475 506451 7 38293
## 8 Arizona Cardinals 2000 893926 387475 506451 8 62981
## 9 Arizona Cardinals 2000 893926 387475 506451 9 35286
## 10 Arizona Cardinals 2000 893926 387475 506451 10 52244
## # ℹ 10,836 more rows
set.seed(1234)
attendance_small <- attendance %>%
select(team, year, total) %>%
sample_n(10)
attendance_small
## # A tibble: 10 × 3
## team year total
## <chr> <dbl> <dbl>
## 1 Pittsburgh 2013 1025772
## 2 San Diego 2014 1069529
## 3 Cleveland 2013 1122750
## 4 Tampa Bay 2014 1023818
## 5 Indianapolis 2013 1054693
## 6 Tennessee 2016 1016507
## 7 Chicago 2001 1059454
## 8 Pittsburgh 2001 1055127
## 9 Kansas City 2005 1180710
## 10 Arizona 2004 838557
Pivoting
#Long to wide
attendance_wide <- attendance_small %>%
pivot_wider(names_from = year, values_from = total)
attendance_wide
## # A tibble: 9 × 7
## team `2013` `2014` `2016` `2001` `2005` `2004`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Pittsburgh 1025772 NA NA 1055127 NA NA
## 2 San Diego NA 1069529 NA NA NA NA
## 3 Cleveland 1122750 NA NA NA NA NA
## 4 Tampa Bay NA 1023818 NA NA NA NA
## 5 Indianapolis 1054693 NA NA NA NA NA
## 6 Tennessee NA NA 1016507 NA NA NA
## 7 Chicago NA NA NA 1059454 NA NA
## 8 Kansas City NA NA NA NA 1180710 NA
## 9 Arizona NA NA NA NA NA 838557
#Wide to long
attendance_long <- attendance_wide %>%
pivot_longer('2013' : '2004', names_to = "year", values_to = "total", values_drop_na = TRUE)
attendance_long
## # A tibble: 10 × 3
## team year total
## <chr> <chr> <dbl>
## 1 Pittsburgh 2013 1025772
## 2 Pittsburgh 2001 1055127
## 3 San Diego 2014 1069529
## 4 Cleveland 2013 1122750
## 5 Tampa Bay 2014 1023818
## 6 Indianapolis 2013 1054693
## 7 Tennessee 2016 1016507
## 8 Chicago 2001 1059454
## 9 Kansas City 2005 1180710
## 10 Arizona 2004 838557
Separating and Uniting
#Unite columns
attendance_unite <- attendance_small %>%
unite(col = "total by year", c(year,total), sep = "/",)
attendance_unite
## # A tibble: 10 × 2
## team `total by year`
## <chr> <chr>
## 1 Pittsburgh 2013/1025772
## 2 San Diego 2014/1069529
## 3 Cleveland 2013/1122750
## 4 Tampa Bay 2014/1023818
## 5 Indianapolis 2013/1054693
## 6 Tennessee 2016/1016507
## 7 Chicago 2001/1059454
## 8 Pittsburgh 2001/1055127
## 9 Kansas City 2005/1180710
## 10 Arizona 2004/838557
#Seperate columns
attendance_sep <- attendance_unite %>%
separate(col = `total by year`, into = c("year", "total"))
attendance_sep
## # A tibble: 10 × 3
## team year total
## <chr> <chr> <chr>
## 1 Pittsburgh 2013 1025772
## 2 San Diego 2014 1069529
## 3 Cleveland 2013 1122750
## 4 Tampa Bay 2014 1023818
## 5 Indianapolis 2013 1054693
## 6 Tennessee 2016 1016507
## 7 Chicago 2001 1059454
## 8 Pittsburgh 2001 1055127
## 9 Kansas City 2005 1180710
## 10 Arizona 2004 838557