Import your data

data <- read_excel("../00_data/Data.xlsx")
## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
data
## # A tibble: 10,846 × 14
##    team    `Team City` Population team_name  year  total   home   away  week
##    <chr>   <chr>            <dbl> <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     1
##  2 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     2
##  3 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     3
##  4 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     4
##  5 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     5
##  6 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     6
##  7 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     7
##  8 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     8
##  9 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     9
## 10 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451    10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## #   ...13 <lgl>, ...14 <dbl>

Pivoting

long to wide form

data_long <- data %>%
    pivot_longer(cols = c(home, away),
        names_to = "game_attendance", values_to = "game_total")

data_long
## # A tibble: 21,692 × 14
##    team    `Team City` Population team_name  year  total  week weekly_attendance
##    <chr>   <chr>            <dbl> <chr>     <dbl>  <dbl> <dbl> <chr>            
##  1 Arizona Phoenix        1608139 Cardinals  2000 893926     1 77434            
##  2 Arizona Phoenix        1608139 Cardinals  2000 893926     1 77434            
##  3 Arizona Phoenix        1608139 Cardinals  2000 893926     2 66009            
##  4 Arizona Phoenix        1608139 Cardinals  2000 893926     2 66009            
##  5 Arizona Phoenix        1608139 Cardinals  2000 893926     3 NA               
##  6 Arizona Phoenix        1608139 Cardinals  2000 893926     3 NA               
##  7 Arizona Phoenix        1608139 Cardinals  2000 893926     4 71801            
##  8 Arizona Phoenix        1608139 Cardinals  2000 893926     4 71801            
##  9 Arizona Phoenix        1608139 Cardinals  2000 893926     5 66985            
## 10 Arizona Phoenix        1608139 Cardinals  2000 893926     5 66985            
## # ℹ 21,682 more rows
## # ℹ 6 more variables: ...11 <lgl>, ...12 <chr>, ...13 <lgl>, ...14 <dbl>,
## #   game_attendance <chr>, game_total <dbl>

wide to long form

data_wide <- data %>%
    pivot_wider(names_from = team_name, values_from = week)
## Warning: Values from `week` are not uniquely identified; output will contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
##   {data} |>
##   dplyr::summarise(n = dplyr::n(), .by = c(team, `Team City`, Population, year,
##   total, home, away, weekly_attendance, ...11, ...12, ...13, ...14, team_name))
##   |>
##   dplyr::filter(n > 1L)

Separating and Uniting

Unite two columns

data_united <- data %>%
    
    unite(col = HomeAway, c(home,away), sep = "/", remove = FALSE)
data_united
## # A tibble: 10,846 × 15
##    team    `Team City` Population team_name  year  total HomeAway    home   away
##    <chr>   <chr>            <dbl> <chr>     <dbl>  <dbl> <chr>      <dbl>  <dbl>
##  1 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  2 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  3 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  4 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  5 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  6 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  7 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  8 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
##  9 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
## 10 Arizona Phoenix        1608139 Cardinals  2000 893926 387475/5… 387475 506451
## # ℹ 10,836 more rows
## # ℹ 6 more variables: week <dbl>, weekly_attendance <chr>, ...11 <lgl>,
## #   ...12 <chr>, ...13 <lgl>, ...14 <dbl>

Separate a column

data_united_2 <- data_united %>%
    
    separate(col = HomeAway, into = c("home", "away"), sep = "/")
data_united_2
## # A tibble: 10,846 × 14
##    team    `Team City` Population team_name  year  total home   away    week
##    <chr>   <chr>            <dbl> <chr>     <dbl>  <dbl> <chr>  <chr>  <dbl>
##  1 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     1
##  2 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     2
##  3 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     3
##  4 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     4
##  5 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     5
##  6 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     6
##  7 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     7
##  8 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     8
##  9 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451     9
## 10 Arizona Phoenix        1608139 Cardinals  2000 893926 387475 506451    10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## #   ...13 <lgl>, ...14 <dbl>

Missing Values

data_2 <- data_united_2 %>%
    
    mutate(home = as.numeric(home), away = as.numeric(away)) %>%
    
    complete(team, team_name) %>%
    
    fill(team, .direction = "down") %>%
    
    replace_na(list(home = 0, away = 0))

data_2
## # A tibble: 11,836 × 14
##    team    team_name  `Team City` Population  year  total   home   away  week
##    <chr>   <chr>      <chr>            <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1 Arizona 49ers      <NA>                NA    NA     NA      0      0    NA
##  2 Arizona Bears      <NA>                NA    NA     NA      0      0    NA
##  3 Arizona Bengals    <NA>                NA    NA     NA      0      0    NA
##  4 Arizona Bills      <NA>                NA    NA     NA      0      0    NA
##  5 Arizona Broncos    <NA>                NA    NA     NA      0      0    NA
##  6 Arizona Browns     <NA>                NA    NA     NA      0      0    NA
##  7 Arizona Buccaneers <NA>                NA    NA     NA      0      0    NA
##  8 Arizona Cardinals  Phoenix        1608139  2000 893926 387475 506451     1
##  9 Arizona Cardinals  Phoenix        1608139  2000 893926 387475 506451     2
## 10 Arizona Cardinals  Phoenix        1608139  2000 893926 387475 506451     3
## # ℹ 11,826 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## #   ...13 <lgl>, ...14 <dbl>