Import your data
data <- read_excel("../00_data/Data.xlsx")
## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
data
## # A tibble: 10,846 × 14
## team `Team City` Population team_name year total home away week
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 1
## 2 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 2
## 3 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 3
## 4 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 4
## 5 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 5
## 6 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 6
## 7 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 7
## 8 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 8
## 9 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 9
## 10 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## # ...13 <lgl>, ...14 <dbl>
Separating and Uniting
Unite two columns
data_united <- data %>%
unite(col = HomeAway, c(home,away), sep = "/", remove = FALSE)
data_united
## # A tibble: 10,846 × 15
## team `Team City` Population team_name year total HomeAway home away
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 2 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 3 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 4 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 5 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 6 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 7 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 8 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 9 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## 10 Arizona Phoenix 1608139 Cardinals 2000 893926 387475/5… 387475 506451
## # ℹ 10,836 more rows
## # ℹ 6 more variables: week <dbl>, weekly_attendance <chr>, ...11 <lgl>,
## # ...12 <chr>, ...13 <lgl>, ...14 <dbl>
Separate a column
data_united_2 <- data_united %>%
separate(col = HomeAway, into = c("home", "away"), sep = "/")
data_united_2
## # A tibble: 10,846 × 14
## team `Team City` Population team_name year total home away week
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 1
## 2 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 2
## 3 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 3
## 4 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 4
## 5 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 5
## 6 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 6
## 7 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 7
## 8 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 8
## 9 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 9
## 10 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## # ...13 <lgl>, ...14 <dbl>
Missing Values
data_2 <- data_united_2 %>%
mutate(home = as.numeric(home), away = as.numeric(away)) %>%
complete(team, team_name) %>%
fill(team, .direction = "down") %>%
replace_na(list(home = 0, away = 0))
data_2
## # A tibble: 11,836 × 14
## team team_name `Team City` Population year total home away week
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Arizona 49ers <NA> NA NA NA 0 0 NA
## 2 Arizona Bears <NA> NA NA NA 0 0 NA
## 3 Arizona Bengals <NA> NA NA NA 0 0 NA
## 4 Arizona Bills <NA> NA NA NA 0 0 NA
## 5 Arizona Broncos <NA> NA NA NA 0 0 NA
## 6 Arizona Browns <NA> NA NA NA 0 0 NA
## 7 Arizona Buccaneers <NA> NA NA NA 0 0 NA
## 8 Arizona Cardinals Phoenix 1608139 2000 893926 387475 506451 1
## 9 Arizona Cardinals Phoenix 1608139 2000 893926 387475 506451 2
## 10 Arizona Cardinals Phoenix 1608139 2000 893926 387475 506451 3
## # ℹ 11,826 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## # ...13 <lgl>, ...14 <dbl>