Merge Data
# remove bye weeks
att <- att %>% filter(!is.na(weekly_attendance))
# get rid of post season (att does not have postseason data)
games <- games %>% filter(week %in% 1:17)
df1 <- games %>% select(-away_team_name)
df2 <- games %>% select(-home_team_name)
df1$H.A <- "home"
df2$H.A <- "away"
df1 <- df1 %>% rename(team = home_team_name)
df2 <- df2 %>% rename(team = away_team_name)
gm <- rbind(df1,df2) %>% select(-home_team_city,-away_team_city,-home_team,-away_team)
att <- att %>% mutate(id = paste(year,week,team_name))
gm <- gm %>% mutate(id = paste(year,week,team))
temp <- merge(att,gm,by = "id")
stand <- stand %>% mutate(id1 = paste(year,team))
temp$id <- NULL
stand <- stand %>% mutate(id = paste(year,team_name))
df <- temp %>% mutate(id = paste(year.x,team_name)) %>%
merge(.,stand,by = "id")
df <- df %>% select(team_name.x, year, week.x, home, away, weekly_attendance, winner,
day, date, time, pts_win, pts_loss, yds_win, yds_loss, turnovers_win, turnovers_loss,
H.A, wins, loss, points_for, points_against, points_differential,
margin_of_victory, strength_of_schedule, simple_rating, offensive_ranking,
defensive_ranking, playoffs, sb_winner)
df <- df %>% rename(team = team_name.x,week = week.x)
head(df)
## team year week home away weekly_attendance winner day
## 1 49ers 2000 1 541964 515990 54626 Atlanta Falcons Sun
## 2 49ers 2000 3 541964 515990 65945 St. Louis Rams Sun
## 3 49ers 2000 2 541964 515990 66879 Carolina Panthers Sun
## 4 49ers 2000 14 541964 515990 57255 San Francisco 49ers Sun
## 5 49ers 2000 10 541964 515990 64900 New Orleans Saints Sun
## 6 49ers 2000 16 541964 515990 68306 San Francisco 49ers Sun
## date time pts_win pts_loss yds_win yds_loss turnovers_win
## 1 September 3 13:02:00 36 28 359 339 1
## 2 September 17 13:02:00 41 24 529 401 2
## 3 September 10 16:15:00 38 22 450 391 1
## 4 December 3 16:05:00 45 17 385 303 0
## 5 November 5 13:02:00 31 15 360 346 0
## 6 December 17 16:00:00 17 0 456 104 1
## turnovers_loss H.A wins loss points_for points_against points_differential
## 1 1 away 6 10 388 422 -34
## 2 2 away 6 10 388 422 -34
## 3 2 home 6 10 388 422 -34
## 4 5 away 6 10 388 422 -34
## 5 2 away 6 10 388 422 -34
## 6 1 home 6 10 388 422 -34
## margin_of_victory strength_of_schedule simple_rating offensive_ranking
## 1 -2.1 -1.7 -3.8 1.7
## 2 -2.1 -1.7 -3.8 1.7
## 3 -2.1 -1.7 -3.8 1.7
## 4 -2.1 -1.7 -3.8 1.7
## 5 -2.1 -1.7 -3.8 1.7
## 6 -2.1 -1.7 -3.8 1.7
## defensive_ranking playoffs sb_winner
## 1 -5.5 No Playoffs No Superbowl
## 2 -5.5 No Playoffs No Superbowl
## 3 -5.5 No Playoffs No Superbowl
## 4 -5.5 No Playoffs No Superbowl
## 5 -5.5 No Playoffs No Superbowl
## 6 -5.5 No Playoffs No Superbowl
conf.look <- data.frame(team = c(unique(as.character(df$team)))
,conf = c("NFC","NFC","AFC","AFC","AFC","NFC","NFC",
"AFC","AFC","AFC","NFC","AFC","NFC","NFC","NFC",
"AFC","AFC","NFC","NFC","NFC","AFC","AFC",
"NFC","AFC","NFC","NFC","NFC","AFC","AFC",
"NFC","AFC","AFC"))
geo.look <- data.frame(team = c(unique(as.character(df$team)))
,geo = c("west","north","east","west","north","south","west",
"west","west","south","east","east","east","south","east",
"south","east","north","north","south","east","west",
"west","north","east","south","west","north","south",
"north","north","south"))
df$conf <- conf.look[match(df$team, conf.look$team),]$conf
df$geo <- geo.look[match(df$team, geo.look$team),]$geo
df$tot.att <- df$home + df$away
df$att.per.gm <- df$tot.att / 16
df <- df %>% arrange(year, week, team)