Questions to answer
Teammate duos with most races against eachother, number of years together
Teammate duos that finished high in the grid most often, average combined finishing position
Teammate duos that finished close to eachother most often, average difference in finishing positions
Data Read
data = read.csv("https://raw.githubusercontent.com/lennymd/jmm309_fall2020/main/data/race_results_statsf1.csv", stringsAsFactors = FALSE)
Clean Data to show results by Teammate Duos
#install.packages('dplyr')
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data1 = data %>%
arrange(year, round, race_name, team, driver)%>%
mutate(Win = ifelse(p_prelim == 1, 1, 0), Podium = ifelse(p_prelim <= 3, 1, 0))%>%
select(year, round, race_name, p_prelim, Win, Podium, driver, team)%>%
group_by(year, round, race_name, team) %>%
summarise(teammates = paste(driver, collapse = ' & '), teamSize = n(), combinedPosition = sum(p_prelim), bestPosition = min(p_prelim), Wins = sum(Win), Podiums = sum(Podium), OneTwo = ifelse(bestPosition == 1 & combinedPosition ==3, 1, 0) , BothPodium = ifelse((bestPosition==1 & combinedPosition <=4) | (bestPosition == 2 & combinedPosition == 5), 1, 0))
## `summarise()` regrouping output by 'year', 'round', 'race_name' (override with `.groups` argument)
head(data1)
teamsizecheck = data1 %>%
group_by(year, team, teammates)%>%
summarise(teamSize = mean(teamSize), races = n())%>%
arrange(year, desc(teamSize))
## `summarise()` regrouping output by 'year', 'team' (override with `.groups` argument)
data2 = data1[data1$teamSize == 2,]
data3 = data2 %>%
left_join(data, by = c("year", "round", "race_name","team", "bestPosition" = "p_prelim" )) %>%
select(year, round, race_name, team, teammates, teamSize, combinedPosition, bestPosition,Wins, Podiums, driver, OneTwo, BothPodium)%>%
arrange(year, round, race_name, team, teammates,driver)
names(data3)[11]="bestTeammate"
head(data3)
Show Duos by number of years/races together
duosYears = data3%>%
group_by(team,teammates,year) %>%
summarise(races = n() )
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
head(duosYears)
duosLength = duosYears %>%
group_by(team,teammates) %>%
summarise(NumberofYears = n(),FirstYear = min(year), LastYear = max(year), NumberofRaces = sum(races))%>%
arrange(desc(NumberofYears), desc(NumberofRaces))
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosLength)
Show Duos by average combined finishes and by best finishes, both by each duo overall and by each victorious driver of each duo
duosBestFinishers = data3%>%
group_by(team,teammates,bestTeammate)%>%
summarise(AvgCombined = mean(combinedPosition), AvgBest = mean(bestPosition), Wins = sum(Wins), Podiums = sum(Podiums),OneTwos = sum(OneTwo), BothPodiums = sum(BothPodium), NumberofRacesasBest = n()) %>%
arrange(desc(NumberofRacesasBest), AvgBest)
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
duosFinishes = data3%>%
group_by(team,teammates)%>%
summarise(AvgCombined = mean(combinedPosition), AvgBest = mean(bestPosition), Wins = sum(Wins), Podiums = sum(Podiums), OneTwos = sum(OneTwo), BothPodiums = sum(BothPodium)) %>%
arrange(AvgCombined, AvgBest)
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosBestFinishers)
head(duosFinishes)
Show duos by average difference in finishing position, both by each duo overall and by each victorious driver of each duo
duosDifferencebyDriver = data3%>%
mutate(difference = combinedPosition - 2*bestPosition)%>%
group_by(team,teammates,bestTeammate)%>%
summarise(AvgDifference = mean(difference), Closest = min(difference), Farthest = max(difference)) %>%
arrange(AvgDifference)
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
duosDifference = data3%>%
mutate(difference = combinedPosition - 2*bestPosition)%>%
group_by(team,teammates)%>%
summarise(AvgDifference = mean(difference), Closest = min(difference), Farthest = max(difference)) %>%
arrange(AvgDifference)
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosDifferencebyDriver)
head(duosDifference)
Look at duos based on number of races within a few positions of eachother
duosDifferenceDistribution = data3%>%
mutate(difference = combinedPosition - 2*bestPosition)%>%
group_by(team,teammates,difference)%>%
tally()%>%
arrange(desc(n))
head(duosDifferenceDistribution)
duosWithinOne = duosDifferenceDistribution[duosDifferenceDistribution$difference==1,] %>% arrange(desc(n)) %>% select(team, teammates, RacesWithinOne = n)
head(duosWithinOne)