Questions to answer

Teammate duos with most races against eachother, number of years together

Teammate duos that finished high in the grid most often, average combined finishing position

Teammate duos that finished close to eachother most often, average difference in finishing positions

Data Read

data = read.csv("https://raw.githubusercontent.com/lennymd/jmm309_fall2020/main/data/race_results_statsf1.csv", stringsAsFactors = FALSE)

Clean Data to show results by Teammate Duos

#install.packages('dplyr')
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data1 = data %>%
  arrange(year, round, race_name, team, driver)%>%
  mutate(Win = ifelse(p_prelim == 1, 1, 0), Podium = ifelse(p_prelim <= 3, 1, 0))%>%
  select(year, round, race_name, p_prelim, Win, Podium, driver, team)%>%
  group_by(year, round, race_name, team) %>%
  summarise(teammates = paste(driver, collapse = ' & '), teamSize = n(), combinedPosition = sum(p_prelim), bestPosition = min(p_prelim), Wins = sum(Win), Podiums = sum(Podium), OneTwo = ifelse(bestPosition == 1 & combinedPosition ==3, 1, 0) , BothPodium = ifelse((bestPosition==1 & combinedPosition <=4) | (bestPosition == 2 & combinedPosition == 5), 1, 0))
## `summarise()` regrouping output by 'year', 'round', 'race_name' (override with `.groups` argument)
head(data1)
teamsizecheck = data1 %>% 
  group_by(year, team, teammates)%>%
  summarise(teamSize = mean(teamSize), races = n())%>%
  arrange(year, desc(teamSize))
## `summarise()` regrouping output by 'year', 'team' (override with `.groups` argument)
data2 = data1[data1$teamSize == 2,]

data3 = data2 %>% 
  left_join(data, by = c("year", "round", "race_name","team", "bestPosition" = "p_prelim" )) %>% 
  select(year, round, race_name, team, teammates, teamSize, combinedPosition, bestPosition,Wins, Podiums, driver, OneTwo, BothPodium)%>%
  arrange(year, round, race_name, team, teammates,driver)


names(data3)[11]="bestTeammate"
head(data3)

Show Duos by number of years/races together

duosYears = data3%>%
  group_by(team,teammates,year) %>%
  summarise(races = n() )
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
head(duosYears)
duosLength = duosYears %>%
  group_by(team,teammates) %>%
  summarise(NumberofYears = n(),FirstYear = min(year), LastYear = max(year), NumberofRaces = sum(races))%>%
  arrange(desc(NumberofYears), desc(NumberofRaces))
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosLength)

Show Duos by average combined finishes and by best finishes, both by each duo overall and by each victorious driver of each duo

duosBestFinishers = data3%>%
  group_by(team,teammates,bestTeammate)%>%
  summarise(AvgCombined = mean(combinedPosition), AvgBest = mean(bestPosition), Wins = sum(Wins), Podiums = sum(Podiums),OneTwos = sum(OneTwo), BothPodiums = sum(BothPodium), NumberofRacesasBest = n()) %>%
  arrange(desc(NumberofRacesasBest), AvgBest)
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
duosFinishes = data3%>%
  group_by(team,teammates)%>%
  summarise(AvgCombined = mean(combinedPosition), AvgBest = mean(bestPosition), Wins = sum(Wins), Podiums = sum(Podiums), OneTwos = sum(OneTwo), BothPodiums = sum(BothPodium)) %>%
  arrange(AvgCombined, AvgBest)
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosBestFinishers)
head(duosFinishes)

Show duos by average difference in finishing position, both by each duo overall and by each victorious driver of each duo

duosDifferencebyDriver = data3%>% 
  mutate(difference = combinedPosition - 2*bestPosition)%>%
  group_by(team,teammates,bestTeammate)%>%
  summarise(AvgDifference = mean(difference), Closest = min(difference), Farthest = max(difference)) %>%
  arrange(AvgDifference)
## `summarise()` regrouping output by 'team', 'teammates' (override with `.groups` argument)
duosDifference = data3%>%
  mutate(difference = combinedPosition - 2*bestPosition)%>%
  group_by(team,teammates)%>%
  summarise(AvgDifference = mean(difference), Closest = min(difference), Farthest = max(difference)) %>%
  arrange(AvgDifference)
## `summarise()` regrouping output by 'team' (override with `.groups` argument)
head(duosDifferencebyDriver)
head(duosDifference)

Look at duos based on number of races within a few positions of eachother

duosDifferenceDistribution = data3%>% 
  mutate(difference = combinedPosition - 2*bestPosition)%>%
group_by(team,teammates,difference)%>%
  tally()%>%
  arrange(desc(n))

head(duosDifferenceDistribution)
duosWithinOne = duosDifferenceDistribution[duosDifferenceDistribution$difference==1,] %>% arrange(desc(n)) %>% select(team, teammates, RacesWithinOne = n)
head(duosWithinOne)

Final 2 Data Sets

Combine Information on Duos overall and information on each best driver for the duo

Duos = duosLength %>%
  left_join(duosFinishes)%>%
  left_join(duosDifference) %>%
  left_join(duosWithinOne) %>%
  mutate(WithinOneRate = RacesWithinOne/NumberofRaces, WinRate = Wins/NumberofRaces, OneTwoRate = OneTwos/NumberofRaces, PodiumRate = BothPodiums/NumberofRaces) %>% 
  arrange(desc(Wins))
## Joining, by = c("team", "teammates")
## Joining, by = c("team", "teammates")
## Joining, by = c("team", "teammates")
head(Duos)
DuosbyDriver = duosBestFinishers %>%
  left_join(duosDifferencebyDriver)%>%
  mutate(PodiumsperRace = Podiums/NumberofRacesasBest, WinRate = Wins/NumberofRacesasBest,OneTwoRate = OneTwos/NumberofRacesasBest, PodiumRate = BothPodiums/NumberofRacesasBest)
## Joining, by = c("team", "teammates", "bestTeammate")
head(DuosbyDriver)

teams to analyze

TopDuos = Duos[c(1:3,6),]
TopDuos