# Tally how many times each toss decision appears in toss_r.
table(toss_r[["toss_decision"]])
##
## bat field
## 273 363
# Bar chart of toss decisions, filled by toss-winning team, with one facet per
# city. Only the nine cities listed below are kept.
toss_r %>%
  filter(
    city %in% c(
      "Mumbai", "Bangalore", "Delhi", "Chennai", "Kolkata",
      "Jaipur", "Hyderabad", "Chandigarh", "Pune"
    )
  ) %>%
  ggplot() +
  geom_bar(aes(x = toss_decision, fill = toss_winner)) +
  facet_wrap(~city)
CSK loves to bat first on their home ground.
Teams choose to bowl first in Bangalore because it is a high-scoring ground whose small boundaries make chasing easy.
# Split the matches by whether the toss winner also won the match, then count
# the rows on each side.
# NOTE(review): inside filter(), `winner` is expected to resolve to the toss_r
# column of that name, not to the global `winner` assigned on the next line;
# data columns take precedence over environment variables. Confirm that
# toss_r has a `winner` column.
winner <- filter(toss_r, toss_winner == winner)
loser <- filter(toss_r, toss_winner != winner)

match_winner_after_winning_toss <- nrow(winner)
loosing_match_after_wining_toss <- nrow(loser)

# One-row summary; column names are taken from the variable names.
data.frame(match_winner_after_winning_toss, loosing_match_after_wining_toss)
## match_winner_after_winning_toss loosing_match_after_wining_toss
## 1 325 311
# One-row summary of the two counts (both computed outside this section).
data.frame(batting_1st = batting_1st, batting_2nd = batting_2nd)
## batting_1st batting_2nd
## 1 284 349
Maximum runs in a season:-
# Order the player-season rows from most to fewest runs and preview the top.
highest_runs <- arrange(by_players, desc(batsman_run))
head(highest_runs)
## # A tibble: 6 x 5
## # Groups: season [3]
## season batsman batsman_run balls_played srr
## <int> <fct> <int> <int> <dbl>
## 1 2016 V Kohli 973 655 149.
## 2 2016 DA Warner 848 579 146.
## 3 2012 CH Gayle 733 472 155.
## 4 2013 MEK Hussey 733 580 126.
## 5 2013 CH Gayle 720 484 149.
## 6 2016 AB de Villiers 687 415 166.
# Preview the first rows of max_scorer (built outside this section).
max_scorer %>% head()
## # A tibble: 6 x 6
## season.x top season.y batsman balls_played srr
## <int> <int> <int> <fct> <int> <dbl>
## 1 2008 616 2008 SE Marsh 452 136.
## 2 2009 572 2009 ML Hayden 409 140.
## 3 2010 618 2010 SR Tendulkar 489 126.
## 4 2011 608 2011 CH Gayle 342 178.
## 5 2012 733 2012 CH Gayle 472 155.
## 6 2013 733 2013 MEK Hussey 580 126.
# Plot the `top` run total against season for each max_scorer row, labelled
# with the batsman's name; point size follows `srr`.
ggplot(
  max_scorer,
  aes(x = factor(season.x), y = top, color = batsman, size = srr)
) +
  geom_count() +
  # Lift the labels slightly so they do not sit on top of the points.
  geom_text(aes(label = batsman), size = 5, nudge_y = 12) +
  labs(x = "season", y = "Total Runs") +
  coord_cartesian()
# First ten rows of sttr: runs against `srrt`, labelled by batsman.
ggplot(
  sttr[1:10, ],
  aes(x = run, y = srrt, color = batsman, size = srrt)
) +
  geom_count() +
  geom_text(aes(label = batsman), size = 4, nudge_y = 0.5)

# All of sttr: `srrt` against balls, with a smoothed trend line.
ggplot(sttr, aes(x = balls, y = srrt, size = srrt)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Re-rank by total runs; this overwrites the earlier `highest_runs`.
# NOTE(review): `str` must be a data frame defined earlier in the file. The
# name shadows utils::str(); confirm it is not a typo for `sttr`.
highest_runs <- arrange(str, desc(run))

# First ten rows of the ranking: balls against runs, labelled by batsman.
ggplot(
  highest_runs[1:10, ],
  aes(x = balls, y = run, color = batsman, size = srrt)
) +
  geom_count() +
  geom_text(aes(label = batsman), size = 5, nudge_y = 15)

# All of sttr: balls against runs with a smoothed trend. geom_abline() with no
# arguments draws the line y = x for reference.
ggplot(sttr, aes(x = balls, y = run, color = srrt)) +
  geom_point() +
  geom_smooth() +
  geom_abline()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# One row per batsman per innings: runs off the bat and number of delivery
# rows. n() counts every row of ipl in the group.
by_match <- ipl %>%
  group_by(season, match_id, inning, batsman) %>%
  summarise(
    bat_run = sum(batsman_runs),
    balls = n()
  )
head(by_match)
## # A tibble: 6 x 6
## # Groups: season, match_id, inning [2]
## season match_id inning batsman bat_run balls
## <int> <int> <int> <fct> <int> <int>
## 1 2008 60 1 BB McCullum 158 77
## 2 2008 60 1 DJ Hussey 12 12
## 3 2008 60 1 Mohammad Hafeez 5 3
## 4 2008 60 1 RT Ponting 20 20
## 5 2008 60 1 SC Ganguly 10 12
## 6 2008 60 2 AA Noffke 9 12
# Individual scores against balls for innings 1 and 2 only (inning values
# above 2 are filtered out), coloured by innings, with a smoothed trend.
by_match %>%
  filter(inning <= 2) %>%
  ggplot(
    aes(x = balls, y = bat_run, color = factor(inning), size = bat_run)
  ) +
  geom_point(alpha = 1 / 2) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
The highest individual scores mostly come in the first innings, because there is less pressure when batting first and because, in the second innings, the target being chased is often too small to allow a big individual score.
In the second innings, players start with a better strike rate.
For innings of fewer than 50 balls, second-innings players have higher scores; these may be finishers.
# One row per bowler per over: runs off the bat, number of delivery rows, and
# total runs. n() counts every row of ipl in the group.
by_over <- ipl %>%
  group_by(season, match_id, inning, bowler, over) %>%
  summarise(
    bat_run = sum(batsman_runs),
    balls = n(),
    runs = sum(total_runs)
  )
head(by_over)
## # A tibble: 6 x 8
## # Groups: season, match_id, inning, bowler [3]
## season match_id inning bowler over bat_run balls runs
## <int> <int> <int> <fct> <int> <int> <int> <int>
## 1 2008 60 1 AA Noffke 4 17 7 23
## 2 2008 60 1 AA Noffke 7 7 6 7
## 3 2008 60 1 AA Noffke 16 4 6 4
## 4 2008 60 1 AA Noffke 18 7 6 7
## 5 2008 60 1 CL White 15 22 7 24
## 6 2008 60 1 JH Kallis 9 4 6 4
# Distribution of runs per over for innings 1 and 2, with one box per over
# and per innings. cut_width(over, 1) bins the over number into unit-wide
# intervals and overrides the plot-level x aesthetic for this layer.
by_over %>%
  filter(inning <= 2) %>%
  ggplot(aes(x = over, y = runs, color = factor(inning))) +
  geom_boxplot(aes(x = cut_width(over, 1)))
The team batting first always starts more slowly than the team batting second and scores fewer runs in the powerplay.
Both teams score approximately the same number of runs in the middle overs.
The team batting first scores more runs in the death overs.
# Keep only the per-season rows for the two batsmen being compared.
rohit_kohli <- filter(by_players, batsman %in% c("RG Sharma", "V Kohli"))
head(rohit_kohli)
## # A tibble: 6 x 5
## # Groups: season [3]
## season batsman batsman_run balls_played srr
## <int> <fct> <int> <int> <dbl>
## 1 2008 RG Sharma 404 276 146.
## 2 2008 V Kohli 165 168 98.2
## 3 2009 RG Sharma 362 323 112.
## 4 2009 V Kohli 246 225 109.
## 5 2010 RG Sharma 404 310 130.
## 6 2010 V Kohli 307 216 142.
# Season-by-season run totals for RG Sharma and V Kohli, one path per batsman.
# `season` is an integer column, so the x axis needs a continuous scale.
# xlim() with character limits builds a discrete scale instead, which does not
# line up with the numeric season values. Numeric limits and one break per
# season keep every season readable on the axis.
ggplot(rohit_kohli, aes(x = season, y = batsman_run)) +
  geom_point() +
  scale_x_continuous(limits = c(2008, 2017), breaks = 2008:2017) +
  geom_path(aes(size = batsman_run, color = batsman)) +
  # Label each point with its season, shifted slightly to the right.
  geom_text(aes(label = season), nudge_x = .2)
# Rank bowler-overs by total runs and plot the first fifteen rows, labelled
# with the bowler's name.
most_expensive <- arrange(by_over, desc(runs))

most_expensive[1:15, ] %>%
  ggplot(aes(x = over, y = runs, color = factor(season))) +
  geom_count() +
  geom_text(aes(label = bowler), nudge_y = 0.5) +
  # Fixed y window; values outside 25-40 are dropped from the plot.
  ylim(c(25, 40))
# Career totals per bowler. overs_bowled counts rows of by_over, so each
# (season, match, inning, over) a bowler appears in counts as one over.
economy_history <- by_over %>%
  group_by(bowler) %>%
  summarise(
    overs_bowled = n(),
    balls_bowled = sum(balls),
    runs_conceded = sum(runs)
  ) %>%
  # Economy is runs conceded per counted over.
  mutate(eco = runs_conceded / overs_bowled)

# Keep bowlers with at least 50 counted overs, lowest economy first.
economy_overall <- economy_history %>%
  filter(overs_bowled >= 50) %>%
  arrange(eco)
head(economy_overall)
## # A tibble: 6 x 5
## bowler overs_bowled balls_bowled runs_conceded eco
## <fct> <int> <int> <int> <dbl>
## 1 SP Narine 323 1956 2085 6.46
## 2 R Ashwin 383 2359 2552 6.66
## 3 A Kumble 163 983 1089 6.68
## 4 GD McGrath 54 329 366 6.78
## 5 Rashid Khan 54 328 368 6.81
## 6 DL Vettori 131 785 894 6.82
# First fifteen rows of economy_overall: economy against overs bowled (as a
# discrete axis), labelled with the bowler's name.
ggplot(
  economy_overall[1:15, ],
  aes(x = factor(overs_bowled), y = eco, color = eco)
) +
  geom_count() +
  geom_text(aes(label = bowler), nudge_y = 0.02) +
  labs(x = "Overs Bowled ", y = "Economy")