Loading packages and reading the data
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# NOTE(review): setwd() with an absolute, machine-specific path makes the
# script non-portable. It is kept here so that any relative path used later
# in the analysis keeps resolving; prefer a project-relative path long term.
setwd("/Users/vidyasagarbhargava/Desktop/kaggle2017/ipl")

# Read both CSV files with a header row. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
df1 <- read.csv("deliveries.csv", header = TRUE)
df2 <- read.csv("matches.csv", header = TRUE)

# Attach the match-level columns of df2 to every row of df1 by matching
# df1$match_id against df2$id.
df <- merge(df1, df2, by.x = "match_id", by.y = "id")
Best bowlers in the last five overs by economy rate?
# Economy rate per bowler over deliveries with over > 15, limited to bowlers
# with more than 50 overs there; the ten lowest economy rates are kept.
# NOTE(review): total_overs is rounded to one decimal before it is used both
# as the divisor and in the > 50 cut-off, so both are slightly approximate.
filter(df1, over > 15) %>%
  group_by(bowler) %>%
  summarise(
    total_runs_given = sum(total_runs),
    # one row per delivery, so the group size is the number of balls bowled
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  ) %>%
  filter(total_overs > 50) %>%
  arrange(economy_rate) %>%
  # negated weight: "top" rows are the smallest economy rates
  top_n(10, -economy_rate)
## # A tibble: 10 × 4
## bowler total_runs_given total_overs economy_rate
## <fctr> <int> <dbl> <dbl>
## 1 SL Malinga 1172 162.3 7.22
## 2 SP Narine 718 98.3 7.30
## 3 R Ashwin 479 63.2 7.58
## 4 DW Steyn 806 104.0 7.75
## 5 RA Jadeja 521 62.0 8.40
## 6 PP Chawla 449 53.3 8.42
## 7 CH Morris 428 50.5 8.48
## 8 B Lee 465 54.7 8.50
## 9 B Kumar 802 93.5 8.58
## 10 DS Kulkarni 452 52.0 8.69
Top wicket-takers in the death overs, with economy rate?
# Wickets and economy rate per bowler over deliveries with over > 15; the ten
# bowlers with the most wickets are kept. Only the six dismissal kinds listed
# below are counted as wickets.
filter(df1, over > 15) %>%
  group_by(bowler) %>%
  summarise(
    total_wickets = sum(
      dismissal_kind %in% c(
        "caught", "bowled", "lbw", "stumped", "caught and bowled", "hit wicket"
      )
    ),
    total_runs_given = sum(total_runs),
    # one row per delivery, so the group size is the number of balls bowled
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  ) %>%
  arrange(desc(total_wickets)) %>%
  top_n(10, total_wickets) %>%
  select(bowler, total_wickets, economy_rate)
## # A tibble: 10 × 3
## bowler total_wickets economy_rate
## <fctr> <int> <dbl>
## 1 SL Malinga 92 7.22
## 2 DJ Bravo 72 8.81
## 3 SP Narine 50 7.30
## 4 DW Steyn 49 7.75
## 5 A Nehra 48 8.83
## 6 B Kumar 45 8.58
## 7 R Vinay Kumar 45 10.13
## 8 RP Singh 45 9.03
## 9 UT Yadav 39 9.49
## 10 SR Watson 37 8.97
Top wicket-takers across all seasons, with economy rate
# Wickets and economy rate per bowler over every delivery in df1; the ten
# bowlers with the most wickets are kept. Only the six dismissal kinds listed
# below are counted as wickets.
group_by(df1, bowler) %>%
  summarise(
    total_wickets = sum(
      dismissal_kind %in% c(
        "caught", "bowled", "lbw", "stumped", "caught and bowled", "hit wicket"
      )
    ),
    total_runs_given = sum(total_runs),
    # one row per delivery, so the group size is the number of balls bowled
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  ) %>%
  arrange(desc(total_wickets)) %>%
  top_n(10, total_wickets) %>%
  select(bowler, total_wickets, economy_rate)
## # A tibble: 10 × 3
## bowler total_wickets economy_rate
## <fctr> <int> <dbl>
## 1 SL Malinga 143 6.60
## 2 A Mishra 124 7.20
## 3 DJ Bravo 122 8.00
## 4 PP Chawla 120 7.61
## 5 Harbhajan Singh 119 6.97
## 6 R Vinay Kumar 101 8.25
## 7 R Ashwin 100 6.49
## 8 A Nehra 98 7.62
## 9 DW Steyn 92 6.60
## 10 Z Khan 92 7.50
Malinga is the highest wicket-taker.
# Bar chart of SL Malinga's wickets by dismissal kind, bars ordered from the
# most to the least frequent kind. Only the six listed kinds are counted.
df1 %>%
  filter(
    bowler == "SL Malinga" &
      dismissal_kind %in% c(
        "caught", "bowled", "lbw", "stumped", "caught and bowled", "hit wicket"
      )
  ) %>%
  group_by(dismissal_kind) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  top_n(n = 10, wt = total) %>%
  ggplot(aes(x = reorder(dismissal_kind, -total), y = total)) +
  geom_bar(aes(fill = dismissal_kind), stat = "identity") +
  # Labels are passed as named arguments: wrapping them in list() relies on
  # list-unpacking that newer ggplot2 releases no longer perform, which
  # silently leaves the title and axis labels unset.
  labs(title = "Lasith Malinga", x = "Dismissal Kind", y = "Total Wickets")
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Build a bar chart of one bowler's wickets broken down by dismissal kind.
#
# Reads the file-level data frame `df1`.
#
# Args:
#   type: bowler name, compared for equality against df1$bowler.
#
# Returns:
#   A ggplot object titled with `type`, one bar per dismissal kind, ordered
#   from the most to the least frequent kind. Only the six dismissal kinds
#   listed below are counted.
bowl <- function(type) {
  x <- df1 %>%
    filter(
      bowler == type &
        dismissal_kind %in% c(
          "caught", "bowled", "lbw", "stumped", "caught and bowled",
          "hit wicket"
        )
    ) %>%
    group_by(dismissal_kind) %>%
    summarise(total = n()) %>%
    arrange(desc(total)) %>%
    top_n(n = 10, wt = total)

  ggplot(aes(x = reorder(dismissal_kind, -total), y = total), data = x) +
    geom_bar(aes(fill = dismissal_kind), stat = "identity") +
    # Labels are passed as named arguments: wrapping them in list() relies on
    # list-unpacking that newer ggplot2 releases no longer perform, which
    # silently leaves the title and axis labels unset.
    labs(title = type, x = "Dismissal Kind", y = "Total Wickets")
}
# One dismissal-kind chart per bowler.
a1 <- bowl("SL Malinga")
a2 <- bowl("A Mishra")
a3 <- bowl("DJ Bravo")
a4 <- bowl("PP Chawla")
a5 <- bowl("Harbhajan Singh")
a6 <- bowl("R Vinay Kumar")

# Lay out the first four charts in a 2 x 2 grid.
# NOTE(review): a5 and a6 are built above but are not drawn here.
grid.arrange(a1, a2, a3, a4, nrow = 2, ncol = 2)
Virat Kohli's performances across the IPL
# Line chart of V Kohli's runs per match (x = match_id) with a smoothed trend
# line; y-axis breaks every 5 runs from 0 to 150.
filter(df, batsman == "V Kohli") %>%
  group_by(match_id) %>%
  # the summary already holds exactly match_id and runs_scored
  summarise(runs_scored = sum(batsman_runs)) %>%
  ggplot(aes(x = match_id, y = runs_scored)) +
  geom_line() +
  scale_y_continuous(breaks = seq(0, 150, by = 5)) +
  geom_smooth(span = 0.5)
## `geom_smooth()` using method = 'loess'
Virat Kohli's impact on RCB?
# Match outcomes split by whether V Kohli scored more than 30 runs.
# NOTE(review): a score of exactly 30 falls into the "less than 30" bucket.
# NOTE(review): total_loss is every match not won by Royal Challengers
# Bangalore, so any match without an RCB win is counted as a loss.
filter(df, batsman == "V Kohli") %>%
  group_by(match_id, winner) %>%
  summarise(runs_scored = sum(batsman_runs)) %>%
  mutate(
    run_type = if_else(runs_scored > 30, "30+", "less than 30"),
    # 1 when RCB won the match, 0 otherwise
    result_type = if_else(winner == "Royal Challengers Bangalore", 1, 0)
  ) %>%
  group_by(run_type) %>%
  summarise(
    total_win = sum(result_type),
    total_loss = n() - total_win,
    total_matches = n()
  )
## # A tibble: 2 × 4
## run_type total_win total_loss total_matches
## <chr> <dbl> <dbl> <int>
## 1 30+ 28 30 58
## 2 less than 30 36 37 73
Analysis for RCB
# Five highest run totals among batsmen in matches where Royal Challengers
# Bangalore is team1 or team2.
# NOTE(review): the filter keeps every delivery of those matches, so
# opposition batsmen are included as well; confirm whether the intent was to
# restrict to RCB's own batsmen.
filter(
  df,
  team1 == "Royal Challengers Bangalore" |
    team2 == "Royal Challengers Bangalore"
) %>%
  group_by(batsman) %>%
  summarise(runs_scored = sum(batsman_runs)) %>%
  arrange(desc(runs_scored)) %>%
  top_n(5, runs_scored)
## # A tibble: 5 × 2
## batsman runs_scored
## <fctr> <int>
## 1 V Kohli 4115
## 2 CH Gayle 3049
## 3 AB de Villiers 2714
## 4 JH Kallis 1352
## 5 R Dravid 1075
# Five highest wicket totals among bowlers in matches where Royal Challengers
# Bangalore is team1 or team2. Only the six dismissal kinds listed below are
# counted as wickets.
# NOTE(review): the filter keeps every delivery of those matches, so
# opposition bowlers are included as well; confirm whether the intent was to
# restrict to RCB's own bowlers.
filter(
  df,
  team1 == "Royal Challengers Bangalore" |
    team2 == "Royal Challengers Bangalore"
) %>%
  group_by(bowler) %>%
  summarise(
    total_wickets = sum(
      dismissal_kind %in% c(
        "caught", "bowled", "lbw", "stumped", "caught and bowled", "hit wicket"
      )
    )
  ) %>%
  arrange(desc(total_wickets)) %>%
  top_n(5, total_wickets)
## # A tibble: 5 × 2
## bowler total_wickets
## <fctr> <int>
## 1 R Vinay Kumar 76
## 2 YS Chahal 56
## 3 Z Khan 56
## 4 A Kumble 45
## 5 S Aravind 40
Common function for any team
library(gridExtra)
# Draw the top five run scorers and top five wicket takers for one team.
#
# Reads the file-level data frame `df`.
#
# Args:
#   type: team name, compared for equality against df$team1 and df$team2.
#
# Returns:
#   The result of grid.arrange() on the two bar charts (batsmen on top,
#   bowlers below); the charts are drawn as a side effect.
#
# NOTE(review): rows are selected by match (team1 or team2 equals `type`), so
# batsmen and bowlers of the opposing side in those matches are counted too.
best <- function(type) {
  # Every delivery from matches in which `type` took part; shared by both
  # summaries so the filter is evaluated once.
  team_rows <- df %>%
    filter(team1 == type | team2 == type)

  bats <- team_rows %>%
    group_by(batsman) %>%
    summarise(runs_scored = sum(batsman_runs)) %>%
    arrange(desc(runs_scored)) %>%
    top_n(n = 5, wt = runs_scored)

  # Only the six dismissal kinds listed below are counted as wickets.
  bowls <- team_rows %>%
    group_by(bowler) %>%
    summarise(
      total_wickets = length(
        dismissal_kind[dismissal_kind %in% c(
          "caught", "bowled", "lbw", "stumped", "caught and bowled",
          "hit wicket"
        )]
      )
    ) %>%
    arrange(desc(total_wickets)) %>%
    top_n(n = 5, wt = total_wickets)

  # Labels are passed as named arguments: wrapping them in list() relies on
  # list-unpacking that newer ggplot2 releases no longer perform, which
  # silently leaves the title and axis labels unset.
  a1 <- ggplot(
    aes(x = reorder(batsman, -runs_scored), y = runs_scored),
    data = bats
  ) +
    geom_bar(stat = "identity", aes(fill = batsman)) +
    labs(title = type, x = "Batsman", y = "Runs Scored")

  a2 <- ggplot(
    aes(x = reorder(bowler, -total_wickets), y = total_wickets),
    data = bowls
  ) +
    geom_bar(stat = "identity", aes(fill = bowler)) +
    labs(title = type, x = "Bowler", y = "Wickets Taken")

  grid.arrange(a1, a2)
}
# Draw the batting/bowling summary for each team in turn; the return values
# are discarded because the charts are drawn as a side effect of best().
invisible(lapply(
  c("Royal Challengers Bangalore", "Kolkata Knight Riders"),
  best
))