Loading Packages and reading data

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# NOTE(review): hard-coded, machine-specific working directory; the two CSV
# files below are read relative to it.
setwd("/Users/vidyasagarbhargava/Desktop/kaggle2017/ipl")

# header = TRUE is spelled out: T is an ordinary variable that can be
# reassigned, TRUE is a reserved word.
df1 <- read.csv("deliveries.csv", header = TRUE)
df2 <- read.csv("matches.csv", header = TRUE)

# Inner join of the two tables on df1$match_id == df2$id.
df <- merge(df1, df2, by.x = "match_id", by.y = "id")

Which bowlers are the most economical in the last five overs?

# Economy rate per bowler over the deliveries with over > 15.
# Bowlers need more than 50 (rounded) overs in that span to qualify; the ten
# lowest economy rates are shown, best first.
late_deliveries <- filter(df1, over > 15)

late_economy <- late_deliveries %>%
  group_by(bowler) %>%
  summarise(
    total_runs_given = sum(total_runs),
    # every row counts as one ball; overs are rounded to one decimal before
    # the economy rate is derived from them
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  )

late_economy %>%
  filter(total_overs > 50) %>%
  arrange(economy_rate) %>%
  top_n(n = 10, wt = -economy_rate)
## # A tibble: 10 × 4
##         bowler total_runs_given total_overs economy_rate
##         <fctr>            <int>       <dbl>        <dbl>
## 1   SL Malinga             1172       162.3         7.22
## 2    SP Narine              718        98.3         7.30
## 3     R Ashwin              479        63.2         7.58
## 4     DW Steyn              806       104.0         7.75
## 5    RA Jadeja              521        62.0         8.40
## 6    PP Chawla              449        53.3         8.42
## 7    CH Morris              428        50.5         8.48
## 8        B Lee              465        54.7         8.50
## 9      B Kumar              802        93.5         8.58
## 10 DS Kulkarni              452        52.0         8.69

Who takes the most wickets in the death overs, and at what economy rate?

# Dismissal kinds that are counted as wickets in this table.
counted_kinds <- c("caught", "bowled", "lbw", "stumped",
                   "caught and bowled", "hit wicket")

# Wickets and economy rate per bowler over the deliveries with over > 15,
# showing the ten highest wicket counts.
df1 %>%
  filter(over > 15) %>%
  group_by(bowler) %>%
  summarise(
    total_wickets = sum(dismissal_kind %in% counted_kinds),
    total_runs_given = sum(total_runs),
    # one row per ball; overs are rounded before the rate is computed
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  ) %>%
  arrange(desc(total_wickets)) %>%
  top_n(n = 10, wt = total_wickets) %>%
  select(bowler, total_wickets, economy_rate)
## # A tibble: 10 × 3
##           bowler total_wickets economy_rate
##           <fctr>         <int>        <dbl>
## 1     SL Malinga            92         7.22
## 2       DJ Bravo            72         8.81
## 3      SP Narine            50         7.30
## 4       DW Steyn            49         7.75
## 5        A Nehra            48         8.83
## 6        B Kumar            45         8.58
## 7  R Vinay Kumar            45        10.13
## 8       RP Singh            45         9.03
## 9       UT Yadav            39         9.49
## 10     SR Watson            37         8.97

Leading wicket-takers across all seasons, with economy rate

# Dismissal kinds that are counted as wickets in this table.
wicket_kinds_all <- c("caught", "bowled", "lbw", "stumped",
                      "caught and bowled", "hit wicket")

# Per-bowler totals over every delivery in df1.
bowler_totals <- df1 %>%
  group_by(bowler) %>%
  summarise(
    total_wickets = sum(dismissal_kind %in% wicket_kinds_all),
    total_runs_given = sum(total_runs),
    # one row per ball; overs are rounded before the rate is computed
    total_overs = round(n() / 6, 1),
    economy_rate = round(total_runs_given / total_overs, 2)
  )

# Ten highest wicket counts, largest first, with the economy rate alongside.
bowler_totals %>%
  arrange(desc(total_wickets)) %>%
  top_n(n = 10, wt = total_wickets) %>%
  select(bowler, total_wickets, economy_rate)
## # A tibble: 10 × 3
##             bowler total_wickets economy_rate
##             <fctr>         <int>        <dbl>
## 1       SL Malinga           143         6.60
## 2         A Mishra           124         7.20
## 3         DJ Bravo           122         8.00
## 4        PP Chawla           120         7.61
## 5  Harbhajan Singh           119         6.97
## 6    R Vinay Kumar           101         8.25
## 7         R Ashwin           100         6.49
## 8          A Nehra            98         7.62
## 9         DW Steyn            92         6.60
## 10          Z Khan            92         7.50

Malinga is the highest wicket-taker.

# Bar chart of SL Malinga's wickets broken down by dismissal kind,
# tallest bar first.
df1 %>%
  filter(bowler == "SL Malinga" &
           dismissal_kind %in% c("caught", "bowled", "lbw", "stumped",
                                 "caught and bowled", "hit wicket")) %>%
  group_by(dismissal_kind) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  top_n(n = 10, wt = total) %>%
  ggplot(aes(x = reorder(dismissal_kind, -total), y = total)) +
  geom_bar(aes(fill = dismissal_kind), stat = "identity") +
  # Labels are passed as named arguments, the documented interface of labs();
  # wrapping them in list() only worked through an old special case.
  labs(title = "Lasith Malinga", x = "Dismissal Kind", y = "Total Wickets")

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
#' Bar chart of one bowler's wickets by dismissal kind
#'
#' Reads the global data frame `df1`, keeps the rows for the given bowler
#' whose `dismissal_kind` is one of the six kinds listed below, counts the
#' rows per kind and plots the counts, tallest bar first.
#'
#' @param type Bowler name, compared against the `bowler` column of `df1`;
#'   also used as the plot title.
#' @return A ggplot object.
bowl <- function(type) {
  wicket_kinds <- c("caught", "bowled", "lbw", "stumped",
                    "caught and bowled", "hit wicket")

  x <- df1 %>%
    filter(bowler == type & dismissal_kind %in% wicket_kinds) %>%
    group_by(dismissal_kind) %>%
    summarise(total = n()) %>%
    arrange(desc(total)) %>%
    top_n(n = 10, wt = total)

  ggplot(aes(x = reorder(dismissal_kind, -total), y = total), data = x) +
    geom_bar(aes(fill = dismissal_kind), stat = "identity") +
    # Named arguments are the documented interface of labs(); the previous
    # labs(list(...)) form depended on an old special case.
    labs(title = type, x = "Dismissal Kind", y = "Total Wickets")
}
# One dismissal-kind chart per bowler.
a1 <- bowl("SL Malinga")
a2 <- bowl("A Mishra")
a3 <- bowl("DJ Bravo")
a4 <- bowl("PP Chawla")
a5 <- bowl("Harbhajan Singh")
a6 <- bowl("R Vinay Kumar")

# Only the first four charts are laid out, in a 2 x 2 grid; a5 and a6 are
# built but not drawn here.
grid.arrange(grobs = list(a1, a2, a3, a4), nrow = 2, ncol = 2)

Virat Kohli's performances across all IPL matches

# Runs scored by V Kohli in each match, drawn as a line over match_id with a
# smoothed trend (span 0.5) on top.
kohli_by_match <- df %>%
  filter(batsman == "V Kohli") %>%
  group_by(match_id) %>%
  summarise(runs_scored = sum(batsman_runs))

ggplot(kohli_by_match, aes(x = match_id, y = runs_scored)) +
  geom_line() +
  # y-axis tick every 5 runs from 0 to 150
  scale_y_continuous(breaks = seq(from = 0, to = 150, by = 5)) +
  geom_smooth(span = 0.5)
## `geom_smooth()` using method = 'loess'

Virat Kohli Impact on RCB?

# For every match in which V Kohli has batting rows, total his runs and
# record whether Royal Challengers Bangalore won, then compare results
# between matches where he scored 30 or more and matches where he did not.
df %>%
  filter(batsman == "V Kohli") %>%
  group_by(match_id, winner) %>%
  summarise(runs_scored = sum(batsman_runs)) %>%
  mutate(
    # ">= 30" so that a score of exactly 30 lands in the "30+" bucket; the
    # earlier "> 30" test put it under "less than 30", contradicting the labels
    run_type = if_else(runs_scored >= 30, "30+", "less than 30"),
    result_type = if_else(winner == "Royal Challengers Bangalore", 1, 0)
  ) %>%
  group_by(run_type) %>%
  summarise(
    total_win = sum(result_type),
    # every match not won by RCB is counted here, whatever its outcome
    total_loss = length(result_type) - sum(result_type),
    total_matches = length(result_type)
  )
## # A tibble: 2 × 4
##       run_type total_win total_loss total_matches
##          <chr>     <dbl>      <dbl>         <int>
## 1          30+        28         30            58
## 2 less than 30        36         37            73

Analysis for RCB

# Five highest run totals among batsmen in matches where Royal Challengers
# Bangalore is team1 or team2.
# NOTE(review): the filter keeps every delivery of those matches, so batsmen
# from the opposing side appear to be included as well - confirm intent.
rcb_match_rows <- filter(
  df,
  team1 == "Royal Challengers Bangalore" |
    team2 == "Royal Challengers Bangalore"
)

rcb_match_rows %>%
  group_by(batsman) %>%
  summarise(runs_scored = sum(batsman_runs)) %>%
  arrange(desc(runs_scored)) %>%
  top_n(n = 5, wt = runs_scored)
## # A tibble: 5 × 2
##          batsman runs_scored
##           <fctr>       <int>
## 1        V Kohli        4115
## 2       CH Gayle        3049
## 3 AB de Villiers        2714
## 4      JH Kallis        1352
## 5       R Dravid        1075
# Five highest wicket counts among bowlers in matches where Royal Challengers
# Bangalore is team1 or team2.
# NOTE(review): the filter keeps every delivery of those matches, so bowlers
# from the opposing side appear to be included as well - confirm intent.
rcb_wicket_kinds <- c("caught", "bowled", "lbw", "stumped",
                      "caught and bowled", "hit wicket")

rcb_bowling_rows <- filter(
  df,
  team1 == "Royal Challengers Bangalore" |
    team2 == "Royal Challengers Bangalore"
)

rcb_bowling_rows %>%
  group_by(bowler) %>%
  summarise(total_wickets = sum(dismissal_kind %in% rcb_wicket_kinds)) %>%
  arrange(desc(total_wickets)) %>%
  top_n(n = 5, wt = total_wickets)
## # A tibble: 5 × 2
##          bowler total_wickets
##          <fctr>         <int>
## 1 R Vinay Kumar            76
## 2     YS Chahal            56
## 3        Z Khan            56
## 4      A Kumble            45
## 5     S Aravind            40

Common function: top five run scorers and wicket takers in any team's matches

library(gridExtra)
#' Top five run scorers and wicket takers in a team's matches
#'
#' Reads the global data frame `df`, keeps the rows where `team1` or `team2`
#' equals `type`, and draws two stacked bar charts: runs per batsman and
#' wickets per bowler, each limited to the five largest totals.
#'
#' NOTE(review): the filter keeps every delivery of the selected matches, so
#' players from the opposing side appear to be included - confirm intent.
#'
#' @param type Team name, compared against the `team1` and `team2` columns of
#'   `df`; also used as the title of both charts.
#' @return The value of `grid.arrange()` for the two charts; the charts are
#'   drawn as a side effect.
best <- function(type) {
  wicket_kinds <- c("caught", "bowled", "lbw", "stumped",
                    "caught and bowled", "hit wicket")

  # Both summaries work on the same rows, so the filter is applied once.
  team_matches <- df %>%
    filter(team1 == type | team2 == type)

  bats <- team_matches %>%
    group_by(batsman) %>%
    summarise(runs_scored = sum(batsman_runs)) %>%
    arrange(desc(runs_scored)) %>%
    top_n(n = 5, wt = runs_scored)

  bowls <- team_matches %>%
    group_by(bowler) %>%
    summarise(total_wickets = sum(dismissal_kind %in% wicket_kinds)) %>%
    arrange(desc(total_wickets)) %>%
    top_n(n = 5, wt = total_wickets)

  # Named arguments are the documented interface of labs(); the previous
  # labs(list(...)) form depended on an old special case.
  a1 <- ggplot(aes(x = reorder(batsman, -runs_scored), y = runs_scored),
               data = bats) +
    geom_bar(stat = "identity", aes(fill = batsman)) +
    labs(title = type, x = "Batsman", y = "Runs Scored")

  a2 <- ggplot(aes(x = reorder(bowler, -total_wickets), y = total_wickets),
               data = bowls) +
    geom_bar(stat = "identity", aes(fill = bowler)) +
    labs(title = type, x = "Bowler", y = "Wickets Taken")

  grid.arrange(a1, a2)
}
# Draw the two-panel summary for each team of interest, in this order.
for (team_name in c("Royal Challengers Bangalore", "Kolkata Knight Riders")) {
  best(team_name)
}