Shoaib Nadaf | Learner

The Indian Premier League is a professional Twenty20 cricket league in India contested during March or April and May of every year by eight teams representing eight different cities or states in India. The league was founded by the Board of Control for Cricket in India in 2008.

Load Library :

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.3
## v tibble  3.0.0     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(dplyr)

Importing Data :

# Load the match-level and ball-by-ball IPL tables from the working directory.
# `na.strings` takes a character vector of cell values to read as NA; a
# logical TRUE is not a valid marker set, so the documented default "NA" is
# spelled out instead. Blank cells in character columns are still read as "".
matches_df <- read.csv(
  "matches.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)
delivery_df <- read.csv(
  "deliveries.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)

Removing NA values from Data frame :

# na.omit() returns a filtered copy and never modifies its argument, so the
# result must be assigned back for rows containing NA to actually be dropped.
# Called bare, it only printed the whole table and left matches_df untouched.
matches_df <- na.omit(matches_df)

# Preview the first rows of each table.
head(matches_df)
head(delivery_df)

Data types

summary(matches_df)
##        id              season         city               date          
##  Min.   :    1.0   Min.   :2008   Length:756         Length:756        
##  1st Qu.:  189.8   1st Qu.:2011   Class :character   Class :character  
##  Median :  378.5   Median :2013   Mode  :character   Mode  :character  
##  Mean   : 1792.2   Mean   :2013                                        
##  3rd Qu.:  567.2   3rd Qu.:2016                                        
##  Max.   :11415.0   Max.   :2019                                        
##     team1              team2           toss_winner        toss_decision     
##  Length:756         Length:756         Length:756         Length:756        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     result            dl_applied         winner           win_by_runs    
##  Length:756         Min.   :0.00000   Length:756         Min.   :  0.00  
##  Class :character   1st Qu.:0.00000   Class :character   1st Qu.:  0.00  
##  Mode  :character   Median :0.00000   Mode  :character   Median :  0.00  
##                     Mean   :0.02513                      Mean   : 13.28  
##                     3rd Qu.:0.00000                      3rd Qu.: 19.00  
##                     Max.   :1.00000                      Max.   :146.00  
##  win_by_wickets   player_of_match       venue             umpire1         
##  Min.   : 0.000   Length:756         Length:756         Length:756        
##  1st Qu.: 0.000   Class :character   Class :character   Class :character  
##  Median : 4.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 3.351                                                           
##  3rd Qu.: 6.000                                                           
##  Max.   :10.000                                                           
##    umpire2            umpire3         
##  Length:756         Length:756        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
str(matches_df)
## 'data.frame':    756 obs. of  18 variables:
##  $ id             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ season         : int  2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
##  $ city           : chr  "Hyderabad" "Pune" "Rajkot" "Indore" ...
##  $ date           : chr  "05/04/2017" "06/04/2017" "07/04/2017" "08/04/2017" ...
##  $ team1          : chr  "Sunrisers Hyderabad" "Mumbai Indians" "Gujarat Lions" "Rising Pune Supergiant" ...
##  $ team2          : chr  "Royal Challengers Bangalore" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
##  $ toss_winner    : chr  "Royal Challengers Bangalore" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
##  $ toss_decision  : chr  "field" "field" "field" "field" ...
##  $ result         : chr  "normal" "normal" "normal" "normal" ...
##  $ dl_applied     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ winner         : chr  "Sunrisers Hyderabad" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
##  $ win_by_runs    : int  35 0 0 0 15 0 0 0 97 0 ...
##  $ win_by_wickets : int  0 7 10 6 0 9 4 8 0 4 ...
##  $ player_of_match: chr  "Yuvraj Singh" "SPD Smith" "CA Lynn" "GJ Maxwell" ...
##  $ venue          : chr  "Rajiv Gandhi International Stadium, Uppal" "Maharashtra Cricket Association Stadium" "Saurashtra Cricket Association Stadium" "Holkar Cricket Stadium" ...
##  $ umpire1        : chr  "AY Dandekar" "A Nand Kishore" "Nitin Menon" "AK Chaudhary" ...
##  $ umpire2        : chr  "NJ Llong" "S Ravi" "CK Nandan" "C Shamshuddin" ...
##  $ umpire3        : chr  "" "" "" "" ...
summary(delivery_df)
##     match_id         inning      batting_team       bowling_team      
##  Min.   :    1   Min.   :1.000   Length:179078      Length:179078     
##  1st Qu.:  190   1st Qu.:1.000   Class :character   Class :character  
##  Median :  379   Median :1.000   Mode  :character   Mode  :character  
##  Mean   : 1802   Mean   :1.483                                        
##  3rd Qu.:  567   3rd Qu.:2.000                                        
##  Max.   :11415   Max.   :5.000                                        
##       over            ball         batsman          non_striker       
##  Min.   : 1.00   Min.   :1.000   Length:179078      Length:179078     
##  1st Qu.: 5.00   1st Qu.:2.000   Class :character   Class :character  
##  Median :10.00   Median :4.000   Mode  :character   Mode  :character  
##  Mean   :10.16   Mean   :3.616                                        
##  3rd Qu.:15.00   3rd Qu.:5.000                                        
##  Max.   :20.00   Max.   :9.000                                        
##     bowler          is_super_over         wide_runs          bye_runs       
##  Length:179078      Min.   :0.0000000   Min.   :0.00000   Min.   :0.000000  
##  Class :character   1st Qu.:0.0000000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Mode  :character   Median :0.0000000   Median :0.00000   Median :0.000000  
##                     Mean   :0.0004523   Mean   :0.03672   Mean   :0.004936  
##                     3rd Qu.:0.0000000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##                     Max.   :1.0000000   Max.   :5.00000   Max.   :4.000000  
##   legbye_runs       noball_runs        penalty_runs      batsman_runs  
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0e+00   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.0e+00   1st Qu.:0.000  
##  Median :0.00000   Median :0.000000   Median :0.0e+00   Median :1.000  
##  Mean   :0.02114   Mean   :0.004183   Mean   :5.6e-05   Mean   :1.247  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.0e+00   3rd Qu.:1.000  
##  Max.   :5.00000   Max.   :5.000000   Max.   :5.0e+00   Max.   :7.000  
##    extra_runs        total_runs     player_dismissed   dismissal_kind    
##  Min.   :0.00000   Min.   : 0.000   Length:179078      Length:179078     
##  1st Qu.:0.00000   1st Qu.: 0.000   Class :character   Class :character  
##  Median :0.00000   Median : 1.000   Mode  :character   Mode  :character  
##  Mean   :0.06703   Mean   : 1.314                                        
##  3rd Qu.:0.00000   3rd Qu.: 1.000                                        
##  Max.   :7.00000   Max.   :10.000                                        
##    fielder         
##  Length:179078     
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
str(delivery_df)
## 'data.frame':    179078 obs. of  21 variables:
##  $ match_id        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ inning          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ batting_team    : chr  "Sunrisers Hyderabad" "Sunrisers Hyderabad" "Sunrisers Hyderabad" "Sunrisers Hyderabad" ...
##  $ bowling_team    : chr  "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" ...
##  $ over            : int  1 1 1 1 1 1 1 2 2 2 ...
##  $ ball            : int  1 2 3 4 5 6 7 1 2 3 ...
##  $ batsman         : chr  "DA Warner" "DA Warner" "DA Warner" "DA Warner" ...
##  $ non_striker     : chr  "S Dhawan" "S Dhawan" "S Dhawan" "S Dhawan" ...
##  $ bowler          : chr  "TS Mills" "TS Mills" "TS Mills" "TS Mills" ...
##  $ is_super_over   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ wide_runs       : int  0 0 0 0 2 0 0 0 0 0 ...
##  $ bye_runs        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ legbye_runs     : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ noball_runs     : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ penalty_runs    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ batsman_runs    : int  0 0 4 0 0 0 0 1 4 0 ...
##  $ extra_runs      : int  0 0 0 0 2 0 1 0 0 1 ...
##  $ total_runs      : int  0 0 4 0 2 0 1 1 4 1 ...
##  $ player_dismissed: chr  "" "" "" "" ...
##  $ dismissal_kind  : chr  "" "" "" "" ...
##  $ fielder         : chr  "" "" "" "" ...

Total number of matches played so far

# Number of rows (one per match) in the match table, shown two ways:
# count() reports it in a column named `n`, summarise() under a descriptive
# column name.
matches_df %>%
  count()

matches_df %>%
  summarise(total_matches_played_in_IPL_till_2019 = n())

Which team has won the most matches?

# One row per distinct value of `winner`, with the number of matches recorded
# against it.
matches_df %>%
  group_by(winner) %>%
  summarise(No_Of_wins = n())

Team with the most wins from 2008 to 2019

# Tally wins per `winner`, then keep only the row(s) whose tally equals the
# overall maximum (ties are all kept).
matches_df %>%
  group_by(winner) %>%
  summarise(No_Of_wins = n()) %>%
  filter(No_Of_wins == max(No_Of_wins))

Number of matches won by each team in the 2018 season

# Wins per team in the 2018 season: restrict to 2018 rows, then tally the
# rows for each `winner`.
matches_2018 <- matches_df %>%
  filter(season == 2018) %>%
  group_by(winner) %>%
  summarise(No_of_wins = n())

matches_2018

Which team won the most matches in the 2019 season?

# Wins per team in the 2019 season.
matches_2019 <- matches_df %>%
  filter(season == 2019) %>%
  group_by(winner) %>%
  summarise(No_of_wins = n())

# Row(s) with the highest 2019 win count (ties are all kept).
matches_2019 %>%
  filter(No_of_wins == max(No_of_wins))

Which teams won the most and the fewest matches in the 2018 season?

# Wins per team in the 2018 season, stored under a season-specific column
# name.
matches_2018 <- matches_df %>%
  filter(season == 2018) %>%
  group_by(winner) %>%
  summarise(No_of_wins_2018 = n())

# Row(s) with the highest 2018 win count.
matches_2018 %>%
  filter(No_of_wins_2018 == max(No_of_wins_2018))

# Row(s) with the lowest 2018 win count.
matches_2018 %>%
  filter(No_of_wins_2018 == min(No_of_wins_2018))

Which teams have won a match by 10 wickets the most number of times in IPL ?

#winner and wickets = 10

# Matches won by a 10-wicket margin, keeping only the columns needed to see
# who played and who won.
winbywickets_10 <- matches_df %>%
  filter(win_by_wickets == 10) %>%
  select(team1, team2, winner, win_by_wickets)

winbywickets_10

# Number of 10-wicket wins per team, most frequent first. count() groups by
# the bare column name; the earlier `group_by = ...` argument only created a
# column literally called `group_by`.
count(winbywickets_10, winner, sort = TRUE)

Plotting total wins per team

# Horizontal bar chart of the number of matches recorded against each
# `winner`; each bar is filled with its own colour.
matches_df %>%
  group_by(winner) %>%
  summarise(wins = n()) %>%
  ggplot(aes(x = winner, y = wins, fill = winner)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(name = "Total matches won")

Who has won the most and the fewest Man of the Match awards?

# Player(s) with the highest number of `player_of_match` entries.
matches_df %>%
  group_by(player_of_match) %>%
  summarise(No_of_Man_of_the_match = n()) %>%
  filter(No_of_Man_of_the_match == max(No_of_Man_of_the_match))

# Player(s) with the lowest number of `player_of_match` entries.
matches_df %>%
  group_by(player_of_match) %>%
  summarise(No_of_Man_of_the_match = n()) %>%
  filter(No_of_Man_of_the_match == min(No_of_Man_of_the_match))

Plot

# Horizontal bar chart of the players with the most Man of the Match awards.
# The ranking column is passed to top_n() explicitly rather than letting it
# fall back to the last column of the table (which also emitted a
# "Selecting by ..." message). top_n() keeps ties, so more than 10 players
# can appear.
matches_df %>%
  group_by(player_of_match) %>%
  summarise(No_of_Man_of_the_match = n()) %>%
  top_n(10, No_of_Man_of_the_match) %>%
  ggplot(aes(
    x = reorder(player_of_match, No_of_Man_of_the_match),
    y = No_of_Man_of_the_match
  )) +
  geom_col(fill = "red") +
  coord_flip() +
  scale_x_discrete("players") +
  scale_y_continuous("No of times Man Of the Match")

Most successful teams Season-wise

# Team(s) with the most wins in each season, ordered by season.
s_match <- matches_df %>%
  group_by(season, winner) %>%
  summarise(No_win = n()) %>%
  # Regroup by season explicitly so the per-season maximum does not depend on
  # whatever grouping summarise() happens to leave behind.
  group_by(season) %>%
  filter(No_win == max(No_win)) %>%
  # Return a plain, ungrouped table so later verbs are not silently applied
  # per season.
  ungroup() %>%
  arrange(season)

s_match