Shoaib Nadaf | Learner
The Indian Premier League (IPL) is a professional Twenty20 cricket league in India, contested every year between March or April and May by eight teams representing eight different cities or states. The league was founded by the Board of Control for Cricket in India (BCCI) in 2008.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.3
## v tibble 3.0.0 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
# Load the match-level and the ball-by-ball data sets.
# `na.strings` has to be a character vector. The previous `na.strings = TRUE`
# was coerced to the string "TRUE", so "NA" fields were not treated as missing.
# "NA" (the read.csv default) is used instead. Blank text fields such as
# `umpire3` are still read as "" rather than NA.
matches_df <- read.csv(
  "matches.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)
delivery_df <- read.csv(
  "deliveries.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)

# Report how many values are missing (NA or blank) in each column.
# The earlier bare `na.omit(matches_df)` threw its result away and only dumped
# the whole data frame to the console, so nothing was actually cleaned.
vapply(
  matches_df,
  function(column) sum(is.na(column) | column == ""),
  integer(1)
)

# Preview the first rows of both data sets and summarise the match data.
head(matches_df)
head(delivery_df)
summary(matches_df)
## id season city date
## Min. : 1.0 Min. :2008 Length:756 Length:756
## 1st Qu.: 189.8 1st Qu.:2011 Class :character Class :character
## Median : 378.5 Median :2013 Mode :character Mode :character
## Mean : 1792.2 Mean :2013
## 3rd Qu.: 567.2 3rd Qu.:2016
## Max. :11415.0 Max. :2019
## team1 team2 toss_winner toss_decision
## Length:756 Length:756 Length:756 Length:756
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## result dl_applied winner win_by_runs
## Length:756 Min. :0.00000 Length:756 Min. : 0.00
## Class :character 1st Qu.:0.00000 Class :character 1st Qu.: 0.00
## Mode :character Median :0.00000 Mode :character Median : 0.00
## Mean :0.02513 Mean : 13.28
## 3rd Qu.:0.00000 3rd Qu.: 19.00
## Max. :1.00000 Max. :146.00
## win_by_wickets player_of_match venue umpire1
## Min. : 0.000 Length:756 Length:756 Length:756
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 4.000 Mode :character Mode :character Mode :character
## Mean : 3.351
## 3rd Qu.: 6.000
## Max. :10.000
## umpire2 umpire3
## Length:756 Length:756
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Show the structure of the match data: column names, types and first values.
str(matches_df)
## 'data.frame': 756 obs. of 18 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ season : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ city : chr "Hyderabad" "Pune" "Rajkot" "Indore" ...
## $ date : chr "05/04/2017" "06/04/2017" "07/04/2017" "08/04/2017" ...
## $ team1 : chr "Sunrisers Hyderabad" "Mumbai Indians" "Gujarat Lions" "Rising Pune Supergiant" ...
## $ team2 : chr "Royal Challengers Bangalore" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
## $ toss_winner : chr "Royal Challengers Bangalore" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
## $ toss_decision : chr "field" "field" "field" "field" ...
## $ result : chr "normal" "normal" "normal" "normal" ...
## $ dl_applied : int 0 0 0 0 0 0 0 0 0 0 ...
## $ winner : chr "Sunrisers Hyderabad" "Rising Pune Supergiant" "Kolkata Knight Riders" "Kings XI Punjab" ...
## $ win_by_runs : int 35 0 0 0 15 0 0 0 97 0 ...
## $ win_by_wickets : int 0 7 10 6 0 9 4 8 0 4 ...
## $ player_of_match: chr "Yuvraj Singh" "SPD Smith" "CA Lynn" "GJ Maxwell" ...
## $ venue : chr "Rajiv Gandhi International Stadium, Uppal" "Maharashtra Cricket Association Stadium" "Saurashtra Cricket Association Stadium" "Holkar Cricket Stadium" ...
## $ umpire1 : chr "AY Dandekar" "A Nand Kishore" "Nitin Menon" "AK Chaudhary" ...
## $ umpire2 : chr "NJ Llong" "S Ravi" "CK Nandan" "C Shamshuddin" ...
## $ umpire3 : chr "" "" "" "" ...
# Per-column summary statistics of the ball-by-ball delivery data.
summary(delivery_df)
## match_id inning batting_team bowling_team
## Min. : 1 Min. :1.000 Length:179078 Length:179078
## 1st Qu.: 190 1st Qu.:1.000 Class :character Class :character
## Median : 379 Median :1.000 Mode :character Mode :character
## Mean : 1802 Mean :1.483
## 3rd Qu.: 567 3rd Qu.:2.000
## Max. :11415 Max. :5.000
## over ball batsman non_striker
## Min. : 1.00 Min. :1.000 Length:179078 Length:179078
## 1st Qu.: 5.00 1st Qu.:2.000 Class :character Class :character
## Median :10.00 Median :4.000 Mode :character Mode :character
## Mean :10.16 Mean :3.616
## 3rd Qu.:15.00 3rd Qu.:5.000
## Max. :20.00 Max. :9.000
## bowler is_super_over wide_runs bye_runs
## Length:179078 Min. :0.0000000 Min. :0.00000 Min. :0.000000
## Class :character 1st Qu.:0.0000000 1st Qu.:0.00000 1st Qu.:0.000000
## Mode :character Median :0.0000000 Median :0.00000 Median :0.000000
## Mean :0.0004523 Mean :0.03672 Mean :0.004936
## 3rd Qu.:0.0000000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000000 Max. :5.00000 Max. :4.000000
## legbye_runs noball_runs penalty_runs batsman_runs
## Min. :0.00000 Min. :0.000000 Min. :0.0e+00 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.0e+00 1st Qu.:0.000
## Median :0.00000 Median :0.000000 Median :0.0e+00 Median :1.000
## Mean :0.02114 Mean :0.004183 Mean :5.6e-05 Mean :1.247
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.0e+00 3rd Qu.:1.000
## Max. :5.00000 Max. :5.000000 Max. :5.0e+00 Max. :7.000
## extra_runs total_runs player_dismissed dismissal_kind
## Min. :0.00000 Min. : 0.000 Length:179078 Length:179078
## 1st Qu.:0.00000 1st Qu.: 0.000 Class :character Class :character
## Median :0.00000 Median : 1.000 Mode :character Mode :character
## Mean :0.06703 Mean : 1.314
## 3rd Qu.:0.00000 3rd Qu.: 1.000
## Max. :7.00000 Max. :10.000
## fielder
## Length:179078
## Class :character
## Mode :character
##
##
##
# Show the structure of the delivery data: column names, types and first values.
str(delivery_df)
## 'data.frame': 179078 obs. of 21 variables:
## $ match_id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ inning : int 1 1 1 1 1 1 1 1 1 1 ...
## $ batting_team : chr "Sunrisers Hyderabad" "Sunrisers Hyderabad" "Sunrisers Hyderabad" "Sunrisers Hyderabad" ...
## $ bowling_team : chr "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" ...
## $ over : int 1 1 1 1 1 1 1 2 2 2 ...
## $ ball : int 1 2 3 4 5 6 7 1 2 3 ...
## $ batsman : chr "DA Warner" "DA Warner" "DA Warner" "DA Warner" ...
## $ non_striker : chr "S Dhawan" "S Dhawan" "S Dhawan" "S Dhawan" ...
## $ bowler : chr "TS Mills" "TS Mills" "TS Mills" "TS Mills" ...
## $ is_super_over : int 0 0 0 0 0 0 0 0 0 0 ...
## $ wide_runs : int 0 0 0 0 2 0 0 0 0 0 ...
## $ bye_runs : int 0 0 0 0 0 0 0 0 0 0 ...
## $ legbye_runs : int 0 0 0 0 0 0 1 0 0 0 ...
## $ noball_runs : int 0 0 0 0 0 0 0 0 0 1 ...
## $ penalty_runs : int 0 0 0 0 0 0 0 0 0 0 ...
## $ batsman_runs : int 0 0 4 0 0 0 0 1 4 0 ...
## $ extra_runs : int 0 0 0 0 2 0 1 0 0 1 ...
## $ total_runs : int 0 0 4 0 2 0 1 1 4 1 ...
## $ player_dismissed: chr "" "" "" "" ...
## $ dismissal_kind : chr "" "" "" "" ...
## $ fielder : chr "" "" "" "" ...
Total number of matches played so far
# Row count of the match data via count(); the result column is named `n`.
matches_df %>%
  count()

# The same figure again, this time under a descriptive column name.
summarise(matches_df, total_matches_played_in_IPL_till_2019 = n())
Which team has won the most matches?
# Number of wins per team, most successful team first.
# Rows with a blank or missing `winner` are dropped so that they are not
# counted as a team of their own (presumably matches without a result --
# TODO confirm against the data).
matches_df %>%
  filter(!is.na(winner), winner != "") %>%
  group_by(winner) %>%
  summarise(No_Of_wins = n()) %>%
  arrange(desc(No_Of_wins))
Team with the most wins from 2008 to 2019
# Count wins per team, then keep only the team(s) whose count equals the
# overall maximum (ties are all kept).
summarise(group_by(matches_df, winner), No_Of_wins = n()) %>%
  filter(No_Of_wins == max(No_Of_wins))
Matches won by each team in the 2018 season
# Wins per team in the 2018 season, stored in `matches_2018` and printed.
matches_2018 <-
  matches_df %>%
  filter(season == 2018) %>%
  group_by(winner) %>%
  summarise(No_of_wins = n())
matches_2018
Which team won the most matches in the 2019 season?
# Wins per team in the 2019 season.
matches_2019 <-
  matches_df %>%
  filter(season == 2019) %>%
  group_by(winner) %>%
  summarise(No_of_wins = n())

# Keep the team(s) whose win count equals the season maximum.
filter(matches_2019, No_of_wins == max(No_of_wins))
Which teams won the most and the fewest matches in the 2018 season?
# Wins per team in the 2018 season.
# Rows with a blank or missing `winner` are excluded first; otherwise they
# would form a pseudo-team that could be reported as the side with the fewest
# wins.
matches_2018 <- matches_df %>%
  filter(season == 2018, !is.na(winner), winner != "") %>%
  group_by(winner) %>%
  summarise(No_of_wins_2018 = n())

# Team(s) with the most wins in 2018 (ties are all kept).
matches_2018 %>%
  filter(No_of_wins_2018 == max(No_of_wins_2018))

# Team(s) with the fewest wins in 2018 (ties are all kept).
# A team without a single win in 2018 has no row in `matches_2018`, so it
# cannot be reported here.
matches_2018 %>%
  filter(No_of_wins_2018 == min(No_of_wins_2018))
Which teams have won by 10 wickets most often in the IPL?
# Matches that were won by exactly 10 wickets, with the teams involved.
winbywickets_10 <- matches_df %>%
  filter(win_by_wickets == 10) %>%
  select(team1, team2, winner, win_by_wickets)
winbywickets_10

# Number of 10-wicket wins per team, most frequent first.
# The previous `count(winbywickets_10, group_by = winbywickets_10$winner)` did
# not call group_by(); it created a column literally named `group_by` and left
# the teams unranked.
winbywickets_10 %>%
  group_by(winner) %>%
  summarise(No_of_10_wicket_wins = n()) %>%
  arrange(desc(No_of_10_wicket_wins))
Plotting total wins per team
# Horizontal bar chart of total wins per team, ordered by number of wins.
# Rows with a blank or missing `winner` are dropped so the chart does not get
# an unnamed bar. The fill legend is hidden because it only repeats the team
# names already shown on the axis.
matches_df %>%
  filter(!is.na(winner), winner != "") %>%
  group_by(winner) %>%
  summarise(wins = n()) %>%
  ggplot(aes(x = reorder(winner, wins), y = wins, fill = winner)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  # reorder() would otherwise leak into the axis title, so keep it as "winner".
  scale_x_discrete("winner") +
  scale_y_continuous("Total matches won")
Who has won the most and the fewest Man of the Match awards?
# Player(s) with the highest number of Man of the Match awards
# (ties are all kept).
summarise(
  group_by(matches_df, player_of_match),
  No_of_Man_of_the_match = n()
) %>%
  filter(No_of_Man_of_the_match == max(No_of_Man_of_the_match))

# Player(s) with the lowest number of Man of the Match awards
# (ties are all kept).
summarise(
  group_by(matches_df, player_of_match),
  No_of_Man_of_the_match = n()
) %>%
  filter(No_of_Man_of_the_match == min(No_of_Man_of_the_match))
Plot: top 10 players by number of Man of the Match awards
# Horizontal bar chart of the top 10 players by Man of the Match awards.
# The ranking column is passed to top_n() explicitly instead of relying on it
# picking the last column, so dplyr no longer prints a "Selecting by ..."
# message. top_n() keeps ties, so more than 10 players can be shown.
matches_df %>%
  group_by(player_of_match) %>%
  summarise(No_of_Man_of_the_match = n()) %>%
  top_n(10, No_of_Man_of_the_match) %>%
  ggplot(aes(
    x = reorder(player_of_match, No_of_Man_of_the_match),
    y = No_of_Man_of_the_match
  )) +
  geom_col(fill = "red") +
  coord_flip() +
  scale_x_discrete("players") +
  scale_y_continuous("No of times Man Of the Match")
Most successful team in each season
# Team(s) with the most wins in each season, ordered by season.
# summarise() drops only the last grouping level, so the data is still grouped
# by `season` when filter() runs and max(No_win) is evaluated per season.
# The result is ungrouped afterwards so that later operations on `s_match` are
# not silently applied season by season.
s_match <- matches_df %>%
  group_by(season, winner) %>%
  summarise(No_win = n()) %>%
  filter(No_win == max(No_win)) %>%
  ungroup() %>%
  arrange(season)
s_match