This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the matches CSV into a data frame and print its structure.
ipl_matches <- read.csv(file = "IPL Matches 2008-2020.csv")
str(object = ipl_matches)
## 'data.frame': 816 obs. of 18 variables:
## $ id : int 335982 335983 335984 335985 335986 335987 335988 335989 335990 335991 ...
## $ city : chr "Bangalore" "Chandigarh" "Delhi" "Mumbai" ...
## $ date : chr "18-04-2008" "19-04-2008" "19-04-2008" "20-04-2008" ...
## $ player_of_match: chr "BB McCullum" "MEK Hussey" "MF Maharoof" "MV Boucher" ...
## $ venue : chr "M Chinnaswamy Stadium" "Punjab Cricket Association Stadium, Mohali" "Feroz Shah Kotla" "Wankhede Stadium" ...
## $ neutral_venue : int 0 0 0 0 0 0 0 0 0 0 ...
## $ team1 : chr "Royal Challengers Bangalore" "Kings XI Punjab" "Delhi Daredevils" "Mumbai Indians" ...
## $ team2 : chr "Kolkata Knight Riders" "Chennai Super Kings" "Rajasthan Royals" "Royal Challengers Bangalore" ...
## $ toss_winner : chr "Royal Challengers Bangalore" "Chennai Super Kings" "Rajasthan Royals" "Mumbai Indians" ...
## $ toss_decision : chr "field" "bat" "bat" "bat" ...
## $ winner : chr "Kolkata Knight Riders" "Chennai Super Kings" "Delhi Daredevils" "Royal Challengers Bangalore" ...
## $ result : chr "runs" "runs" "wickets" "wickets" ...
## $ result_margin : int 140 33 9 5 5 6 9 6 3 66 ...
## $ eliminator : chr "N" "N" "N" "N" ...
## $ method : chr NA NA NA NA ...
## $ umpire1 : chr "Asad Rauf" "MR Benson" "Aleem Dar" "SJ Davis" ...
## $ umpire2 : chr "RE Koertzen" "SL Shastri" "GA Pratapkumar" "DJ Harper" ...
## $ loser : chr "Royal Challengers Bangalore" "Kings XI Punjab" "Rajasthan Royals" "Mumbai Indians" ...
# Show the first ten matches.
head(ipl_matches, n = 10)
## id city date player_of_match
## 1 335982 Bangalore 18-04-2008 BB McCullum
## 2 335983 Chandigarh 19-04-2008 MEK Hussey
## 3 335984 Delhi 19-04-2008 MF Maharoof
## 4 335985 Mumbai 20-04-2008 MV Boucher
## 5 335986 Kolkata 20-04-2008 DJ Hussey
## 6 335987 Jaipur 21-04-2008 SR Watson
## 7 335988 Hyderabad 22-04-2008 V Sehwag
## 8 335989 Chennai 23-04-2008 ML Hayden
## 9 335990 Hyderabad 24-04-2008 YK Pathan
## 10 335991 Chandigarh 25-04-2008 KC Sangakkara
## venue neutral_venue
## 1 M Chinnaswamy Stadium 0
## 2 Punjab Cricket Association Stadium, Mohali 0
## 3 Feroz Shah Kotla 0
## 4 Wankhede Stadium 0
## 5 Eden Gardens 0
## 6 Sawai Mansingh Stadium 0
## 7 Rajiv Gandhi International Stadium, Uppal 0
## 8 MA Chidambaram Stadium, Chepauk 0
## 9 Rajiv Gandhi International Stadium, Uppal 0
## 10 Punjab Cricket Association Stadium, Mohali 0
## team1 team2
## 1 Royal Challengers Bangalore Kolkata Knight Riders
## 2 Kings XI Punjab Chennai Super Kings
## 3 Delhi Daredevils Rajasthan Royals
## 4 Mumbai Indians Royal Challengers Bangalore
## 5 Kolkata Knight Riders Deccan Chargers
## 6 Rajasthan Royals Kings XI Punjab
## 7 Deccan Chargers Delhi Daredevils
## 8 Chennai Super Kings Mumbai Indians
## 9 Deccan Chargers Rajasthan Royals
## 10 Kings XI Punjab Mumbai Indians
## toss_winner toss_decision winner
## 1 Royal Challengers Bangalore field Kolkata Knight Riders
## 2 Chennai Super Kings bat Chennai Super Kings
## 3 Rajasthan Royals bat Delhi Daredevils
## 4 Mumbai Indians bat Royal Challengers Bangalore
## 5 Deccan Chargers bat Kolkata Knight Riders
## 6 Kings XI Punjab bat Rajasthan Royals
## 7 Deccan Chargers bat Delhi Daredevils
## 8 Mumbai Indians field Chennai Super Kings
## 9 Rajasthan Royals field Rajasthan Royals
## 10 Mumbai Indians field Kings XI Punjab
## result result_margin eliminator method umpire1 umpire2
## 1 runs 140 N <NA> Asad Rauf RE Koertzen
## 2 runs 33 N <NA> MR Benson SL Shastri
## 3 wickets 9 N <NA> Aleem Dar GA Pratapkumar
## 4 wickets 5 N <NA> SJ Davis DJ Harper
## 5 wickets 5 N <NA> BF Bowden K Hariharan
## 6 wickets 6 N <NA> Aleem Dar RB Tiffin
## 7 wickets 9 N <NA> IL Howell AM Saheba
## 8 runs 6 N <NA> DJ Harper GA Pratapkumar
## 9 wickets 3 N <NA> Asad Rauf MR Benson
## 10 runs 66 N <NA> Aleem Dar AM Saheba
## loser
## 1 Royal Challengers Bangalore
## 2 Kings XI Punjab
## 3 Rajasthan Royals
## 4 Mumbai Indians
## 5 Deccan Chargers
## 6 Kings XI Punjab
## 7 Deccan Chargers
## 8 Mumbai Indians
## 9 Deccan Chargers
## 10 Mumbai Indians
# Show the last ten matches.
tail(ipl_matches, n = 10)
## id city date player_of_match
## 807 1216542 Dubai 08-10-2020 JM Bairstow
## 808 1216543 Dubai 14-10-2020 A Nortje
## 809 1216544 Dubai 25-10-2020 RD Gaikwad
## 810 1216545 Abu Dhabi 26-09-2020 Shubman Gill
## 811 1216546 Dubai 20-10-2020 S Dhawan
## 812 1216547 Dubai 28-09-2020 AB de Villiers
## 813 1237177 Dubai 05-11-2020 JJ Bumrah
## 814 1237178 Abu Dhabi 06-11-2020 KS Williamson
## 815 1237180 Abu Dhabi 08-11-2020 MP Stoinis
## 816 1237181 Dubai 10-11-2020 TA Boult
## venue neutral_venue
## 807 Dubai International Cricket Stadium 0
## 808 Dubai International Cricket Stadium 0
## 809 Dubai International Cricket Stadium 0
## 810 Sheikh Zayed Stadium 0
## 811 Dubai International Cricket Stadium 0
## 812 Dubai International Cricket Stadium 0
## 813 Dubai International Cricket Stadium 0
## 814 Sheikh Zayed Stadium 0
## 815 Sheikh Zayed Stadium 0
## 816 Dubai International Cricket Stadium 0
## team1 team2
## 807 Sunrisers Hyderabad Kings XI Punjab
## 808 Delhi Capitals Rajasthan Royals
## 809 Royal Challengers Bangalore Chennai Super Kings
## 810 Sunrisers Hyderabad Kolkata Knight Riders
## 811 Delhi Capitals Kings XI Punjab
## 812 Royal Challengers Bangalore Mumbai Indians
## 813 Mumbai Indians Delhi Capitals
## 814 Royal Challengers Bangalore Sunrisers Hyderabad
## 815 Delhi Capitals Sunrisers Hyderabad
## 816 Delhi Capitals Mumbai Indians
## toss_winner toss_decision winner
## 807 Sunrisers Hyderabad bat Sunrisers Hyderabad
## 808 Delhi Capitals bat Delhi Capitals
## 809 Royal Challengers Bangalore bat Chennai Super Kings
## 810 Sunrisers Hyderabad bat Kolkata Knight Riders
## 811 Delhi Capitals bat Kings XI Punjab
## 812 Mumbai Indians field Royal Challengers Bangalore
## 813 Delhi Capitals field Mumbai Indians
## 814 Sunrisers Hyderabad field Sunrisers Hyderabad
## 815 Delhi Capitals bat Delhi Capitals
## 816 Delhi Capitals bat Mumbai Indians
## result result_margin eliminator method umpire1 umpire2
## 807 runs 69 N <NA> AK Chaudhary Nitin Menon
## 808 runs 13 N <NA> AK Chaudhary Nitin Menon
## 809 wickets 8 N <NA> C Shamshuddin RK Illingworth
## 810 wickets 7 N <NA> CB Gaffaney VK Sharma
## 811 wickets 5 N <NA> C Shamshuddin RK Illingworth
## 812 tie NA Y <NA> Nitin Menon PR Reiffel
## 813 runs 57 N <NA> CB Gaffaney Nitin Menon
## 814 wickets 6 N <NA> PR Reiffel S Ravi
## 815 runs 17 N <NA> PR Reiffel S Ravi
## 816 wickets 5 N <NA> CB Gaffaney Nitin Menon
## loser
## 807 Kings XI Punjab
## 808 Rajasthan Royals
## 809 Royal Challengers Bangalore
## 810 Sunrisers Hyderabad
## 811 Delhi Capitals
## 812 Mumbai Indians
## 813 Delhi Capitals
## 814 Royal Challengers Bangalore
## 815 Sunrisers Hyderabad
## 816 Delhi Capitals
Let's look at the column names.
# List the column names of the matches data frame.
names(ipl_matches)
## [1] "id" "city" "date" "player_of_match"
## [5] "venue" "neutral_venue" "team1" "team2"
## [9] "toss_winner" "toss_decision" "winner" "result"
## [13] "result_margin" "eliminator" "method" "umpire1"
## [17] "umpire2" "loser"
# Horizontal bar chart of wins per team, with teams ordered by win frequency.
plt <- ggplot(ipl_matches, aes(y = fct_infreq(winner), fill = winner)) +
  geom_bar()
# Extract the computed bar data so each bar can be labelled with its count.
plot_data <- layer_data(plt)
plt +
  annotate(
    geom = "text",
    label = plot_data$count,
    x = plot_data$x + 5, # shift the label 5 units along x from the bar value
    y = plot_data$y
  ) +
  # Colours are unnamed, so they are assigned positionally to the fill levels.
  scale_fill_manual(
    values = c(
      "yellow", "lightblue", "#00008B", "darkred", "orange",
      "red", "orange", "purple", "blue", "cyan",
      "#D11D9B", "magenta", "#D1AB3E", "#FF822A", "grey"
    )
  ) +
  labs(
    title = "Number of matches won by each team from 2008-2020",
    x = "Number of wins",
    y = "Team"
  ) +
  guides(fill = guide_legend(title = "Team"))
# Disabled: geom_vline(xintercept = mean(plot_data$x), colour = "black")
# Horizontal bar chart of losses per team, with teams ordered by loss frequency.
plt <- ggplot(ipl_matches, aes(y = fct_infreq(loser), fill = loser)) +
  geom_bar()
# Extract the computed bar data so each bar can be labelled with its count.
plot_data <- layer_data(plt)
plt +
  annotate(
    geom = "text",
    label = plot_data$count,
    x = plot_data$x + 5, # shift the label 5 units along x from the bar value
    y = plot_data$y
  ) +
  # Colours are unnamed, so they are assigned positionally to the fill levels.
  scale_fill_manual(
    values = c(
      "yellow", "lightblue", "#00008B", "darkred", "orange",
      "red", "orange", "purple", "blue", "cyan",
      "#D11D9B", "magenta", "#D1AB3E", "#FF822A", "grey"
    )
  ) +
  labs(
    title = "Number of matches lost by each team from 2008-2020",
    x = "Number of loses",
    y = "Team"
  ) +
  guides(fill = guide_legend(title = "Team"))
# Players with the most player-of-the-match awards.
# slice_max() replaces the superseded top_n() and names the ranking column
# explicitly instead of relying on "last column" selection. Ties are kept
# (with_ties defaults to TRUE), so more than 15 rows can be returned.
ipl_matches %>%
  count(player_of_match, sort = TRUE, name = "Number of MOTMs") %>%
  slice_max(`Number of MOTMs`, n = 15)
## Selecting by Number of MOTMs
## player_of_match Number of MOTMs
## 1 AB de Villiers 23
## 2 CH Gayle 22
## 3 RG Sharma 18
## 4 DA Warner 17
## 5 MS Dhoni 17
## 6 SR Watson 16
## 7 YK Pathan 16
## 8 SK Raina 14
## 9 G Gambhir 13
## 10 V Kohli 13
## 11 AM Rahane 12
## 12 MEK Hussey 12
## 13 A Mishra 11
## 14 AD Russell 11
## 15 DR Smith 11
## 16 KA Pollard 11
## 17 V Sehwag 11
# Tally how often the toss winner chose each option.
count(ipl_matches, toss_decision)
## toss_decision n
## 1 bat 320
## 2 field 496
# Read the ball-by-ball CSV into a data frame and print its structure.
ipl_ball_by_ball <- read.csv(file = "IPL Ball-by-Ball 2008-2020.csv")
str(object = ipl_ball_by_ball)
## 'data.frame': 193468 obs. of 18 variables:
## $ id : int 335982 335982 335982 335982 335982 335982 335982 335982 335982 335982 ...
## $ inning : int 1 1 1 1 1 1 1 1 1 1 ...
## $ over : int 6 6 7 7 7 7 7 7 8 8 ...
## $ ball : int 5 6 1 2 3 4 5 6 1 2 ...
## $ batsman : chr "RT Ponting" "BB McCullum" "BB McCullum" "BB McCullum" ...
## $ non_striker : chr "BB McCullum" "RT Ponting" "RT Ponting" "RT Ponting" ...
## $ bowler : chr "AA Noffke" "AA Noffke" "Z Khan" "Z Khan" ...
## $ batsman_runs : int 1 1 0 1 1 1 1 1 0 0 ...
## $ extra_runs : int 0 0 0 0 0 0 0 0 0 0 ...
## $ total_runs : int 1 1 0 1 1 1 1 1 0 0 ...
## $ non_boundary : int 0 0 0 0 0 0 0 0 0 0 ...
## $ is_wicket : int 0 0 0 0 0 0 0 0 0 0 ...
## $ dismissal_kind : chr NA NA NA NA ...
## $ player_dismissed: chr NA NA NA NA ...
## $ fielder : chr NA NA NA NA ...
## $ extras_type : chr NA NA NA NA ...
## $ batting_team : chr "Kolkata Knight Riders" "Kolkata Knight Riders" "Kolkata Knight Riders" "Kolkata Knight Riders" ...
## $ bowling_team : chr "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" "Royal Challengers Bangalore" ...
# List the column names of the ball-by-ball data frame.
names(ipl_ball_by_ball)
## [1] "id" "inning" "over" "ball"
## [5] "batsman" "non_striker" "bowler" "batsman_runs"
## [9] "extra_runs" "total_runs" "non_boundary" "is_wicket"
## [13] "dismissal_kind" "player_dismissed" "fielder" "extras_type"
## [17] "batting_team" "bowling_team"
# Show the first six deliveries (the default row count for head()).
head(ipl_ball_by_ball, n = 6)
## id inning over ball batsman non_striker bowler batsman_runs
## 1 335982 1 6 5 RT Ponting BB McCullum AA Noffke 1
## 2 335982 1 6 6 BB McCullum RT Ponting AA Noffke 1
## 3 335982 1 7 1 BB McCullum RT Ponting Z Khan 0
## 4 335982 1 7 2 BB McCullum RT Ponting Z Khan 1
## 5 335982 1 7 3 RT Ponting BB McCullum Z Khan 1
## 6 335982 1 7 4 BB McCullum RT Ponting Z Khan 1
## extra_runs total_runs non_boundary is_wicket dismissal_kind player_dismissed
## 1 0 1 0 0 <NA> <NA>
## 2 0 1 0 0 <NA> <NA>
## 3 0 0 0 0 <NA> <NA>
## 4 0 1 0 0 <NA> <NA>
## 5 0 1 0 0 <NA> <NA>
## 6 0 1 0 0 <NA> <NA>
## fielder extras_type batting_team bowling_team
## 1 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
## 2 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
## 3 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
## 4 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
## 5 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
## 6 <NA> <NA> Kolkata Knight Riders Royal Challengers Bangalore
# Show the last six deliveries (the default row count for tail()).
tail(ipl_ball_by_ball, n = 6)
## id inning over ball batsman non_striker bowler
## 193463 1237181 1 12 4 SS Iyer RR Pant NM Coulter-Nile
## 193464 1237181 1 12 5 RR Pant SS Iyer NM Coulter-Nile
## 193465 1237181 1 12 6 RR Pant SS Iyer NM Coulter-Nile
## 193466 1237181 1 13 1 RR Pant SS Iyer KH Pandya
## 193467 1237181 1 13 2 RR Pant SS Iyer KH Pandya
## 193468 1237181 1 13 3 SS Iyer RR Pant KH Pandya
## batsman_runs extra_runs total_runs non_boundary is_wicket dismissal_kind
## 193463 1 0 1 0 0 <NA>
## 193464 0 0 0 0 0 <NA>
## 193465 1 0 1 0 0 <NA>
## 193466 0 1 1 0 0 <NA>
## 193467 1 0 1 0 0 <NA>
## 193468 1 0 1 0 0 <NA>
## player_dismissed fielder extras_type batting_team bowling_team
## 193463 <NA> <NA> <NA> Delhi Capitals Mumbai Indians
## 193464 <NA> <NA> <NA> Delhi Capitals Mumbai Indians
## 193465 <NA> <NA> <NA> Delhi Capitals Mumbai Indians
## 193466 <NA> <NA> wides Delhi Capitals Mumbai Indians
## 193467 <NA> <NA> <NA> Delhi Capitals Mumbai Indians
## 193468 <NA> <NA> <NA> Delhi Capitals Mumbai Indians
The matches in the data seem to be in order, but the overs are not.
# Bar chart of extras by type; deliveries without an extra (NA) are included.
ggplot(ipl_ball_by_ball, aes(x = extras_type, fill = extras_type)) +
  geom_bar()
# Same chart, restricted to deliveries that actually recorded an extra.
ipl_ball_by_ball %>%
  drop_na(extras_type) %>%
  ggplot() +
  geom_bar(mapping = aes(x = extras_type, fill = extras_type))
# Bar chart of extras by type with the count printed above each bar.
ipl_ball_by_ball %>%
  filter(!is.na(extras_type)) %>%
  ggplot(aes(x = extras_type, fill = extras_type)) +
  geom_bar() +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.4) +
  labs(
    title = "Number of extras from 2008-2020",
    x = "Type of extra",
    y = "Number of extras"
  ) +
  guides(fill = guide_legend(title = "Type of extra"))
# Total runs (batter runs plus extras) scored in each over across all matches.
over_data <- summarise(
  group_by(ipl_ball_by_ball, over),
  runs_in_overs = sum(total_runs)
)
over_data
## # A tibble: 20 × 2
## over runs_in_overs
## <int> <int>
## 1 0 9804
## 2 1 11642
## 3 2 13005
## 4 3 13348
## 5 4 13405
## 6 5 13470
## 7 6 10840
## 8 7 11548
## 9 8 11999
## 10 9 11875
## 11 10 12199
## 12 11 12458
## 13 12 12542
## 14 13 12897
## 15 14 13326
## 16 15 13449
## 17 16 13989
## 18 17 14307
## 19 18 13804
## 20 19 12887
# Circular bar chart of total runs per over, with a rotated label on each bar.
label_data <- over_data
number_of_bar <- nrow(label_data)
# Angle of each label. `over` is zero-based (0-19), so the centre of bar k sits
# at roughly (k + 0.5) / number_of_bar of the full circle. The previous
# `over - 0.5` was written for one-based ids and rotated every label by about
# one bar width.
angle <- 90 - 360 * (label_data$over + 0.5) / number_of_bar
# Labels on the left half of the circle (angle < -90) are right-aligned ...
label_data$hjust <- ifelse(angle < -90, 1, 0)
# ... and flipped by 180 degrees so they are not drawn upside down.
label_data$angle <- ifelse(angle < -90, angle + 180, angle)
# Build the plot.
p <- ggplot(label_data, aes(x = over, y = runs_in_overs)) +
  geom_bar(stat = "identity", fill = alpha("skyblue", 0.9)) +
  # The negative lower limit sets the size of the inner circle; the upper
  # limit leaves room beyond the bars.
  ylim(-10000, 20000) +
  # No axis text, axis titles or cartesian grid.
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # plot.margin takes exactly four values: top, right, bottom, left.
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  # Switch from cartesian to polar coordinates.
  coord_polar(start = 0) +
  # Over-number labels, placed just above each bar and rotated to follow it.
  geom_text(
    data = label_data,
    aes(x = over, y = runs_in_overs + 10, label = over, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  )
p
# Per-over totals, split into runs off the bat and extras.
runs_split <- summarise(
  group_by(ipl_ball_by_ball, over),
  total_batter_runs = sum(batsman_runs),
  total_extras = sum(extra_runs)
)
# Keep a plain data.frame copy for plotting and normalisation.
runs_split_data <- as.data.frame(runs_split)
runs_split_data
## over total_batter_runs total_extras
## 1 0 8894 910
## 2 1 10744 898
## 3 2 12259 746
## 4 3 12674 674
## 5 4 12798 607
## 6 5 12770 700
## 7 6 10205 635
## 8 7 10998 550
## 9 8 11448 551
## 10 9 11344 531
## 11 10 11728 471
## 12 11 11966 492
## 13 12 11956 586
## 14 13 12367 530
## 15 14 12781 545
## 16 15 12864 585
## 17 16 13322 667
## 18 17 13604 703
## 19 18 13082 722
## 20 19 12141 746
# Batter runs per over.
ggplot(
  runs_split_data,
  aes(x = over, y = total_batter_runs, fill = total_batter_runs)
) +
  geom_col(position = "stack")
# Extras per over.
ggplot(
  runs_split_data,
  aes(x = over, y = total_extras, fill = total_extras)
) +
  geom_col(position = "stack")
# Min-max scale a numeric vector to [0, 1]; NAs are ignored when finding the
# range and are left as NA in the result.
normalize <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  (x - bounds[1]) / (bounds[2] - bounds[1])
}
# Min-max normalise the two per-over run columns so they share a 0-1 scale.
# lapply() works column by column on the data frame, avoiding the matrix
# coercion that apply() performs, and the columns are selected by name
# rather than by position.
norm_runs_split <- as.data.frame(
  lapply(runs_split_data[c("total_batter_runs", "total_extras")], normalize)
)
norm_runs_split
## total_batter_runs total_extras
## 1 0.0000000 1.00000000
## 2 0.3927813 0.97266515
## 3 0.7144374 0.62642369
## 4 0.8025478 0.46241458
## 5 0.8288747 0.30979499
## 6 0.8229299 0.52164009
## 7 0.2783439 0.37357631
## 8 0.4467091 0.17995444
## 9 0.5422505 0.18223235
## 10 0.5201699 0.13667426
## 11 0.6016985 0.00000000
## 12 0.6522293 0.04783599
## 13 0.6501062 0.26195900
## 14 0.7373673 0.13439636
## 15 0.8252654 0.16856492
## 16 0.8428875 0.25968109
## 17 0.9401274 0.44646925
## 18 1.0000000 0.52847380
## 19 0.8891720 0.57175399
## 20 0.6893843 0.62642369
# Total runs scored in each over, as a plain data.frame for plotting.
overs_data <- summarise(
  group_by(ipl_ball_by_ball, over),
  runs_in_overs = sum(total_runs)
)
dataf <- as.data.frame(overs_data)
dataf
## over runs_in_overs
## 1 0 9804
## 2 1 11642
## 3 2 13005
## 4 3 13348
## 5 4 13405
## 6 5 13470
## 7 6 10840
## 8 7 11548
## 9 8 11999
## 10 9 11875
## 11 10 12199
## 12 11 12458
## 13 12 12542
## 14 13 12897
## 15 14 13326
## 16 15 13449
## 17 16 13989
## 18 17 14307
## 19 18 13804
## 20 19 12887
# Polar bar chart of runs per over, filled by over number, with no legend.
ggplot(dataf, aes(x = over, y = runs_in_overs, fill = over)) +
  geom_col() +
  coord_polar(start = 0) +
  # The negative lower limit leaves a hollow centre.
  ylim(-5000, 15000) +
  theme(legend.position = "none")