iplmatch.df <- read.csv(paste("IPL_MATCH.csv"),sep = ",")
rough.df <- data.frame(table(iplmatch.df$match_winner,iplmatch.df$Season_Year))
colnames(rough.df) <- c("team","season","winning")
dd.df <- data.frame(aggregate(rough.df$winning,by=list(season=rough.df$season),max))
s.df <-data.frame()
for (i in 1:10) {
s.df<- rbind(s.df,(rough.df[which(rough.df$season==(i+2007) & rough.df$winning==dd.df[i,2]),]))
}
s.df
## team season winning
## 10 Rajasthan Royals 2008 13
## 16 Delhi Daredevils 2009 10
## 34 Mumbai Indians 2010 11
## 40 Chennai Super Kings 2011 11
## 59 Kolkata Knight Riders 2012 12
## 73 Mumbai Indians 2013 13
## 83 Kings XI Punjab 2014 12
## 92 Chennai Super Kings 2015 10
## 99 Mumbai Indians 2015 10
## 117 Sunrisers Hyderabad 2016 11
## 125 Mumbai Indians 2017 11
null hypothesis:
there is no corelation between toss winning and match winning
chisq.test(iplmatch.df$Toss_Winner,iplmatch.df$match_winner)
## Warning in chisq.test(iplmatch.df$Toss_Winner, iplmatch.df$match_winner):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: iplmatch.df$Toss_Winner and iplmatch.df$match_winner
## X-squared = 1920, df = 156, p-value < 2.2e-16
since the the p-value is < 0.05 we can discard the null hypothesis. this means that there is some correlation between the two variables. match winning is somehow dependent on toss winning.
barplot(table(iplmatch.df$match_winner))
barplot(tail(sort(table(iplmatch.df$ManOfMach))))
#.d. Visualization of the Teams which have won the matches with the highest run margin , considering all the seasons.
sort.df<- iplmatch.df[order(-iplmatch.df$Win_Margin),]
sort.df<- sort.df[1:10,]
barplot(sort.df$Win_Margin,xlab="MI RCB KKR RCB RCB KXIP RR MI DD CSK")
##. IPL_PLAYER_MATCH
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
iplplyr.df<- read.csv(paste("IPL_PLAYER_MATCH.csv"),sep = ",")
new.df<-left_join(iplmatch.df,iplplyr.df,by="Match_Id")
age.df<-data.frame(table(new.df$Player_team,(new.df$Age_As_on_match)>29))
age.df<- age.df[which(age.df$Var2=='TRUE'),]
age.df <- age.df[order(-age.df$Freq),]
head(age.df)[,1]
## [1] Kolkata Knight Riders Royal Challengers Bangalore
## [3] Mumbai Indians Chennai Super Kings
## [5] Delhi Daredevils Kings XI Punjab
## 13 Levels: Chennai Super Kings Deccan Chargers ... Sunrisers Hyderabad
library("dplyr")
win.df<- new.df[which(new.df$match_winner==new.df$Player_team),]
win2.df<- new.df[which(new.df$match_winner==new.df$Opposit_Team),]
win.df<-data.frame(table(win.df$Player_Captain))
win2.df<-data.frame(table(win2.df$Player_Captain))
win.df<- data.frame(left_join(win.df,win2.df,by="Var1"))
win.df$sum <-win.df$Freq.x+win.df$Freq.y
win.df <- win.df[order(win.df$sum),]
head(win.df$Var1)
## [1] AM Rahane DJ Bravo LRPL Taylor PA Patel JR Hopes KK Nair
## 44 Levels: A Kumble AC Gilchrist AD Mathews AJ Finch ... Z Khan
##. 3. FACT_BALL_BY_BALL
#.a. Top 10 matches with the highest number of extras given in those matches
ball.df<-read.csv(paste("FACT_BALL_BY_BALL.csv"),sep = ",")
gd.df<- data.frame(aggregate(ball.df$Extra_runs,by=list(Match_Id=ball.df$Match_Id),sum))
new.df<-left_join(gd.df,iplplyr.df,by="Match_Id")
ob.df<- data.frame(aggregate(ball.df$Extra_runs,by=list(id=ball.df$Match_Id),sum))
ob.df<-data.frame(ob.df[order(-ob.df$x),])
sd.df <- data.frame()
d.df<-data.frame()
for(i in 1:10) {
sd.df<- rbind(sd.df,data.frame(iplmatch.df[which(iplmatch.df$Match_Id==ob.df$id[i]),c(3,4)]))
d.df <- rbind(d.df,data.frame(ball.df[which(ball.df$Match_Id==ob.df$id[i]),]))}
sd.df$Matchid <- ob.df$id[1:10]
sd.df$extra <-ob.df$x[1:10]
(sd.df)
## Team1 Team2 Matchid extra
## 94 Kolkata Knight Riders Deccan Chargers 335991 38
## 231 Mumbai Indians Kings XI Punjab 419137 38
## 90 Royal Challengers Bangalore Kolkata Knight Riders 335987 36
## 326 Kings XI Punjab Royal Challengers Bangalore 501265 36
## 136 Mumbai Indians Kings XI Punjab 336033 35
## 252 Chennai Super Kings Kolkata Knight Riders 419158 35
## 200 Delhi Daredevils Mumbai Indians 392240 33
## 131 Rajasthan Royals Royal Challengers Bangalore 336028 32
## 241 Chennai Super Kings Mumbai Indians 419147 32
## 97 Chennai Super Kings Mumbai Indians 335994 31
c <- c(sum(d.df$Wides),sum(d.df$Legbyes),sum(d.df$Byes),sum(d.df$Noballs),sum(d.df$Penalty) )
barplot(c,ylim=c(0,200),xlab="wide legbyes byes noballs penalty")
f.df<-(aggregate(ball.df$Runs_Scored,by=list(bowler_id=ball.df$BowlerKey),sum))
f.df<- f.df[order(-f.df$x),]
f.df[1:5,]
## bowler_id x
## 3327 54838500296 60
## 95 33599400435 57
## 1773 41914200109 56
## 555 33603300078 55
## 604 33603800126 54
f.df<- f.df[order(f.df$x),]
f.df[1:5,]
## bowler_id x
## 654 33604300035 0
## 1586 41912600217 0
## 2088 41916800021 0
## 2214 50121000223 0
## 2730 50125400021 0