1. IPL_MATCH data set
  1. Most successful teams season-wise
iplmatch.df <- read.csv(paste("IPL_MATCH.csv"),sep = ",")
rough.df <- data.frame(table(iplmatch.df$match_winner,iplmatch.df$Season_Year))
colnames(rough.df) <- c("team","season","winning")
dd.df <- data.frame(aggregate(rough.df$winning,by=list(season=rough.df$season),max))
s.df <-data.frame()
for (i in 1:10) {
  s.df<- rbind(s.df,(rough.df[which(rough.df$season==(i+2007) & rough.df$winning==dd.df[i,2]),]))
}
s.df
##                      team season winning
## 10       Rajasthan Royals   2008      13
## 16       Delhi Daredevils   2009      10
## 34         Mumbai Indians   2010      11
## 40    Chennai Super Kings   2011      11
## 59  Kolkata Knight Riders   2012      12
## 73         Mumbai Indians   2013      13
## 83        Kings XI Punjab   2014      12
## 92    Chennai Super Kings   2015      10
## 99         Mumbai Indians   2015      10
## 117   Sunrisers Hyderabad   2016      11
## 125        Mumbai Indians   2017      11

b. Correlation between match winning and toss winning (overall for all the seasons)

null hypothesis:

there is no corelation between toss winning and match winning
chisq.test(iplmatch.df$Toss_Winner,iplmatch.df$match_winner)
## Warning in chisq.test(iplmatch.df$Toss_Winner, iplmatch.df$match_winner):
## Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  iplmatch.df$Toss_Winner and iplmatch.df$match_winner
## X-squared = 1920, df = 156, p-value < 2.2e-16

since the the p-value is < 0.05 we can discard the null hypothesis. this means that there is some correlation between the two variables. match winning is somehow dependent on toss winning.

c. Team-wise visualization of number of wins and the top 5 players who won highest number of man-of-the match award.

barplot(table(iplmatch.df$match_winner))

barplot(tail(sort(table(iplmatch.df$ManOfMach))))

#.d. Visualization of the Teams which have won the matches with the highest run margin , considering all the seasons.

sort.df<- iplmatch.df[order(-iplmatch.df$Win_Margin),]
sort.df<- sort.df[1:10,]
barplot(sort.df$Win_Margin,xlab="MI       RCB       KKR        RCB      RCB     KXIP    RR       MI        DD       CSK")

##. IPL_PLAYER_MATCH

    1. Map the Match_ID with the first data set and find top 5 teams with the number of players above the age 30.(considering age_on_match)
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
iplplyr.df<- read.csv(paste("IPL_PLAYER_MATCH.csv"),sep = ",")
new.df<-left_join(iplmatch.df,iplplyr.df,by="Match_Id")
age.df<-data.frame(table(new.df$Player_team,(new.df$Age_As_on_match)>29))
age.df<- age.df[which(age.df$Var2=='TRUE'),]
age.df <- age.df[order(-age.df$Freq),]
head(age.df)[,1]
## [1] Kolkata Knight Riders       Royal Challengers Bangalore
## [3] Mumbai Indians              Chennai Super Kings        
## [5] Delhi Daredevils            Kings XI Punjab            
## 13 Levels: Chennai Super Kings Deccan Chargers ... Sunrisers Hyderabad
    1. Top 5 Most successful captains in terms of the wins in all the seasons
library("dplyr")
win.df<- new.df[which(new.df$match_winner==new.df$Player_team),]
win2.df<- new.df[which(new.df$match_winner==new.df$Opposit_Team),]
win.df<-data.frame(table(win.df$Player_Captain))
win2.df<-data.frame(table(win2.df$Player_Captain))
win.df<- data.frame(left_join(win.df,win2.df,by="Var1"))
win.df$sum <-win.df$Freq.x+win.df$Freq.y
win.df <- win.df[order(win.df$sum),]
head(win.df$Var1)
## [1] AM Rahane   DJ Bravo    LRPL Taylor PA Patel    JR Hopes    KK Nair    
## 44 Levels: A Kumble AC Gilchrist AD Mathews AJ Finch ... Z Khan

##. 3. FACT_BALL_BY_BALL

#.a. Top 10 matches with the highest number of extras given in those matches

ball.df<-read.csv(paste("FACT_BALL_BY_BALL.csv"),sep = ",")
gd.df<- data.frame(aggregate(ball.df$Extra_runs,by=list(Match_Id=ball.df$Match_Id),sum))
new.df<-left_join(gd.df,iplplyr.df,by="Match_Id")

ob.df<- data.frame(aggregate(ball.df$Extra_runs,by=list(id=ball.df$Match_Id),sum))
ob.df<-data.frame(ob.df[order(-ob.df$x),])
sd.df <- data.frame()
d.df<-data.frame()
 for(i in 1:10) {
    sd.df<- rbind(sd.df,data.frame(iplmatch.df[which(iplmatch.df$Match_Id==ob.df$id[i]),c(3,4)]))
    d.df <- rbind(d.df,data.frame(ball.df[which(ball.df$Match_Id==ob.df$id[i]),]))}
sd.df$Matchid <- ob.df$id[1:10]
sd.df$extra <-ob.df$x[1:10]
(sd.df)
##                           Team1                       Team2 Matchid extra
## 94        Kolkata Knight Riders             Deccan Chargers  335991    38
## 231              Mumbai Indians             Kings XI Punjab  419137    38
## 90  Royal Challengers Bangalore       Kolkata Knight Riders  335987    36
## 326             Kings XI Punjab Royal Challengers Bangalore  501265    36
## 136              Mumbai Indians             Kings XI Punjab  336033    35
## 252         Chennai Super Kings       Kolkata Knight Riders  419158    35
## 200            Delhi Daredevils              Mumbai Indians  392240    33
## 131            Rajasthan Royals Royal Challengers Bangalore  336028    32
## 241         Chennai Super Kings              Mumbai Indians  419147    32
## 97          Chennai Super Kings              Mumbai Indians  335994    31
    1. Visualise Extra-types for those 10 matches and the impact of the extras on the result of the match.
c <- c(sum(d.df$Wides),sum(d.df$Legbyes),sum(d.df$Byes),sum(d.df$Noballs),sum(d.df$Penalty) )

barplot(c,ylim=c(0,200),xlab="wide                 legbyes                  byes                noballs                 penalty")  

    1. Top 5 bowlers which has given highest runs and top 5 with the lowest economy rate.
f.df<-(aggregate(ball.df$Runs_Scored,by=list(bowler_id=ball.df$BowlerKey),sum))
f.df<- f.df[order(-f.df$x),]
f.df[1:5,]
##        bowler_id  x
## 3327 54838500296 60
## 95   33599400435 57
## 1773 41914200109 56
## 555  33603300078 55
## 604  33603800126 54
f.df<- f.df[order(f.df$x),]
f.df[1:5,]
##        bowler_id x
## 654  33604300035 0
## 1586 41912600217 0
## 2088 41916800021 0
## 2214 50121000223 0
## 2730 50125400021 0