swetatyagi_term-end assignment

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Step 1: Merge the match and player data based on 'id' and 'match_id'
df_combined <- merge(matches,deliveries , by.x = "id", by.y = "match_id")

print(paste('1. How many matches ended in “tie” and played “super-over”?  '))

## [1] "1. How many matches ended in “tie” and played “super-over”?  "

# Filter data for results
df_filtered <- matches %>%
  filter(grepl("tie", result, ignore.case = TRUE))
numero_filas <- df_filtered %>% nrow()
print(paste('There are', numero_filas, 'rows that have tie.'))

## [1] "There are 7 rows that have tie."

# Assuming df_combined has the columns is_super_over, winner, team1, season, and player_of_match

# Step 1: Filter for matches where a super-over happened (is_super_over == 1) and the team batting first won
super_over_matches <- df_combined %>%
  filter(is_super_over == 1)

# Step 2: Group by season, winner, and player_of_match to avoid duplicates
super_over_winners <- super_over_matches %>%
  group_by(season, winner, player_of_match) %>%
  summarise(count = n(), .groups = 'drop')  # Count the number of matches (optional)

# Step 3: Calculate the number of unique super-over winners
total_super_over_winners <- nrow(super_over_winners)

# Step 4: Print the result
cat("Total number of teams that won the super-over battle while batting first:", total_super_over_winners, "\n")

## Total number of teams that won the super-over battle while batting first: 7

cat("\n")

cat("\n")

print(paste('2. Which team had won by maximum runs? For this match, list the venue, date, player of the match, city, and season?   '))

## [1] "2. Which team had won by maximum runs? For this match, list the venue, date, player of the match, city, and season?   "

data = matches
# Find the maximum runs in 'win_by_runs' column
max_runs <- max(data$win_by_runs)

# Identify the team with the maximum runs
team_with_max_runs <- data$winner[which.max(data$win_by_runs)]
venue_with_max_runs <- data$venue[which.max(data$win_by_runs)]
date_with_max_runs <- data$date[which.max(data$win_by_runs)]
player_of_match_with_max_runs <- data$player_of_match[which.max(data$win_by_runs)]
city_with_max_runs <- data$city[which.max(data$win_by_runs)]
season_with_max_runs <- data$season[which.max(data$win_by_runs)]



# Print the result

# Print the team with the most maximum runs
cat("The most maximum runs is:", max_runs, "\n")

## The most maximum runs is: 146

cat("Team with Maximum Runs:", team_with_max_runs, "\n",
    "Venue:", venue_with_max_runs, "\n",
    "Date:", date_with_max_runs, "\n",
    "Player of the Match:", player_of_match_with_max_runs, "\n",
    "City:", city_with_max_runs, "\n",
    "Season:", season_with_max_runs, "\n")

## Team with Maximum Runs: Mumbai Indians 
##  Venue: Feroz Shah Kotla 
##  Date: 06-05-2017 
##  Player of the Match: LMP Simmons 
##  City: Delhi 
##  Season: 2017

cat("\n")

cat("\n")

print(paste('3.How many toss-winning teams have chosen to field and won the matches? What is the percentage of it? Use a suitable graph to show the result visually.     '))

## [1] "3.How many toss-winning teams have chosen to field and won the matches? What is the percentage of it? Use a suitable graph to show the result visually.     "

data =  matches
# Step 1: Filter the data where toss-winning teams chose to field
field_chosen_matches <- data[data$toss_decision == "field" & data$toss_winner == data$winner, ]

# Step 2: Count the number of such matches
field_and_won_count <- nrow(field_chosen_matches)

# Step 3: Calculate the total number of matches
total_matches <- nrow(data)

# Step 4: Calculate the percentage
field_and_won_percentage <- (field_and_won_count / total_matches) * 100

# Step 5: Print the results
cat("Number of matches where toss-winning teams chose to field and won:", field_and_won_count, "\n")

## Number of matches where toss-winning teams chose to field and won: 203

cat("Percentage of such matches:", round(field_and_won_percentage, 2), "%\n")

## Percentage of such matches: 31.92 %

# Data for plotting
result_df <- data.frame(
  Category = c("Fielded and Won", "Other Matches"),
  Count = c(field_and_won_count, total_matches - field_and_won_count)
)

cat("\n")

cat("\n")

print(paste('4.Which toss-winning  team had played maximum dot balls? What is the match number? Against which team this match was played?     '))

## [1] "4.Which toss-winning  team had played maximum dot balls? What is the match number? Against which team this match was played?     "

# Step 1: Merge the datasets on 'id' and 'match_id'

merged_data <- merge(matches,deliveries,  by.x = "id", by.y = "match_id")

# Step 2: Filter the data for toss-winning team
toss_winning_data <- merged_data[merged_data$toss_winner == merged_data$batting_team,                               ]


# Step 3: Identify dot balls (batsman_runs == 0)
toss_winning_data$dot_ball <- ifelse(toss_winning_data$batsman_runs == 0, 1, 0)

# Step 4: Group by toss-winning team and match id to summarize dot balls
dot_ball_summary <- toss_winning_data %>%
  group_by(toss_winner, id, team2) %>%
  summarize(total_dot_balls = sum(dot_ball), .groups = "drop")  # Using .groups = "drop" to avoid the message 
  
# Step 5: Find the team with the maximum dot balls


max_dot_balls <- dot_ball_summary %>%
  arrange(desc(total_dot_balls)) %>%
  slice(1)
#max_dot_balls <- dot_ball_summary[which.max(dot_ball_summary$total_dot_balls), ]


print(paste("Team had played maximum dot balls", max_dot_balls$toss_winner))

## [1] "Team had played maximum dot balls Kings XI Punjab"

cat("\n")

print(paste("Against Team:", max_dot_balls$team2))

## [1] "Against Team: Mumbai Indians"

cat("\n")

print(paste("Match Id:", max_dot_balls$id))

## [1] "Match Id: 157"

cat("\n")

print(paste("Total dot balls:", max_dot_balls$total_dot_balls))

## [1] "Total dot balls: 77"

cat("\n")

cat("\n")

cat("\n")

print(paste('5. Which team had won by maximum wicket, by applying the condition “player of the match is a batsman”? '))

## [1] "5. Which team had won by maximum wicket, by applying the condition “player of the match is a batsman”? "

# Step 1: Merge the match and player data based on 'id' and 'match_id'
df_combined <- merged_data

df_combinedx <- df_combined %>%
  filter(player_of_match == batsman)

winning_summary <- df_combinedx %>%
  group_by(winner,player_of_match) %>%
  summarise(
    Total_Wins = n(),  # Total de partidos ganados
    Wins_by_Wickets = sum(win_by_wickets > 0, na.rm = TRUE)
    ,.groups = "drop"  # Victorias por wickets
  ) %>%
  arrange(desc(Total_Wins))
   max_wicket_match  <- head(winning_summary, 1)
# Step 5: Display the result
winning_team <- max_wicket_match$winner
player_of_match <- max_wicket_match$player_of_match

cat("The team that won by the maximum wickets is:", winning_team)

## The team that won by the maximum wickets is: Royal Challengers Bangalore

cat("\n")

cat("The player of the match (who is a batsman) is:", player_of_match)

## The player of the match (who is a batsman) is: CH Gayle

cat("\n")

cat("\n")

print(paste('6. Which player has received maximum player of the match awards? Also, show the graphical view taking the franchise into consideration. (Hint: Franchise here means that the player representing the X team in taking all seasons into consideration)  ”? '))

## [1] "6. Which player has received maximum player of the match awards? Also, show the graphical view taking the franchise into consideration. (Hint: Franchise here means that the player representing the X team in taking all seasons into consideration)  ”? "

# Step 1: Merge the match and player data based on 'id' and 'match_id'
df_combined <- merged_data


# Step 2: Count the number of Player of the Match awards each player received for each franchise
player_awards <- df_combined %>%
  group_by(player_of_match, winner) %>%  # 'winner' represents the team (franchise)
  summarize(total_awards = n(), .groups = "drop") %>%  # Adding .groups = "drop" to suppress the warning
  distinct(total_awards,winner,player_of_match)
  

# Step 3: Find the player with the maximum Player of the Match awards
top_player <- player_awards %>%
              arrange(desc(total_awards))        
              most_successful_team  <- head(player_awards, 1)

# Extract the first row from the top_player dataframe
top_player_row <- top_player[1, ]
# Print the result
print(paste("The player with the maximum Player of the Match awards is:", top_player_row$player_of_match ))

## [1] "The player with the maximum Player of the Match awards is: CH Gayle"

cat("\n")

print(paste("Franchise:", top_player_row$winner))

## [1] "Franchise: Royal Challengers Bangalore"

#cat("\n")
#print(paste("Total awards:", top_player_row$total_awards))


cat("\n")

cat("\n")

print(paste('7.Which team has used a maximum number of bowlers in an innings? List their bowling statistics. (Hint: Bowling statistics include bowler name, runs conceded, wickets taken, and bowling economy)   '))

## [1] "7.Which team has used a maximum number of bowlers in an innings? List their bowling statistics. (Hint: Bowling statistics include bowler name, runs conceded, wickets taken, and bowling economy)   "

# Step 2: Count the number of unique bowlers for each team in each innings
# Summarize bowling statistics for each team
df_combined <- merged_data
bowler_per_innings1 <- df_combined %>%
  group_by(id,bowling_team, bowler,inning) %>%
  summarise(bowler_count = n(),.groups = "drop")
bowler_per_innings <- bowler_per_innings1 %>%
  group_by(bowling_team) %>%
  summarise(total_bowler_count = sum(bowler_count),.groups = "drop")



df_combined2 <- merge(bowler_per_innings,df_combined , by.x = "bowling_team", by.y = "bowling_team")

summarized_data <- df_combined2 %>%
  group_by( bowling_team, inning,bowler) %>%
  summarize(
    runs_conceded = sum(total_runs),             # Runs conceded
    wickets_taken = sum(player_dismissed != ""), # Wickets taken
    overs_bowled = total_bowler_count,                     # Overs bowled (each bowler bowls 6 balls)
    bowling_economy = sum(total_runs) / (total_bowler_count

), # Economy rate: runs per over
    .groups = 'drop'  # Avoid regrouping warning
  )

  summarized_datacp <- summarized_data
  top_teams3 <- summarized_data %>%
  distinct(bowler,overs_bowled,runs_conceded,wickets_taken,bowling_economy)
  
  summarized_data <- summarized_datacp %>%
  distinct(bowling_team,overs_bowled)
  
  
  
 

# Sort the summarized data by wickets taken and get the top 5 teams
top_teams <- summarized_data %>%
  arrange(desc(overs_bowled)) %>%
  slice(1)


# Mensaje initial
message <- "Top 1 Teams  maximum number of bowlers in an innings:\n"

#  Función para concatenar cada fila del dataframe con un formato específico
formatted_data <- apply(top_teams, 1, function(row) {
  paste(
    "Bow. team:", row["bowling_team"], 'overs_bowled' ,row["overs_bowled"]
    
     )
})

# Unir el mensaje con los datos formateados
output <- paste(message, paste(formatted_data, collapse = "\n"))
# Imprimir el resultado
cat(output)

## Top 1 Teams  maximum number of bowlers in an innings:
##  Bow. team: Mumbai Indians overs_bowled 18879

cat("See the stats in dataframe: bowler..")

## See the stats in dataframe: bowler..

cat("\n")

# Mensaje initial



cat("\n")

print(paste('8.Which teams had won the “super-over” battle while batting first? For these matches, find the season and Player of the Match.    '))

## [1] "8.Which teams had won the “super-over” battle while batting first? For these matches, find the season and Player of the Match.    "

df_combined <- merged_data
# Step 1: Filter for matches where a super-over happened (is_super_over == 1) and the team batting first won
super_over_matches <- df_combined %>%
  filter(is_super_over == 1, winner == team1)

# Step 2: Group by season, winner, and player_of_match to avoid duplicates
super_over_winners <- super_over_matches %>%
  group_by(id,season, winner, player_of_match) %>%
  summarise(count = n(), .groups = 'drop')  # Count the number of matches (optional)
  
   
super_over_winners <- super_over_winners %>%
  arrange(desc(count))  
  
# Step 3: Print the unique result
super_over_winners  <- head(super_over_winners, 10)



# Mensaje inicial
message <- "Teams that won the super-over battle while batting first (grouped by season and player):\n"

# Función para concatenar cada fila del dataframe con un formato específico
formatted_data <- apply(super_over_winners, 1, function(row) {
  paste("Season:", row["season"], "- Winner:", row["winner"], "- Player:", row["player_of_match"]
            ,'Count the number of matches' ,row["count"] )
})

# Unir el mensaje con los datos formateados
output <- paste(message, paste(formatted_data, collapse = "\n"))

# Imprimir el resultado
cat(output)

## Teams that won the super-over battle while batting first (grouped by season and player):
##  Season: 2014 - Winner: Rajasthan Royals - Player: JP Faulkner Count the number of matches 12
## Season: 2009 - Winner: Rajasthan Royals - Player: YK Pathan Count the number of matches 11
## Season: 2010 - Winner: Kings XI Punjab - Player: J Theron Count the number of matches  9

cat("\n")

cat("\n")

print(paste('9.Considering all seasons, find the highest score in IPL. Show the score distribution using a suitable graph. see the plot for answer  '))

## [1] "9.Considering all seasons, find the highest score in IPL. Show the score distribution using a suitable graph. see the plot for answer  "

# Step 1: Count the number of player_of_match awards for each player
# Step 1: Find the top 10 players with the most 'Player of the Match' awards across all seasons
top_players <- df_combined %>%
  group_by(player_of_match) %>%
  summarise(Awards_Count = n(),.groups = 'drop') %>%  # Count the number of awards
  arrange(desc(Awards_Count)) %>%  # Sort in descending order
  slice(1:10)  # Select top 10 players

# Step 2: Summarize the total runs scored by these top 10 players
# Replace 'batsman_runs' with the actual column name if different
top_players_runs <- df_combined %>%
  filter(player_of_match %in% top_players$player_of_match) %>%  # Filter for top 10 players
  group_by(player_of_match) %>%
  summarise(Total_Runs = sum(batsman_runs, na.rm = TRUE), .groups = 'drop')  # Summarize total runs

# Step 3: Merge the Awards Count with the Runs Data for the top 10 players
top_players_summary <- merge(top_players, top_players_runs, by = "player_of_match")


cat("\n")

cat("\n")

print(paste('10. Which IPL Team is most successful? The most successful team is the one who has one the greatest number of matches, considering all seasons. Also, interpret the result with the help of plots/graphs. see the plot for answer '))

## [1] "10. Which IPL Team is most successful? The most successful team is the one who has one the greatest number of matches, considering all seasons. Also, interpret the result with the help of plots/graphs. see the plot for answer "

# Step 1: Group by 'winner' and calculate the total number of wins
team_wins <- df_combined %>%
  group_by(winner) %>%
  summarise(total_wins = n(), .groups = 'drop') %>%
  distinct(winner, total_wins)
  
  

# Step 2: Find the most successful team
most_successful_team <- team_wins %>%
  arrange(desc(total_wins))
most_successful_team  <- head(most_successful_team, 1)




# Mensaje inicial
message <- "Which IPL Team is most successful?:\n"

# Función para concatenar cada fila del dataframe con un formato específico
formatted_data <- apply(most_successful_team, 1, function(row) {
  paste("- Winner:", row["winner"]
            ,'Count the number of matches' ,row["total_wins"] )
})

# Unir el mensaje con los datos formateados
output <- paste(message, paste(formatted_data, collapse = "\n"))

# Imprimir el resultado
cat(output)

## Which IPL Team is most successful?:
##  - Winner: Mumbai Indians Count the number of matches 22164

swetatyagi_term-end assignment

R Markdown

Including Plots