swetatyagi_term-end assignment

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Step 1: Merge the match and player data based on 'id' and 'match_id'
df_combined <- merge(matches,deliveries , by.x = "id", by.y = "match_id")
merged_data <- df_combined

print(paste('1. How many matches ended in “tie” and played “super-over”?  '))

## [1] "1. How many matches ended in “tie” and played “super-over”?  "

## [1] "1. How many matches ended in “tie” and played “super-over”?  "
# Filter data for results
df_filtered <- df_combined %>%
  filter(grepl("tie", result, ignore.case = TRUE) & is_super_over == 1)
  
df_filtered <- df_filtered %>%
            distinct(id)
  



numero_filas <- nrow(df_filtered)
  



print(paste('There are', numero_filas, 'rows that have tie  and is super over.'))

## [1] "There are 7 rows that have tie  and is super over."

## [1] "There are 7 rows that have tie."
# Assuming df_combined has the columns is_super_over, winner, team1, season, and player_of_match

cat("\n")

print(paste('2. Which team had won by maximum runs? For this match, list the venue, date, player of the match, city, and season?   '))

## [1] "2. Which team had won by maximum runs? For this match, list the venue, date, player of the match, city, and season?   "

## [1] "2. Which team had won by maximum runs? For this match, list the venue, date, player of the match, city, and season?   "
data = matches
# Find the maximum runs in 'win_by_runs' column
max_runs <- max(data$win_by_runs)

# Identify the team with the maximum runs
team_with_max_runs <- data$winner[which.max(data$win_by_runs)]
venue_with_max_runs <- data$venue[which.max(data$win_by_runs)]
date_with_max_runs <- data$date[which.max(data$win_by_runs)]
player_of_match_with_max_runs <- data$player_of_match[which.max(data$win_by_runs)]
city_with_max_runs <- data$city[which.max(data$win_by_runs)]
season_with_max_runs <- data$season[which.max(data$win_by_runs)]



# Print the result

# Print the team with the most maximum runs
cat("The most maximum runs is:", max_runs, "\n")

## The most maximum runs is: 146

## The most maximum runs is: 146
cat("Team with Maximum Runs:", team_with_max_runs, "\n",
    "Venue:", venue_with_max_runs, "\n",
    "Date:", date_with_max_runs, "\n",
    "Player of the Match:", player_of_match_with_max_runs, "\n",
    "City:", city_with_max_runs, "\n",
    "Season:", season_with_max_runs, "\n")

## Team with Maximum Runs: Mumbai Indians 
##  Venue: Feroz Shah Kotla 
##  Date: 06-05-2017 
##  Player of the Match: LMP Simmons 
##  City: Delhi 
##  Season: 2017

## Team with Maximum Runs: Mumbai Indians 
##  Venue: Feroz Shah Kotla 
##  Date: 06-05-2017 
##  Player of the Match: LMP Simmons 
##  City: Delhi 
##  Season: 2017
cat("\n")

cat("\n")

print(paste('3.How many toss-winning teams have chosen to field and won the matches? What is the percentage of it? Use a suitable graph to show the result visually.     '))

## [1] "3.How many toss-winning teams have chosen to field and won the matches? What is the percentage of it? Use a suitable graph to show the result visually.     "

## [1] "3.How many toss-winning teams have chosen to field and won the matches? What is the percentage of it? Use a suitable graph to show the result visually.     "
df =  matches

# Create a new column `franchise`
df <- merged_data %>%
  mutate(franchise = ifelse(toss_decision == 'bat', team1, team2))

# Filter data where toss winner chose to field and won
toss_win_field_win <- df %>%
  filter(toss_winner == winner, toss_decision == 'field')

# Count the number of such matches
toss_win_field_win_count <- nrow(toss_win_field_win)

# Filter data where toss winner won the match
toss_win_match_win <- df %>%
  filter(toss_winner == winner)

# Count the number of such matches
toss_win_match_win_count <- nrow(toss_win_match_win)

# Calculate percentage
percentage <- (toss_win_field_win_count / toss_win_match_win_count) * 100

# Data for the pie chart
data <- data.frame(
  category = c("Chose to field and won", "Others"),
  value = c(percentage, 100 - percentage)
)



# Print the results
cat("Number of toss-winning teams who chose to field and won:", toss_win_field_win_count, "\n")

## Number of toss-winning teams who chose to field and won: 46304

cat("Percentage of toss-winning teams who chose to field and won:", round(percentage, 2), "%\n")

## Percentage of toss-winning teams who chose to field and won: 60.42 %

cat("\n")

cat("\n")

print(paste('4.Which toss-winning  team had played maximum dot balls? What is the match number? Against which team this match was played?     '))

## [1] "4.Which toss-winning  team had played maximum dot balls? What is the match number? Against which team this match was played?     "

## [1] "4.Which toss-winning  team had played maximum dot balls? What is the match number? Against which team this match was played?     "
# Step 1: Merge the datasets on 'id' and 'match_id'

merged_data <- merge(matches,deliveries,  by.x = "id", by.y = "match_id")


# Filter for dot balls by toss winners
dot_balls_data <- merged_data  %>%
  filter(toss_winner == batting_team, total_runs == 0)

# Group and count dot balls by toss winner and match ID
dot_balls_by_toss_winner <- dot_balls_data %>%
  group_by(toss_winner, id,team2)   %>%
  summarise(count = n(), .groups = "drop")   %>%
  arrange(desc(count))

# Get the row with the maximum dot balls
max_dot_balls_row <- dot_balls_by_toss_winner[1, ]

# Extract the required information
toss_winner <- max_dot_balls_row$toss_winner
match_id <- max_dot_balls_row$id
team2 <- max_dot_balls_row$team2
opponent_team <- df %>%
  filter(id == match_id &  winner != toss_winner) %>%
  pull(winner) %>%
  first()

# Print the results
cat("Toss winning team with maximum dot balls:", toss_winner, "\n")

## Toss winning team with maximum dot balls: Mumbai Indians

cat("Match ID:", match_id, "\n")

## Match ID: 69

cat("Opponent team:", opponent_team, "\n")

## Opponent team: Kings XI Punjab

cat("\n")

cat("\n")

print(paste('5. Which team had won by maximum wicket, by applying the condition “player of the match is a batsman”? '))

## [1] "5. Which team had won by maximum wicket, by applying the condition “player of the match is a batsman”? "

## [1] "5. Which team had won by maximum wicket, by applying the condition “player of the match is a batsman”? "
# Step 1: Merge the match and player data based on 'id' and 'match_id'
df_combined <- merged_data

df_combinedx <- df_combined %>%
  filter(player_of_match == batsman)

winning_summary <- df_combinedx %>%
  group_by(winner,player_of_match) %>%
  summarise(
    Total_Wins = n(),  # Total de partidos ganados
    Wins_by_Wickets = sum(win_by_wickets > 0, na.rm = TRUE)
    ,.groups = "drop"  # Victorias por wickets
  ) %>%
  arrange(desc(Total_Wins))
   max_wicket_match  <- head(winning_summary, 1)
# Step 5: Display the result
winning_team <- max_wicket_match$winner
player_of_match <- max_wicket_match$player_of_match

cat("The team that won by the maximum wickets is:", winning_team)

## The team that won by the maximum wickets is: Royal Challengers Bangalore

## The team that won by the maximum wickets is: Royal Challengers Bangalore
cat("\n")

cat("The player of the match (who is a batsman) is:", player_of_match)

## The player of the match (who is a batsman) is: CH Gayle

## The player of the match (who is a batsman) is: CH Gayle
cat("\n")

cat("\n")

print(paste('6. Which player has received maximum player of the match awards? Also, show the graphical view taking the franchise into consideration. (Hint: Franchise here means that the player representing the X team in taking all seasons into consideration)  ”? '))

## [1] "6. Which player has received maximum player of the match awards? Also, show the graphical view taking the franchise into consideration. (Hint: Franchise here means that the player representing the X team in taking all seasons into consideration)  ”? "

df_combined <- matches

df_combined <- df_combined %>%
  filter(winner == team1)

# Step 2: Count the number of Player of the Match awards each player received for each franchise
player_awards <- df_combined %>%
  group_by(player_of_match, team1) %>%  # 'winner' represents the team (franchise)
  summarize(total_awards = n(), .groups = "drop") %>%  # Adding .groups = "drop" to suppress the warning
  distinct(total_awards,team1,player_of_match)
  


# Step 3: Find the player with the maximum Player of the Match awards
top_player <- player_awards %>%
              arrange(desc(total_awards))        
              top_player  <- head(top_player, 1)

              
              
              
# Extract the first row from the top_player dataframe
top_player_row <- top_player[1, ]
# Print the result
print(paste("The player with the maximum Player of the Match awards is:", top_player_row$player_of_match ))

## [1] "The player with the maximum Player of the Match awards is: SK Raina"

## [1] "The player with the maximum Player of the Match awards is: CH Gayle"
cat("\n")

print(paste("Franchise:", top_player_row$team1))

## [1] "Franchise: Chennai Super Kings"

## [1] "Franchise: Royal Challengers Bangalore"
#cat("\n")
#print(paste("Total awards:", top_player_row$total_awards))



cat("\n")

cat("\n")

print(paste('7.Which team has used a maximum number of bowlers in an innings? List their bowling statistics. (Hint: Bowling statistics include bowler name, runs conceded, wickets taken, and bowling economy)   '))

## [1] "7.Which team has used a maximum number of bowlers in an innings? List their bowling statistics. (Hint: Bowling statistics include bowler name, runs conceded, wickets taken, and bowling economy)   "

## [1] "7.Which team has used a maximum number of bowlers in an innings? List their bowling statistics. (Hint: Bowling statistics include bowler name, runs conceded, wickets taken, and bowling economy)   "
# Step 2: Count the number of unique bowlers for each team in each innings
# Summarize bowling statistics for each team
df <- merged_data
# Filter for first innings only
first_innings_data <- df %>%
  filter(inning == 1)

# Count unique bowlers used per innings
bowlers_used_per_innings <- first_innings_data %>%
  group_by(id, inning,team1) %>%
  summarise(num_bowlers = n_distinct(bowler) , .groups = "drop")   %>%
  arrange(desc(num_bowlers))

# Innings with the most bowlers used
max_bowlers_innings_id <- bowlers_used_per_innings$team1[1]
max_bowlers_id <- bowlers_used_per_innings$id[1]

cat("Innings with maximum bowlers used:\n")

## Innings with maximum bowlers used:

print(head(max_bowlers_innings_id, 1))

## [1] "Chennai Super Kings"

# Bowling statistics for that innings
bowler_stats <- df %>%
  filter(id == max_bowlers_id) %>%
  group_by(bowler) %>%
  summarise(
    Runs_Conceded = sum(total_runs, extra_runs),
    Wickets = sum(!is.na(player_dismissed)), 
    Balls_Bowled = n()
  ) %>%
  mutate(
    Overs = Balls_Bowled / 6,
    Economy = Runs_Conceded / Overs
  )

## `summarise()` ungrouping output (override with `.groups` argument)

cat("\nBowling statistics for the innings see the list:\n")

## 
## Bowling statistics for the innings see the list:

cat("\n")

print(paste('8.Which teams had won the “super-over” battle while batting first? For these matches, find the season and Player of the Match.    '))

## [1] "8.Which teams had won the “super-over” battle while batting first? For these matches, find the season and Player of the Match.    "

## [1] "8.Which teams had won the “super-over” battle while batting first? For these matches, find the season and Player of the Match.    "
df <- merged_data
# Filter for matches with super overs that ended in a tie
super_over_matches <- df %>%
 filter(result == 'tie', is_super_over == 1)

super_over_matches <- super_over_matches %>%
              distinct(season,player_of_match,toss_winner,winner)

# Filter for matches where the toss winner also won the match
super_over_winners <- super_over_matches %>%
  filter(toss_winner == winner)


cat("Teams that won the super over battle while batting first see the list:\n")

## Teams that won the super over battle while batting first see the list:

cat("\n")

cat("\n")

print(paste('9.Considering all seasons, find the highest score in IPL. Show the score distribution using a suitable graph. see the plot for answer  '))

## [1] "9.Considering all seasons, find the highest score in IPL. Show the score distribution using a suitable graph. see the plot for answer  "

## [1] "9.Considering all seasons, find the highest score in IPL. Show the score distribution using a suitable graph. see the plot for answer  "
df_combined <- merged_data


# Calculate total score per match
match_scores <- df %>%
  group_by(id) %>%
  summarise(total_score = sum(total_runs), .groups = "drop")   %>%
  arrange(desc(total_score))

# Display the highest score
cat("Highest score in IPL See the Graph IPL Distribution:\n")

## Highest score in IPL See the Graph IPL Distribution:

cat("\n")

print(paste('10. Which IPL Team is most successful? The most successful team is the one who has one the greatest number of matches, considering all seasons. Also, interpret the result with the help of plots/graphs. see the plot for answer '))

## [1] "10. Which IPL Team is most successful? The most successful team is the one who has one the greatest number of matches, considering all seasons. Also, interpret the result with the help of plots/graphs. see the plot for answer "

## [1] "10. Which IPL Team is most successful? The most successful team is the one who has one the greatest number of matches, considering all seasons. Also, interpret the result with the help of plots/graphs. see the plot for answer "
# Step 1: Group by 'winner' and calculate the total number of wins
df <- matches
  filter <- df %>%
  filter(team1 == winner, team2 == winner)
team_wins <- df_combined %>%
  group_by(winner) %>%
  summarise(total_wins = n(), .groups = 'drop') %>%
  distinct(winner, total_wins)
  
  

# Step 2: Find the most successful team
most_successful_team <- team_wins %>%
  arrange(desc(total_wins))
most_successful_team  <- head(most_successful_team, 1)




# Mensaje inicial
message <- "Which IPL Team is most successful?:\n"

# Función para concatenar cada fila del dataframe con un formato específico
formatted_data <- apply(most_successful_team, 1, function(row) {
  paste("- Winner:", row["winner"]
            ,'Count the number of matches' ,row["total_wins"] )
})

# Unir el mensaje con los datos formateados
output <- paste(message, paste(formatted_data, collapse = "\n"))

# Imprimir el resultado
cat(output)

## Which IPL Team is most successful?:
##  - Winner: Mumbai Indians Count the number of matches 22164

swetatyagi_term-end assignment

R Markdown

Including Plots