# Load the pre-processed CSV inputs from the working directory. Each file is
# read into a data frame that carries the same name as the file.
big_6_data <- read.csv(file = "big_6_data.csv")
matchesproject <- read.csv(file = "matchesproject.csv")
possession_long_top6 <- read.csv(file = "possession_long_top6.csv")
team_category_avg <- read.csv(file = "team_category_avg.csv")
top6_plot_data <- read.csv(file = "top6_plot_data.csv")
others_plot_data <- read.csv(file = "others_plot_data.csv")
data15 <- read.csv(file = "data15.csv")
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer)
library(ggpattern)
library(extrafont)
## Registering fonts with R
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(gganimate)
library(shiny)
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
## 
## Attaching package: 'showtextdb'
## The following object is masked from 'package:extrafont':
## 
##     font_install
library(tidyr)

# Register the Google font "Lato" under the family name "lato", which is the
# name the plot themes below request via `family = "lato"`.
font_add_google(name = "Lato", family = "lato")

# Turn on showtext so that text in plots is rendered with the registered fonts.
showtext_auto(enable = TRUE)
# Restrict the match data to the 2021-2022 Premier League season
# (13 August 2021 - 22 May 2022).

# Parse the `date` column of `matchesproject` (written as YYYY-MM-DD) into
# Date values so it can be compared against the season boundaries.
matchesproject$date <- as.Date(matchesproject$date, format = "%Y-%m-%d")

# First and last day of the season; both ends are inclusive.
start_date <- as.Date("2021-08-13")
end_date <- as.Date("2022-05-22")

# Keep only the matches played inside the season window.
prem2021_2022_seas <- filter(
  matchesproject,
  date >= start_date,
  date <= end_date
)

# Football is the most famous sport in the world, and the Premier League is its greatest league. It features the best players from around the world, a 38-game season packed with high-intensity matches, and the best fans anywhere. In the last 20 years, only one team outside England's Big 6 (Manchester United, Manchester City, Liverpool, Arsenal, Chelsea, Tottenham) has won the Premier League, and the 2021-2022 season was no exception. The Big 6 are the teams with the most culture and history and, usually, the largest fan bases. But what gives these teams the edge? Winning the Premier League is the equivalent of winning the Super Bowl in America. Each player's name, and the team they represent, goes down in history and sets a precedent for the season that follows. That leaves you wondering, "What goes into winning the Premier League?" Before we attempt to unravel this question, let's start with what the sport is known for: goals, and lots of them.

# Player-level statistics for the 2021-2022 season, restricted to the Top 6.

# Define the Top 6 teams
top_6_teams <- c(
  "Manchester City", "Liverpool", "Chelsea",
  "Tottenham Hotspur", "Arsenal", "Manchester United"
)

# Read the player stats file, drop the `Nation` column, strip leading/trailing
# whitespace from the team names, and keep only rows for the Top 6 teams.
data <- read.csv(
  "Football Players Stats (Premier League 2021-2022).csv",
  encoding = "latin1"
) %>%
  select(-Nation) %>%
  mutate(Team = trimws(Team)) %>%
  filter(Team %in% top_6_teams)

# Calculate Offensive Efficiency
#
# Computes one weighted offensive score per team from player-level rows.
#
# Arguments:
#   data    Data frame with a `Team` column and numeric `Gls`, `Ast` and `xG`
#           columns (one row per player).
#   weights Named numeric vector with entries `Gls`, `Ast` and `xG` giving the
#           weight of each metric. Defaults to 0.5 / 0.3 / 0.2, the values
#           this script has always used.
#
# Returns:
#   The result of `summarise()`: one row per `Team` with a single
#   `OffensiveEfficiency` column holding the summed weighted score.
#
# Notes:
#   The weighted score is computed per player before summing, so a player with
#   an NA in any of the three metrics contributes nothing to the team total
#   (the NA row is dropped by `na.rm = TRUE`).
calculate_offensive_efficiency <- function(
    data,
    weights = c(Gls = 0.5, Ast = 0.3, xG = 0.2)) {
  metrics <- c("Gls", "Ast", "xG")

  # Fail early with a clear message instead of a cryptic error (or silent NA
  # weights) from inside summarise().
  missing_weights <- setdiff(metrics, names(weights))
  if (length(missing_weights) > 0) {
    stop(
      "`weights` is missing entries for: ",
      paste(missing_weights, collapse = ", "),
      call. = FALSE
    )
  }
  missing_cols <- setdiff(c("Team", metrics), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "`data` is missing required columns: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Extract the weights as plain scalars; `[[` drops the element names that
  # `weights["Gls"]` would otherwise carry into the arithmetic.
  w_gls <- weights[["Gls"]]
  w_ast <- weights[["Ast"]]
  w_xg <- weights[["xG"]]

  data %>%
    group_by(Team) %>%
    summarise(
      OffensiveEfficiency = sum(
        (Gls * w_gls) + (Ast * w_ast) + (xG * w_xg),
        na.rm = TRUE
      )
    )
}

# Score every Top 6 team, then rescale the scores so that the highest-scoring
# team sits at exactly 100.
offensive_efficiency <- data %>%
  calculate_offensive_efficiency() %>%
  mutate(
    OffensiveEfficiency =
      100 * OffensiveEfficiency / max(OffensiveEfficiency, na.rm = TRUE)
  )

# Attach each team's normalized score to its player rows.
data <- left_join(data, offensive_efficiency, by = "Team")

# Bar chart of the normalized Offensive Efficiency of each Top 6 team: striped
# bars on an off-white canvas, ordered from the lowest to the highest score.
p <- ggplot(
  offensive_efficiency,
  aes(
    x = reorder(Team, OffensiveEfficiency),
    y = OffensiveEfficiency,
    fill = Team
  )
) +
  # One striped bar per team; bar heights are taken directly from the data
  geom_col_pattern(
    width = 0.7,
    alpha = 0.8,
    color = "black",
    pattern = "stripe",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.05
  ) +
  # Exact value printed just above each bar
  geom_text(
    aes(label = round(OffensiveEfficiency, 1)),
    vjust = -0.5,
    size = 4,
    fontface = "bold"
  ) +
  # Custom team colors
  scale_fill_manual(
    values = c(
      "Manchester City" = "#1f78b4",
      "Liverpool" = "#e31a1c",
      "Chelsea" = "#33a02c",
      "Tottenham Hotspur" = "#ff7f00",
      "Arsenal" = "#6a3d9a",
      "Manchester United" = "#b15928"
    )
  ) +
  labs(
    title = "Offensive Efficiency of Top 6 Teams",
    subtitle = "2021-2022 Premier League Season",
    x = "Team",
    y = "Offensive Efficiency"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Off-white background for both the panel and the surrounding plot area
    plot.background = element_rect(fill = "#F7F7F7", color = NA),
    panel.background = element_rect(fill = "#F7F7F7", color = NA),
    panel.grid.major = element_line(color = "#DADADA"),
    panel.grid.minor = element_blank(),
    # Centered title and subtitle
    plot.title = element_text(
      face = "bold", size = 18, hjust = 0.5, margin = margin(b = 10)
    ),
    plot.subtitle = element_text(
      size = 14, hjust = 0.5, margin = margin(b = 15)
    ),
    # Team names are slanted so they do not overlap
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    # The x-axis already names every team, so the fill legend is redundant
    legend.position = "none"
  )

# Print the plot
print(p)

# From here on, `matchesproject` refers to the 2021-2022 season matches only.
matchesproject <- prem2021_2022_seas

# Filter for Big Six teams including Tottenham
big6_teams <- c(
  "Arsenal", "Chelsea", "Liverpool",
  "Manchester City", "Manchester United", "Tottenham Hotspur"
)
big6_tot <- matchesproject %>%
  filter(team %in% big6_teams) %>%
  select(team, venue, xg)

# Average xG per team and venue, reshaped to one row per team with one column
# per venue value (the code below expects these to be `Home` and `Away`).
# `.groups = "drop"` returns an ungrouped result, so no stale grouping (and no
# summarise() message) leaks into the later steps.
xg_summary <- big6_tot %>%
  group_by(team, venue) %>%
  summarise(average_xg = mean(xg, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = venue, values_from = average_xg) %>%
  mutate(Away = abs(Away))  # Ensure away values are positive

# Reshape the data for plotting: one row per team and venue
xg_plot_data <- xg_summary %>%
  pivot_longer(cols = -team, names_to = "Venue", values_to = "xG") %>%
  mutate(Venue = factor(Venue, levels = c("Away", "Home")))

# Order teams by home advantage (average home xG minus average away xG),
# smallest first. This is NOT the overall xG total.
xg_plot_data$team <- factor(
  xg_plot_data$team,
  levels = xg_summary %>%
    mutate(home_advantage = Home - Away) %>%
    arrange(home_advantage) %>%
    pull(team)
)

# Set the maximum xG value for scaling (rounded up to a whole number)
max_xg <- ceiling(max(xg_plot_data$xG, na.rm = TRUE))



# `big6_teams` and `big6_tot` were already built from `matchesproject` earlier
# in this script, and neither they nor `matchesproject` have been modified
# since, so the identical filter that used to be repeated here is not needed.


# Custom colors for color-blind accessibility
home_color <- "#1E88E5"  # Blue
away_color <- "#F57C00"  # Orange

# Diverging bar chart of average xG per team: away bars extend to the left of
# zero, home bars to the right, and the axis labels show absolute values.
ggplot(
  # Compute the signed bar length once so the bars and the labels share it
  xg_plot_data %>%
    mutate(signed_xg = ifelse(Venue == "Away", -xG, xG)),
  aes(x = signed_xg, y = team, fill = Venue)
) +
  geom_bar_pattern(
    # Map the stripe angle to Venue and pin the actual angles with the manual
    # scale below. A numeric angle inside aes() is passed through ggpattern's
    # continuous angle scale, which rescales the values instead of using them
    # literally, so 45 / -45 would not be drawn as written.
    aes(pattern_angle = Venue),
    stat = "identity",
    position = "identity",
    width = 0.6,
    pattern = "stripe",
    pattern_fill = "black",
    pattern_density = 0.1,
    pattern_spacing = 0.02,
    # Keep the "Match Venue" fill legend, suppress the redundant angle legend
    show.legend = c(fill = TRUE, pattern_angle = FALSE)
  ) +
  scale_pattern_angle_manual(values = c("Away" = 45, "Home" = -45)) +
  scale_x_continuous(
    limits = c(-max_xg, max_xg),  # Symmetric x-axis limits
    breaks = seq(-max_xg, max_xg, by = 1),  # Breaks at intervals of 1
    labels = abs  # Display as positive
  ) +
  scale_fill_manual(
    values = c("Home" = home_color, "Away" = away_color),
    # Explicit breaks keep each label tied to its venue regardless of the
    # factor level order
    breaks = c("Away", "Home"),
    labels = c("Away Matches", "Home Matches"),
    name = "Match Venue"
  ) +
  labs(
    title = "Expected Goals (xG) at Home vs. Away",
    subtitle = "Analysis of Big Six Teams in the Premier League",
    x = "Average Expected Goals (xG)",
    y = NULL  # Removes redundant "Team" label
  ) +
  # Value label centered inside each bar
  geom_text(
    aes(label = round(xG, 1)),
    position = position_stack(vjust = 0.5),
    size = 3.5,
    color = "white"
  ) +
  # Dashed divider between the away side and the home side
  geom_vline(
    xintercept = 0, color = "black", linetype = "dashed", linewidth = 0.8
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(
      size = 18, face = "bold", family = "lato", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 14, family = "lato", hjust = 0.5),
    axis.title.x = element_text(size = 12, face = "bold", family = "lato"),
    axis.text.x = element_text(size = 10, family = "lato"),
    axis.text.y = element_text(size = 10, family = "lato"),
    legend.title = element_text(size = 12, face = "bold", family = "lato"),
    legend.text = element_text(size = 10, family = "lato"),
    panel.grid.major.x = element_line(color = "grey80", linetype = "dotted"),
    panel.grid.minor.x = element_blank(),
    # Off-white background for the panel and the surrounding plot area
    panel.background = element_rect(fill = "#f9f9f9", color = NA),
    plot.background = element_rect(fill = "#f9f9f9", color = NA)
  )

# So let's see if there is a difference between home and away. Scoring both at home and away is a valuable skill for a team to possess. Performing well in both settings is critical because it reflects a team's consistency, adaptability, and resilience. While home matches offer the advantage of familiarity and crowd support, scoring away demonstrates a team's ability to overcome adversity, such as hostile environments and travel fatigue. Consistent scoring across venues is often the hallmark of title-winning teams. Liverpool and Manchester City showcase why they are perennial title challengers: not only do they dominate possession, but they also consistently translate it into high-quality chances both at home and away. Arsenal struggle significantly to score at away venues, leading to poorer results and performances. Chelsea, Manchester City, and Tottenham all show a small decline in expected goals away from home, which leads to a small dip in performance on the road.

# Interactive scatter plot of goals scored (x) against expected goals (y) for
# the rows of `big_6_data`. Each team gets its own color and marker shape, and
# hovering over a point shows the match details.
plot2 <- plot_ly(
  data = big_6_data,
  type = "scatter",
  mode = "markers",
  x = ~gf,
  y = ~xg,
  # Color-blind friendly palette, one color per team
  color = ~team,
  colors = brewer.pal(6, "Set2"),
  # Different shapes for teams, so they stay distinguishable without color
  symbol = ~team,
  symbols = c(
    "circle", "square", "diamond",
    "triangle-up", "triangle-down", "cross"
  ),
  marker = list(size = 12, opacity = 0.9),
  # Hover tooltip: show only this custom text
  hoverinfo = "text",
  text = ~paste(
    "Team:", team, "<br>",
    "Date:", date, "<br>",
    "Opponent:", opponent, "<br>",
    "Venue:", venue, "<br>",
    "xG:", xg, "<br>",
    "Goals Scored:", gf
  )
)

# Titles, legend heading and off-white backgrounds
plot2 <- layout(
  plot2,
  title = "Goals Scored vs. Expected Goals (Big 6: 2021-2022 Premier League Season)",
  xaxis = list(title = "Goals Scored"),
  yaxis = list(title = "Expected Goals"),
  hovermode = "closest",
  legend = list(title = list(text = "Team")),
  plot_bgcolor = "rgb(245, 245, 245)",  # Off-white background for the plot area
  paper_bgcolor = "rgb(245, 245, 245)"  # Off-white background for the entire plot
)

# Show the plot
plot2