##install.packages("dplyr")
##install.packages("readr")
## Data dictionary (one row per player)
##Name: player's name
##Club: club the player plays for
##Nationality: player's nationality
##Position: player's position on the pitch
##Age: player's age
##Matches: matches played
##Starts: matches played from the start (in the starting line-up)
##Mins: minutes played
##Goals: goals scored by the player
##Assists: assists made by the player
##Passes_Attempted: passes attempted
##Perc_Passes_Completed: percentage of attempted passes that were completed
##Penalty_Goals: goals scored from penalties
##Penalty_Attempted: penalties taken
##xG: expected goals
##xA: expected assists
##Yellow_Cards: yellow cards
##Red_Cards: red cards


##Quantitative variables: Age, Matches, Starts, Mins, Goals, Assists, Passes_Attempted, Perc_Passes_Completed, Penalty_Goals, Penalty_Attempted, xG, xA, Yellow_Cards, Red_Cards
##Qualitative variables: Name, Club, Nationality, Position
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
library(plotly)
## 
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the player statistics from EPL_20_21.csv (path is relative to the
# working directory) and preview the first rows.
statsPremier <- utils::read.csv(file = "EPL_20_21.csv")
head(statsPremier, n = 6L)
##                Name    Club Nationality Position Age Matches Starts Mins Goals
## 1       Mason Mount Chelsea         ENG    MF,FW  21      36     32 2890     6
## 2     Edouard Mendy Chelsea         SEN       GK  28      31     31 2745     0
## 3       Timo Werner Chelsea         GER       FW  24      35     29 2602     6
## 4      Ben Chilwell Chelsea         ENG       DF  23      27     27 2286     3
## 5       Reece James Chelsea         ENG       DF  20      32     25 2373     1
## 6 César Azpilicueta Chelsea         ESP       DF  30      26     24 2188     1
##   Assists Passes_Attempted Perc_Passes_Completed Penalty_Goals
## 1       5             1881                  82.3             1
## 2       0             1007                  84.6             0
## 3       8              826                  77.2             0
## 4       5             1806                  78.6             0
## 5       2             1987                  85.0             0
## 6       2             2015                  87.5             0
##   Penalty_Attempted   xG   xA Yellow_Cards Red_Cards
## 1                 1 0.21 0.24            2         0
## 2                 0 0.00 0.00            2         0
## 3                 0 0.41 0.21            2         0
## 4                 0 0.10 0.11            3         0
## 5                 0 0.06 0.12            3         0
## 6                 0 0.03 0.11            5         1
# Collect the distinct values of Position in sorted order (the same set that
# the levels of a factor built from the column would give).
graVar <- sort(unique(as.character(statsPremier$Position)))
print(graVar)
##  [1] "DF"    "DF,FW" "DF,MF" "FW"    "FW,DF" "FW,MF" "GK"    "MF"    "MF,DF"
## [10] "MF,FW"
# Build statsPremierGK, a copy of the data in which Position is replaced by a
# goalkeeper flag: "SI" when the position is exactly "GK", "NO" otherwise.
statsPremierGK <- mutate(
  statsPremier,
  Position = if_else(Position == "GK", "SI", "NO")
)
head(statsPremierGK)
##                Name    Club Nationality Position Age Matches Starts Mins Goals
## 1       Mason Mount Chelsea         ENG       NO  21      36     32 2890     6
## 2     Edouard Mendy Chelsea         SEN       SI  28      31     31 2745     0
## 3       Timo Werner Chelsea         GER       NO  24      35     29 2602     6
## 4      Ben Chilwell Chelsea         ENG       NO  23      27     27 2286     3
## 5       Reece James Chelsea         ENG       NO  20      32     25 2373     1
## 6 César Azpilicueta Chelsea         ESP       NO  30      26     24 2188     1
##   Assists Passes_Attempted Perc_Passes_Completed Penalty_Goals
## 1       5             1881                  82.3             1
## 2       0             1007                  84.6             0
## 3       8              826                  77.2             0
## 4       5             1806                  78.6             0
## 5       2             1987                  85.0             0
## 6       2             2015                  87.5             0
##   Penalty_Attempted   xG   xA Yellow_Cards Red_Cards
## 1                 1 0.21 0.24            2         0
## 2                 0 0.00 0.00            2         0
## 3                 0 0.41 0.21            2         0
## 4                 0 0.10 0.11            3         0
## 5                 0 0.06 0.12            3         0
## 6                 0 0.03 0.11            5         1
# Frequency of each goalkeeper flag value ("NO" / "SI").
tabla_GK <- table(statsPremierGK$Position)

# Bar chart of those frequencies; the y axis is extended 20% past the tallest
# bar so the count label drawn above it stays inside the plot region.
colores <- c("red", "orange")
bp_GK <- barplot(
  height = tabla_GK,
  col = colores,
  ylim = c(0, 1.2 * max(tabla_GK)),
  main = "Arqueros en los equipos de la premier league",
  xlab = "arqueros",
  ylab = "Frecuencia"
)

# barplot() returns the bar midpoints; use them to print each count on top of
# its bar.
text(x = bp_GK, y = tabla_GK, labels = tabla_GK,
     pos = 3, cex = 0.8, col = "black")

# Pie chart: share of players flagged as goalkeeper ("SI") vs. not ("NO").

# Count rows per flag value and convert the counts to percentages.
group_GK <- statsPremierGK %>%
  group_by(Position) %>%
  summarise(cantidad = n())
totalce <- sum(group_GK$cantidad)

stats1 <- (group_GK$cantidad / totalce) * 100
stats1
## [1] 92.105263  7.894737

# Slices, labels and legend are all taken from group_GK so they cannot get out
# of step with each other, and the palette is generated once with exactly one
# colour per category instead of a hard-coded four.
colores_pie <- rocket(nrow(group_GK))
pie(group_GK$cantidad,
    labels = paste0(group_GK$Position, "\n", round(stats1, 1), "%"),
    col = colores_pie,
    border = colores_pie,
    main = "Diagrama circular")
legend("bottomright",
       legend = group_GK$Position,
       cex = 0.75,
       fill = colores_pie)

# Top 10 goal scorers, highest first.
#
# slice_max() replaces the superseded top_n(). Like top_n(), it keeps ties at
# the cut-off by default, so more than 10 rows can be returned when several
# players are level on goals at tenth place.
# Goals is already numeric when it reaches this point (it is ranked and sorted
# numerically here), so no comma-stripping / re-conversion step is needed.
topGoleadores <- statsPremier %>%
  select(Name, Goals) %>%
  slice_max(Goals, n = 10) %>%
  arrange(desc(Goals))

topGoleadores
##                     Name Goals
## 1             Harry Kane    23
## 2          Mohamed Salah    22
## 3        Bruno Fernandes    18
## 4          Son Heung-min    17
## 5        Patrick Bamford    17
## 6  Dominic Calvert-Lewin    16
## 7            Jamie Vardy    15
## 8          Ollie Watkins    14
## 9         İlkay Gündoğan    13
## 10   Alexandre Lacazette    13
# Horizontal bar chart of the top scorers, bars ordered by number of goals.
ggplot(topGoleadores, aes(x = reorder(Name, Goals), y = Goals)) +
  geom_col(fill = "#4682B4") +
  # The chart is flipped, so bars grow left-to-right: the value label has to
  # be pushed horizontally (hjust) past the end of the bar. vjust would only
  # shift it vertically and leave it sitting across the bar end.
  geom_text(aes(label = Goals), hjust = -0.3, size = 3) +
  coord_flip() +
  labs(title = "Top 10 goleadores",
       x = "jugador",
       y = "goles") +
  # Extra room at the upper end of the goals axis keeps the label of the
  # longest bar from being clipped.
  scale_y_continuous(labels = scales::comma,
                     expand = expansion(mult = c(0.05, 0.1))) +
  theme_minimal()

### Histogram of player ages (one bin per year), shown as an interactive plot.

# Distinct ages present in the data (missing values are dropped by sort()).
edades <- sort(unique(statsPremier$Age))

# Each age gets its own fill colour. Set3 only provides 12 colours, which is
# fewer than the number of distinct ages, so the palette is interpolated to
# the required length instead of letting the brewer scale run out of colours.
edadJugadores <- ggplot(statsPremier, aes(x = Age, fill = factor(Age))) +
  geom_histogram(binwidth = 1) +
  labs(title = "Distribución de Edades de los Jugadores",
       fill = "Edad",
       y = "Cantidad de Jugadores") +
  scale_fill_manual(
    values = colorRampPalette(
      RColorBrewer::brewer.pal(12, "Set3")
    )(length(edades))
  ) +
  # One axis tick per year, limited to the ages that actually occur.
  scale_x_continuous(breaks = seq(min(edades), max(edades), by = 1))

edadJugadoresGrapHisto <- ggplotly(edadJugadores)
edadJugadoresGrapHisto
### Mean pass-completion percentage by position, shown as an interactive plot.

# There is one bar per distinct Position value. Set2 only provides 8 colours,
# so the palette is interpolated to the number of positions instead of
# letting the brewer scale run out of colours.
paleta_pases <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set2"))(
  length(unique(statsPremier$Position))
)

pasesComPos <- ggplot(
  statsPremier,
  aes(x = Position, y = Perc_Passes_Completed, fill = Position)
) +
  # Bar height is the mean of Perc_Passes_Completed within each position.
  geom_bar(stat = "summary", fun = "mean") +
  labs(title = "Porcentaje de Pases Completados por Posición",
       x = "Posición",
       y = "Porcentaje de Pases Completados (%)") +
  theme_minimal() +
  scale_fill_manual(values = paleta_pases) +
  # Rows whose value is missing or outside 0-100 are dropped by the scale
  # before the mean is computed (ggplot2 reports how many were removed).
  scale_y_continuous(limits = c(0, 100), expand = c(0, 0)) +
  # Print the computed mean above each bar; after_stat(y) is the current
  # replacement for the deprecated ..y.. notation.
  geom_text(stat = "summary", fun = "mean",
            aes(label = round(after_stat(y), 2)), vjust = -0.5)

grapasesComPos <- ggplotly(pasesComPos)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 2 rows containing non-finite outside the scale range
## (`stat_summary()`).
grapasesComPos
## Goals against minutes played, for players whose Position is exactly "FW"
## or exactly "MF" (combined positions such as "FW,MF" are not included).

fw_stats <- filter(statsPremier, Position %in% c("FW", "MF"))

# Scatter plot coloured by position. The `text` aesthetic carries the player
# name so it can be used as the hover tooltip below.
golesMinutps <- ggplot(
  data = fw_stats,
  mapping = aes(x = Mins, y = Goals, color = Position, text = Name)
) +
  geom_point(alpha = 0.7, size = 3) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    title = "Relación entre Minutos Jugados y Goles (FW y MF)",
    x = "Minutos Jugados",
    y = "Goles",
    color = "Posición"
  )

# Interactive version; the tooltip shows only the player name.
gragolesMinutps <- ggplotly(golesMinutps, tooltip = "text")

gragolesMinutps
## Distribution of goals by position (box plot), shown as an interactive plot.

# The four single-role positions keep their fixed colours. Any other Position
# value present in the data (for example the combined roles) gets a colour
# interpolated from Set2, so that no box falls back to the scale's NA colour
# for lack of an entry in the manual palette.
colores_posicion <- c("FW" = "blue", "MF" = "green",
                      "DF" = "red", "GK" = "purple")
posiciones_extra <- sort(unique(as.character(statsPremier$Position)))
posiciones_extra <- posiciones_extra[
  !posiciones_extra %in% names(colores_posicion)
]
if (length(posiciones_extra) > 0) {
  colores_extra <- colorRampPalette(RColorBrewer::brewer.pal(8, "Set2"))(
    length(posiciones_extra)
  )
  colores_posicion <- c(colores_posicion,
                        setNames(colores_extra, posiciones_extra))
}

golesposi <- ggplot(statsPremier,
                    aes(x = Position, y = Goals, fill = Position)) +
  geom_boxplot() +
  labs(title = "Distribución de Goles por Posición",
       x = "Posición",
       y = "Goles") +
  theme_minimal() +
  scale_fill_manual(values = colores_posicion)

gragolesposi <- ggplotly(golesposi)

gragolesposi