library(rvest)
library(stringr)
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ purrr::flatten()        masks jsonlite::flatten()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggsoccer)
library(ggrepel)
#install.packages()
# Directory that is searched for downloaded HTML match pages.
diretorio <- "~/Downloads/"
# NOTE(review): hard-coded, machine-specific working directory. Nothing in this
# section depends on it (html_path below is anchored at the home directory);
# it is kept only in case later code reads or writes relative paths.
setwd("/Users/christiantabilojr/Desktop")
# List every file in that directory whose name ends in .html.
arquivos_html <- list.files(path = diretorio, pattern = "\\.html$", full.names = TRUE)
# Show the list of HTML files
#print(arquivos_html)

# Path of the saved match page to analyse.
html_path <- "~/Desktop/arsenal vs manchester city.html"
html_content <- read_html(html_path)
# Raw text of the document; the regex below looks for the match payload in it.
html_text <- html_content %>% html_text()

# Capture everything after `require.config.params["args"] = ` up to the first
# semicolon.
regex_pattern <- '(?<=require\\.config\\.params\\["args"\\].=.)[\\s\\S]*?;'
data_txt <- str_extract(html_text, regex_pattern)
# str_extract() returns NA when nothing matches; fail here with a clear message
# instead of letting fromJSON() report an unrelated parsing error.
if (is.na(data_txt)) {
  stop(
    "Could not find the `require.config.params[\"args\"]` payload in ",
    html_path,
    call. = FALSE
  )
}

# The top-level keys of the payload are bare identifiers, which is not valid
# JSON. Quote them in one pass, but only where the name is used as an unquoted
# key (not preceded by a quote or word character, and followed by a colon), so
# identical names that are already quoted inside the payload are left alone.
json_keys <- c(
  "matchId",
  "matchCentreData",
  "matchCentreEventTypeJson",
  "formationIdNameMappings"
)
key_pattern <- paste0(
  "(?<![\"\\w])(",
  paste(json_keys, collapse = "|"),
  ")(?=\\s*:)"
)
data_txt <- str_replace_all(data_txt, key_pattern, "\"\\1\"")
# Drop the statement terminator: a trailing `};` becomes `}`.
data_txt <- gsub("};$", "}", data_txt)

# Parse the JSON text into an R list.
data_list <- fromJSON(data_txt)

# Pull out the event log and the player id -> name dictionary.
events <- data_list$matchCentreData$events
players <- data_list$matchCentreData$playerIdNameDictionary

events_df <- as.data.frame(events)
# stack() turns the named dictionary into two columns: `values` (the player
# name) and `ind` (the player id, taken from the element names).
players_df <- stack(players)
# Attach the player name to every event. all.x = TRUE keeps events whose
# playerId has no match in the dictionary; their `values` is NA.
events_df <- merge(events_df, players_df, by.x = "playerId", by.y = "ind", all.x = TRUE)
#mean(events_df$columna, na.rm = TRUE)  # Promedio ignorando NA
#sum(events_df$columna, na.rm = TRUE)   # Suma ignorando NA
# Keep only the events of the team whose teamId is "167"; change the id to
# look at the other side.
local <- filter(events_df, teamId == "167")
#names(visitante)

# Event type name of every retained event.
EAD <- local[["type"]][["displayName"]]

# Frequency of each event type.
table(EAD)
## EAD
##          Aerial    BallRecovery       BallTouch     BlockedPass       Challenge 
##               9              35              22               5               9 
##       Clearance   CornerAwarded    Dispossessed             End           Error 
##              20               7               9               3               4 
##    FormationSet            Foul            Goal    Interception    KeeperPickup 
##               1              13               1               6               3 
##     MissedShots    OffsideGiven     OffsidePass OffsideProvoked            Pass 
##               3               1               1               3             500 
##           Punch            Save       SavedShot   ShieldBallOpp         Smother 
##               2               5               3               2               1 
##           Start SubstitutionOff  SubstitutionOn          Tackle          TakeOn 
##               2               2               2              13               6
# Copy the nested event type name into a plain top-level column.
local[["displayName"]] <- unlist(local$type$displayName)

# One row per player (`values`), one column per event type, each cell holding
# the number of events of that type; combinations that never occur are 0.
summary_table <- pivot_wider(
  summarise(
    group_by(local, values, displayName),
    event_count = n(),
    .groups = "drop"
  ),
  names_from = displayName,
  values_from = event_count,
  values_fill = 0
)

#str(local)
#summary(local)

# Preview the first rows of the event table.
head(local)
##   playerId         id eventId minute second teamId    x    y expandedMinute
## 1    73084 2772044625     635     71     38    167 23.4 91.1             73
## 2    73084 2772045683     645     72     58    167 40.2 87.9             74
## 3    73084 2772055459     722     85     49    167 37.0 70.9             87
## 4    73084 2772047439     667     75      1    167 68.4 77.3             77
## 5    73084 2772044305     631     71     17    167  0.0  0.0             73
## 6    73084 2772049585     690     77     59    167 41.2 65.9             79
##   period.value period.displayName type.value type.displayName outcomeType.value
## 1            2         SecondHalf          1             Pass                 0
## 2            2         SecondHalf          1             Pass                 1
## 3            2         SecondHalf         45        Challenge                 0
## 4            2         SecondHalf          1             Pass                 1
## 5            2         SecondHalf         19   SubstitutionOn                 1
## 6            2         SecondHalf          1             Pass                 0
##   outcomeType.displayName
## 1            Unsuccessful
## 2              Successful
## 3            Unsuccessful
## 4              Successful
## 5              Successful
## 6            Unsuccessful
##                                                                                                                           qualifiers
## 1                                      56, 212, 141, 140, 213, Zone, Length, PassEndY, PassEndX, Angle, Back, 17.7, 93.6, 40.2, 0.10
## 2                                       140, 213, 141, 212, 56, PassEndX, Angle, PassEndY, Length, Zone, 33.3, 2.55, 95.1, 8.7, Back
## 3                                                                 56, 285, 233, Zone, Defensive, OppositeRelatedEvent, Back, NA, 572
## 4                                      56, 140, 213, 212, 141, Zone, PassEndX, Angle, Length, PassEndY, Left, 62.3, 1.99, 15.8, 98.5
## 5                              59, 44, 55, 145, JerseyNumber, PlayerPosition, RelatedEventId, FormationSlot, 17, Midfielder, 630, 10
## 6 155, 56, 141, 212, 213, 1, 140, Chipped, Zone, PassEndY, Length, Angle, Longball, PassEndX, NA, Center, 31.9, 51.8, 5.82, NA, 85.3
##                  satisfiedEventsTypes isTouch endX endY relatedEventId
## 1  91, 118, 120, 29, 36, 37, 216, 218    TRUE 40.2 93.6             NA
## 2       91, 117, 30, 35, 37, 215, 218    TRUE 33.3 95.1             NA
## 3                                  55   FALSE   NA   NA             NA
## 4  91, 119, 117, 30, 35, 37, 216, 218    TRUE 62.3 98.5             NA
## 5                                 213   FALSE   NA   NA            630
## 6 91, 120, 124, 128, 36, 38, 217, 218    TRUE 85.3 31.9             NA
##   relatedPlayerId blockedX blockedY goalMouthZ goalMouthY isShot isGoal
## 1              NA       NA       NA         NA         NA     NA     NA
## 2              NA       NA       NA         NA         NA     NA     NA
## 3              NA       NA       NA         NA         NA     NA     NA
## 4              NA       NA       NA         NA         NA     NA     NA
## 5          394749       NA       NA         NA         NA     NA     NA
## 6              NA       NA       NA         NA         NA     NA     NA
##   cardType.value cardType.displayName          values    displayName
## 1             NA                 <NA> Kevin De Bruyne           Pass
## 2             NA                 <NA> Kevin De Bruyne           Pass
## 3             NA                 <NA> Kevin De Bruyne      Challenge
## 4             NA                 <NA> Kevin De Bruyne           Pass
## 5             NA                 <NA> Kevin De Bruyne SubstitutionOn
## 6             NA                 <NA> Kevin De Bruyne           Pass
# Show the per-player event counts.
print(summary_table) 
## # A tibble: 14 × 31
##    values       BallRecovery BallTouch Challenge  Foul  Pass  Save Tackle Aerial
##    <chr>               <int>     <int>     <int> <int> <int> <int>  <int>  <int>
##  1 Bernardo Si…            1         4         2     1    42     1      3      0
##  2 Erling Haal…            0         1         0     3     5     0      0      2
##  3 James McAtee            0         1         1     0     1     0      1      0
##  4 John Stones             2         1         0     0    71     2      0      0
##  5 Josko Gvard…            8         3         1     0    67     0      3      2
##  6 Kevin De Br…            0         0         1     0     4     0      0      0
##  7 Manuel Akan…            3         0         0     1    64     0      0      2
##  8 Mateo Kovac…            6         4         1     3    58     0      4      2
##  9 Matheus Nun…            2         0         2     0    51     0      2      0
## 10 Omar Marmou…            2         4         0     1    26     0      0      1
## 11 Phil Foden              4         3         0     2    41     0      0      0
## 12 Savinho                 7         1         1     2    38     0      0      0
## 13 Stefan Orte…            0         0         0     0    32     2      0      0
## 14 <NA>                    0         0         0     0     0     0      0      0
## # ℹ 22 more variables: BlockedPass <int>, Dispossessed <int>, Goal <int>,
## #   SubstitutionOn <int>, TakeOn <int>, Clearance <int>, OffsidePass <int>,
## #   OffsideProvoked <int>, CornerAwarded <int>, Error <int>,
## #   Interception <int>, SavedShot <int>, MissedShots <int>, OffsideGiven <int>,
## #   ShieldBallOpp <int>, SubstitutionOff <int>, KeeperPickup <int>,
## #   Punch <int>, Smother <int>, End <int>, FormationSet <int>, Start <int>
# List the column names of the summary (player column plus one per event type).
names(summary_table)
##  [1] "values"          "BallRecovery"    "BallTouch"       "Challenge"      
##  [5] "Foul"            "Pass"            "Save"            "Tackle"         
##  [9] "Aerial"          "BlockedPass"     "Dispossessed"    "Goal"           
## [13] "SubstitutionOn"  "TakeOn"          "Clearance"       "OffsidePass"    
## [17] "OffsideProvoked" "CornerAwarded"   "Error"           "Interception"   
## [21] "SavedShot"       "MissedShots"     "OffsideGiven"    "ShieldBallOpp"  
## [25] "SubstitutionOff" "KeeperPickup"    "Punch"           "Smother"        
## [29] "End"             "FormationSet"    "Start"
library(showtext) # font handling for the plots
## Loading required package: sysfonts
## Loading required package: showtextdb
font_add_google("Lato", "lato") # register the Google font Lato under the family name "lato"
showtext_auto() # switch showtext on for the plots that follow


# Keep the five rows of the summary with the highest BallRecovery count.
dados_top <- slice(arrange(summary_table, desc(BallRecovery)), 1:5)

# Bar chart of those players, bars ordered by their BallRecovery count.
library(ggplot2)
ggplot(
  dados_top,
  aes(x = reorder(values, BallRecovery), y = BallRecovery, fill = values)
) +
  geom_col(show.legend = FALSE, colour = "white", width = 0.5) +
  # Print each count just above its bar.
  geom_text(
    aes(label = BallRecovery),
    vjust = -0.2,
    colour = "black",
    size = 4,
    fontface = "bold"
  ) +
  # Custom palette, one colour per bar.
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  ggtitle("Top 5 Jogadores com Mais BallRecovery") +
  xlab("Jogadores") +
  ylab("Número de BallRecovery") +
  theme_minimal(base_family = "lato") +
  theme(
    # Same fill for the panel and the area around it.
    plot.background = element_rect(fill = "#f8f5ed", colour = NA),
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    # Only horizontal major grid lines are kept.
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    # Small axis text so the player names fit.
    axis.text = element_text(size = 7, color = "#1D3557")
  )

library(showtext) # font handling for the plots
font_add_google("Lato", "lato") # register the Google font Lato under the family name "lato"
showtext_auto() # switch showtext on for the plots that follow


# Keep the five rows of the summary with the highest Pass count.
dados_top <- slice(arrange(summary_table, desc(Pass)), 1:5)

# Bar chart of those players, bars ordered by their Pass count.
library(ggplot2)
ggplot(
  dados_top,
  aes(x = reorder(values, Pass), y = Pass, fill = values)
) +
  geom_col(show.legend = FALSE, colour = "white", width = 0.5) +
  # Print each count just above its bar.
  geom_text(
    aes(label = Pass),
    vjust = -0.2,
    colour = "black",
    size = 4,
    fontface = "bold"
  ) +
  # Custom palette, one colour per bar.
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  ggtitle("Top 5 Jogadores com Mais Pass") +
  xlab("Jogadores") +
  ylab("Número de Pass") +
  theme_minimal(base_family = "lato") +
  theme(
    # Same fill for the panel and the area around it.
    plot.background = element_rect(fill = "#f8f5ed", colour = NA),
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    # Only horizontal major grid lines are kept.
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    # Small axis text so the player names fit.
    axis.text = element_text(size = 7, color = "#1D3557")
  )

library(showtext) # font handling for the plots
font_add_google("Lato", "lato") # register the Google font Lato under the family name "lato"
showtext_auto() # switch showtext on for the plots that follow


# Keep the four rows of the summary with the highest Aerial count.
dados_top <- summary_table %>%
  arrange(desc(Aerial)) %>%
  slice(1:4)

# Bar chart of those players, bars ordered by their Aerial count.
# The plot itself is stored in `fig_1`; previously `fig_1` (and `fig_2`) were
# bound to the return value of library(), so the figure was never kept.
library(ggplot2)
fig_1 <- ggplot(dados_top, aes(x = reorder(values, Aerial), y = Aerial, fill = values)) +
  geom_bar(stat = "identity", show.legend = FALSE, color = "white", width = 0.5) +
  # Print each count just above its bar.
  geom_text(aes(label = Aerial), vjust = -0.2, color = "black", size = 4, fontface = "bold") +
  # Custom palette; only as many colours as there are bars are used.
  scale_fill_manual(values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")) +
  labs(
    title = "Top 4 Jogadores com Mais Aerial",
    x = "Jogadores",
    y = "Número de Aerial"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    axis.text = element_text(size = 7, color = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    panel.background = element_rect(fill = "#f8f5ed", colour = NA), # panel fill
    plot.background = element_rect(fill = "#f8f5ed", colour = NA) # fill around the panel
  )

# NOTE(review): `fig_2` was also assigned in this section (to library()'s
# return value). It is kept as an alias of this figure so the name stays
# defined; delete it if nothing else refers to it.
fig_2 <- fig_1

# Assigning the plot suppresses auto-printing, so draw it explicitly.
fig_1

font_add_google("Lato", "lato") # register the Google font Lato under the family name "lato"
showtext_auto() # switch showtext on for the plots that follow


# Keep the four rows of the summary with the highest Tackle count.
# (This used to rank by Aerial while the chart below plots Tackle.)
dados_top <- summary_table %>%
  arrange(desc(Tackle)) %>%
  slice(1:4)

# Bar chart of those players, bars ordered by their Tackle count.
# The plot itself is stored in `fig_3`; previously `fig_3` was bound to the
# return value of library(ggplot2), so the figure was never kept.
library(ggplot2)
fig_3 <- ggplot(dados_top, aes(x = reorder(values, Tackle), y = Tackle, fill = values)) +
  geom_bar(stat = "identity", show.legend = FALSE, color = "white", width = 0.5) +
  # Label each bar with the value it actually shows (Tackle, not Aerial).
  geom_text(aes(label = Tackle), vjust = -0.2, color = "black", size = 4, fontface = "bold") +
  # Custom palette; only as many colours as there are bars are used.
  scale_fill_manual(values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")) +
  labs(
    title = "Top 4 Jogadores com Mais Tackle",
    x = "Jogadores",
    y = "Número de Tackle"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    axis.text = element_text(size = 7, color = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    panel.background = element_rect(fill = "#f8f5ed", colour = NA), # panel fill
    plot.background = element_rect(fill = "#f8f5ed", colour = NA) # fill around the panel
  )

# Assigning the plot suppresses auto-printing, so draw it explicitly.
fig_3

library(ggplot2)
library(ggrepel)
#install.packages("patchwork")
library(patchwork)

# Colours shared by the panels below: backgrounds, text and the team palette.
colors <- list(
  background = "#f5f5f5",
  text = "#222222",
  title = "#333333",
  palette = c("Time A" = "#f04b21", "Time B" = "grey50")
)

# Rows whose player name is NA are dropped for plotting, so they are neither
# drawn as a player bar nor passed to the label layer with a missing label.
player_stats <- summary_table %>%
  filter(!is.na(values))

# Theme shared by the two bar charts.
bar_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 10, face = "bold", color = colors$title),
    plot.background = element_rect(fill = colors$background),
    panel.background = element_rect(fill = colors$background),
    axis.text = element_text(color = colors$text),
    axis.title = element_text(color = colors$text)
  )

# Horizontal bars: number of Goal events per player. The values are event
# counts, so the labels say "Gols" rather than "Média de Gols".
p1 <- ggplot(player_stats, aes(x = values, y = Goal, fill = Goal)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  labs(title = "Gols por Jogador", x = "Jogador", y = "Gols") +
  bar_theme

# Horizontal bars: number of SubstitutionOn events per player.
p2 <- ggplot(player_stats, aes(x = values, y = SubstitutionOn, fill = SubstitutionOn)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  labs(title = "Substituições por Jogador", x = "Jogador", y = "Substituições") +
  bar_theme

# Scatter: MissedShots against BlockedPass, point size by MissedShots.
# The palette is keyed by team label ("Time A"/"Time B"), so using it as a
# manual scale for a colour mapped to player names matched nothing. All rows
# here belong to one team, so the points take that single palette colour and
# players are identified by their text labels.
p3 <- ggplot(player_stats, aes(x = MissedShots, y = BlockedPass, size = MissedShots)) +
  geom_point(alpha = 0.7, color = colors$palette[["Time A"]]) +
  # max.overlaps = Inf: label every point, even those sharing coordinates.
  geom_text_repel(aes(label = values), size = 4, color = "grey30", max.overlaps = Inf) +
  labs(
    title = "Distribuição de Chutes Perdidos e Passes Bloqueados",
    x = "Chutes Perdidos",
    y = "Passes Bloqueados"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title),
    panel.background = element_rect(fill = colors$background),
    axis.title = element_text(color = colors$text),
    axis.text = element_text(color = colors$text)
  )

# Layout (patchwork): the two bar charts side by side, the scatter underneath,
# with an overall title.
fig_13 <- (p1 | p2) / p3 + plot_annotation(
  title = "Análise de Futebol - Desempenho dos Jogadores",
  theme = theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title)
  )
)

fig_13
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Colours shared by the panels below: backgrounds, text and the team palette.
colors <- list(
  background = "#f5f5f5",
  text = "#222222",
  title = "#333333",
  palette = c("Time A" = "#f04b21", "Time B" = "grey50")
)

# Rows whose player name is NA are dropped for plotting, so they are neither
# drawn as a player bar nor passed to the label layer with a missing label.
player_stats <- summary_table %>%
  filter(!is.na(values))

# Theme shared by the two bar charts.
bar_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 10, face = "bold", color = colors$title),
    plot.background = element_rect(fill = colors$background),
    panel.background = element_rect(fill = colors$background),
    axis.text = element_text(color = colors$text),
    axis.title = element_text(color = colors$text)
  )

# Horizontal bars: number of Interception events per player.
p1 <- ggplot(player_stats, aes(x = values, y = Interception, fill = Interception)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  labs(title = "Interception", x = "Jogador", y = "Interception") +
  bar_theme

# Horizontal bars: number of Pass events per player.
p2 <- ggplot(player_stats, aes(x = values, y = Pass, fill = Pass)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  labs(title = "Pass", x = "Jogador", y = "Pass") +
  bar_theme

# Scatter: Aerial against BlockedPass.
# The palette is keyed by team label ("Time A"/"Time B"), so using it as a
# manual scale for a colour mapped to player names matched nothing. All rows
# here belong to one team, so the points take that single palette colour and
# players are identified by their text labels.
# NOTE(review): point size is still driven by MissedShots although neither
# axis shows it — confirm this third variable is intended.
p3 <- ggplot(player_stats, aes(x = Aerial, y = BlockedPass, size = MissedShots)) +
  geom_point(alpha = 0.7, color = colors$palette[["Time A"]]) +
  # max.overlaps = Inf: label every point, even those sharing coordinates.
  geom_text_repel(aes(label = values), size = 4, color = "grey30", max.overlaps = Inf) +
  labs(
    title = "Distribuição de Aerial e Passes Bloqueados",
    x = "Aerial",
    y = "Passes Bloqueados"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title),
    panel.background = element_rect(fill = colors$background),
    axis.title = element_text(color = colors$text),
    axis.text = element_text(color = colors$text)
  )

# Layout (patchwork): the two bar charts side by side, the scatter underneath,
# with an overall title.
fig_13 <- (p1 | p2) / p3 + plot_annotation(
  title = "Análise de Futebol - Desempenho dos Jogadores",
  theme = theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title)
  )
)

fig_13
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps