library(rvest)
library(stringr)
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggsoccer)
library(ggrepel)
#install.packages()
# NOTE(review): setwd() with an absolute, user-specific path makes the script
# non-portable; it is kept here only because later code may rely on it.
setwd("/Users/christiantabilojr/Desktop")
# Folder scanned for saved match pages, and every .html file found in it.
diretorio <- "~/Downloads/"
arquivos_html <- list.files(
  path = diretorio,
  pattern = "\\.html$",
  full.names = TRUE
)
# print(arquivos_html)
# Saved match page that is actually analysed below.
html_path <- "~/Desktop/arsenal vs manchester city.html"
html_content <- read_html(html_path)
# Raw text content of the page; the embedded match data is searched in it.
html_text <- html_text(html_content)
# Pull out the object literal assigned to require.config.params["args"]:
# everything after the assignment up to (and including) the first ";".
regex_pattern <- '(?<=require\\.config\\.params\\["args"\\].=.)[\\s\\S]*?;'
data_txt <- str_extract(html_text, regex_pattern)
# str_extract() returns NA when nothing matches; fail here with a clear
# message instead of letting fromJSON() choke on NA further down.
if (is.na(data_txt)) {
  stop(
    "Could not find the require.config.params[\"args\"] payload in the page text.",
    call. = FALSE
  )
}
# The top-level keys are bare JavaScript identifiers, which JSON does not
# allow. Quote each one only where it is used as an unquoted key (not already
# preceded by a quote, and followed by a colon) so that the same words
# appearing elsewhere in the payload are left untouched.
data_txt <- gsub(
  '(?<!")\\b(matchId|matchCentreData|matchCentreEventTypeJson|formationIdNameMappings)\\b(?=\\s*:)',
  '"\\1"',
  data_txt,
  perl = TRUE
)
# Drop the JavaScript statement terminator: a trailing "};" becomes "}".
data_txt <- gsub("};$", "}", data_txt)
# Parse the cleaned text into nested R lists / data frames.
data_list <- fromJSON(data_txt)
# Event records and the playerId -> player name dictionary from the match data.
events <- data_list$matchCentreData$events
players <- data_list$matchCentreData$playerIdNameDictionary
events_df <- as.data.frame(events)
# stack() turns the named dictionary into two columns: `values` (player name)
# and `ind` (the player id the name was stored under).
players_df <- stack(players)
# Attach the player name to every event. all.x = TRUE keeps events that have
# no matching player; their `values` is NA. merge() sorts by the key by
# default, so the result is ordered by playerId rather than by match time.
events_df <- merge(
  events_df,
  players_df,
  by.x = "playerId",
  by.y = "ind",
  all.x = TRUE
)
#mean(events_df$columna, na.rm = TRUE) # Promedio ignorando NA
#sum(events_df$columna, na.rm = TRUE) # Suma ignorando NA
# Keep only the events of the team with teamId 167; change the id to analyse
# the other side.
# NOTE(review): the variable is named `local` (home) - confirm that 167 is
# really the home team's id for this match.
local <- filter(events_df, teamId == "167")
# names(visitante)
# Frequency of each event type for that team.
EAD <- local[["type"]][["displayName"]]
table(EAD)
## EAD
## Aerial BallRecovery BallTouch BlockedPass Challenge
## 9 35 22 5 9
## Clearance CornerAwarded Dispossessed End Error
## 20 7 9 3 4
## FormationSet Foul Goal Interception KeeperPickup
## 1 13 1 6 3
## MissedShots OffsideGiven OffsidePass OffsideProvoked Pass
## 3 1 1 3 500
## Punch Save SavedShot ShieldBallOpp Smother
## 2 5 3 2 1
## Start SubstitutionOff SubstitutionOn Tackle TakeOn
## 2 2 2 13 6
# Copy the event-type name out of the nested `type` column into a plain
# top-level column so it can be used for grouping.
local <- mutate(local, displayName = unlist(type$displayName))
# Per-player summary: count events for every (player, event type) pair, then
# spread the event types into columns, filling absent combinations with 0.
summary_table <- local %>%
  group_by(values, displayName) %>%
  summarise(event_count = n(), .groups = "drop") %>%
  pivot_wider(
    names_from = displayName,
    values_from = event_count,
    values_fill = 0
  )
# str(local)
# summary(local)
# Show the first rows of the enriched event table.
head(local)
## playerId id eventId minute second teamId x y expandedMinute
## 1 73084 2772044625 635 71 38 167 23.4 91.1 73
## 2 73084 2772045683 645 72 58 167 40.2 87.9 74
## 3 73084 2772055459 722 85 49 167 37.0 70.9 87
## 4 73084 2772047439 667 75 1 167 68.4 77.3 77
## 5 73084 2772044305 631 71 17 167 0.0 0.0 73
## 6 73084 2772049585 690 77 59 167 41.2 65.9 79
## period.value period.displayName type.value type.displayName outcomeType.value
## 1 2 SecondHalf 1 Pass 0
## 2 2 SecondHalf 1 Pass 1
## 3 2 SecondHalf 45 Challenge 0
## 4 2 SecondHalf 1 Pass 1
## 5 2 SecondHalf 19 SubstitutionOn 1
## 6 2 SecondHalf 1 Pass 0
## outcomeType.displayName
## 1 Unsuccessful
## 2 Successful
## 3 Unsuccessful
## 4 Successful
## 5 Successful
## 6 Unsuccessful
## qualifiers
## 1 56, 212, 141, 140, 213, Zone, Length, PassEndY, PassEndX, Angle, Back, 17.7, 93.6, 40.2, 0.10
## 2 140, 213, 141, 212, 56, PassEndX, Angle, PassEndY, Length, Zone, 33.3, 2.55, 95.1, 8.7, Back
## 3 56, 285, 233, Zone, Defensive, OppositeRelatedEvent, Back, NA, 572
## 4 56, 140, 213, 212, 141, Zone, PassEndX, Angle, Length, PassEndY, Left, 62.3, 1.99, 15.8, 98.5
## 5 59, 44, 55, 145, JerseyNumber, PlayerPosition, RelatedEventId, FormationSlot, 17, Midfielder, 630, 10
## 6 155, 56, 141, 212, 213, 1, 140, Chipped, Zone, PassEndY, Length, Angle, Longball, PassEndX, NA, Center, 31.9, 51.8, 5.82, NA, 85.3
## satisfiedEventsTypes isTouch endX endY relatedEventId
## 1 91, 118, 120, 29, 36, 37, 216, 218 TRUE 40.2 93.6 NA
## 2 91, 117, 30, 35, 37, 215, 218 TRUE 33.3 95.1 NA
## 3 55 FALSE NA NA NA
## 4 91, 119, 117, 30, 35, 37, 216, 218 TRUE 62.3 98.5 NA
## 5 213 FALSE NA NA 630
## 6 91, 120, 124, 128, 36, 38, 217, 218 TRUE 85.3 31.9 NA
## relatedPlayerId blockedX blockedY goalMouthZ goalMouthY isShot isGoal
## 1 NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 394749 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## cardType.value cardType.displayName values displayName
## 1 NA <NA> Kevin De Bruyne Pass
## 2 NA <NA> Kevin De Bruyne Pass
## 3 NA <NA> Kevin De Bruyne Challenge
## 4 NA <NA> Kevin De Bruyne Pass
## 5 NA <NA> Kevin De Bruyne SubstitutionOn
## 6 NA <NA> Kevin De Bruyne Pass
# Print the per-player event-count table (one row per player, one column per event type).
print(summary_table)
## # A tibble: 14 × 31
## values BallRecovery BallTouch Challenge Foul Pass Save Tackle Aerial
## <chr> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Bernardo Si… 1 4 2 1 42 1 3 0
## 2 Erling Haal… 0 1 0 3 5 0 0 2
## 3 James McAtee 0 1 1 0 1 0 1 0
## 4 John Stones 2 1 0 0 71 2 0 0
## 5 Josko Gvard… 8 3 1 0 67 0 3 2
## 6 Kevin De Br… 0 0 1 0 4 0 0 0
## 7 Manuel Akan… 3 0 0 1 64 0 0 2
## 8 Mateo Kovac… 6 4 1 3 58 0 4 2
## 9 Matheus Nun… 2 0 2 0 51 0 2 0
## 10 Omar Marmou… 2 4 0 1 26 0 0 1
## 11 Phil Foden 4 3 0 2 41 0 0 0
## 12 Savinho 7 1 1 2 38 0 0 0
## 13 Stefan Orte… 0 0 0 0 32 2 0 0
## 14 <NA> 0 0 0 0 0 0 0 0
## # ℹ 22 more variables: BlockedPass <int>, Dispossessed <int>, Goal <int>,
## # SubstitutionOn <int>, TakeOn <int>, Clearance <int>, OffsidePass <int>,
## # OffsideProvoked <int>, CornerAwarded <int>, Error <int>,
## # Interception <int>, SavedShot <int>, MissedShots <int>, OffsideGiven <int>,
## # ShieldBallOpp <int>, SubstitutionOff <int>, KeeperPickup <int>,
## # Punch <int>, Smother <int>, End <int>, FormationSet <int>, Start <int>
# List the columns of summary_table: the player-name column `values` followed by one column per event type.
names(summary_table)
## [1] "values" "BallRecovery" "BallTouch" "Challenge"
## [5] "Foul" "Pass" "Save" "Tackle"
## [9] "Aerial" "BlockedPass" "Dispossessed" "Goal"
## [13] "SubstitutionOn" "TakeOn" "Clearance" "OffsidePass"
## [17] "OffsideProvoked" "CornerAwarded" "Error" "Interception"
## [21] "SavedShot" "MissedShots" "OffsideGiven" "ShieldBallOpp"
## [25] "SubstitutionOff" "KeeperPickup" "Punch" "Smother"
## [29] "End" "FormationSet" "Start"
library(showtext) # biblioteca de texto
## Loading required package: sysfonts
## Loading required package: showtextdb
# Register Google's Lato typeface under the family name "lato" and let
# showtext render the text of the plots that follow.
font_add_google("Lato", "lato")
showtext_auto()
# Keep the five players with the most BallRecovery events.
dados_top <- summary_table %>%
  arrange(desc(BallRecovery)) %>%
  slice_head(n = 5)
# Bar chart of those players, bars ordered by their BallRecovery count.
library(ggplot2)
ggplot(
  dados_top,
  aes(x = reorder(values, BallRecovery), y = BallRecovery, fill = values)
) +
  geom_col(show.legend = FALSE, colour = "white", width = 0.5) +
  geom_text(
    aes(label = BallRecovery),
    vjust = -0.2,
    colour = "black",
    size = 4,
    fontface = "bold"
  ) +
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  labs(
    title = "Top 5 Jogadores com Mais BallRecovery",
    x = "Jogadores",
    y = "Número de BallRecovery"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, colour = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", colour = "#1D3557"),
    axis.text = element_text(size = 7, colour = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "#E0E0E0"),
    # Same fill for panel and plot so the chart sits on one flat background.
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    plot.background = element_rect(fill = "#f8f5ed", colour = NA)
  )

library(showtext)
# Register Google's Lato typeface under the family name "lato" and let
# showtext render the text of the plots that follow.
font_add_google("Lato", "lato")
showtext_auto()
# Keep the five players with the most Pass events.
dados_top <- summary_table %>%
  arrange(desc(Pass)) %>%
  slice_head(n = 5)
# Bar chart of those players, bars ordered by their Pass count.
library(ggplot2)
ggplot(
  dados_top,
  aes(x = reorder(values, Pass), y = Pass, fill = values)
) +
  geom_col(show.legend = FALSE, colour = "white", width = 0.5) +
  geom_text(
    aes(label = Pass),
    vjust = -0.2,
    colour = "black",
    size = 4,
    fontface = "bold"
  ) +
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  labs(
    title = "Top 5 Jogadores com Mais Pass",
    x = "Jogadores",
    y = "Número de Pass"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, colour = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", colour = "#1D3557"),
    axis.text = element_text(size = 7, colour = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "#E0E0E0"),
    # Same fill for panel and plot so the chart sits on one flat background.
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    plot.background = element_rect(fill = "#f8f5ed", colour = NA)
  )

# The original wrote `fig_2 <- library(showtext)` and `fig_1 <- library(ggplot2)`,
# which stored the return value of library() in those names while the plot
# itself was never assigned. The plot is now what gets stored.
library(showtext)
# Register Google's Lato typeface under the family name "lato" and let
# showtext render the text of the plots that follow.
font_add_google("Lato", "lato")
showtext_auto()
# Keep the four players with the most Aerial events.
dados_top <- summary_table %>%
  arrange(desc(Aerial)) %>%
  slice(1:4)
library(ggplot2)
# Bar chart of those players, bars ordered by their Aerial count.
fig_1 <- ggplot(
  dados_top,
  aes(x = reorder(values, Aerial), y = Aerial, fill = values)
) +
  geom_bar(stat = "identity", show.legend = FALSE, color = "white", width = 0.5) +
  geom_text(
    aes(label = Aerial),
    vjust = -0.2, color = "black", size = 4, fontface = "bold"
  ) +
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  labs(
    title = "Top 4 Jogadores com Mais Aerial",
    x = "Jogadores",
    y = "Número de Aerial"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    axis.text = element_text(size = 7, color = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    # Same fill for panel and plot so the chart sits on one flat background.
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    plot.background = element_rect(fill = "#f8f5ed", colour = NA)
  )
# Both names were bound in this chunk originally; keep `fig_2` defined so any
# later reference still resolves. NOTE(review): confirm which figure `fig_2`
# was meant to hold.
fig_2 <- fig_1
# Assigning suppresses auto-printing, so display the plot explicitly.
fig_1

# Register Google's Lato typeface under the family name "lato" and let
# showtext render the text of the plots that follow.
font_add_google("Lato", "lato")
showtext_auto()
# Keep the four players with the most Tackle events. The original ranked by
# Aerial (left over from the previous chart), so the bars shown were not the
# top tacklers.
dados_top <- summary_table %>%
  arrange(desc(Tackle)) %>%
  slice(1:4)
library(ggplot2)
# Bar chart of those players, bars ordered by their Tackle count. The plot is
# stored in `fig_3`; the original stored the return value of library() there.
fig_3 <- ggplot(
  dados_top,
  aes(x = reorder(values, Tackle), y = Tackle, fill = values)
) +
  geom_bar(stat = "identity", show.legend = FALSE, color = "white", width = 0.5) +
  # Label each bar with the value it actually represents (was Aerial).
  geom_text(
    aes(label = Tackle),
    vjust = -0.2, color = "black", size = 4, fontface = "bold"
  ) +
  scale_fill_manual(
    values = c("#1D3557", "#457B9D", "#A8DADC", "#F4A261", "#ade8f4")
  ) +
  labs(
    title = "Top 4 Jogadores com Mais Tackle",
    x = "Jogadores",
    y = "Número de Tackle"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, color = "#1D3557"),
    axis.title = element_text(size = 14, face = "bold", color = "#1D3557"),
    axis.text = element_text(size = 7, color = "#1D3557"), # player names
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#E0E0E0"),
    # Same fill for panel and plot so the chart sits on one flat background.
    panel.background = element_rect(fill = "#f8f5ed", colour = NA),
    plot.background = element_rect(fill = "#f8f5ed", colour = NA)
  )
# Assigning suppresses auto-printing, so display the plot explicitly.
fig_3

library(ggplot2)
library(ggrepel)
#install.packages("patchwork")
library(patchwork)
# Shared colours for the combined figure: page background, body text, titles,
# and a named two-entry palette ("Time A" / "Time B").
colors <- list()
colors$background <- "#f5f5f5"
colors$text <- "#222222"
colors$title <- "#333333"
colors$palette <- setNames(c("#f04b21", "grey50"), c("Time A", "Time B"))
# Horizontal bar chart of Goal events per player. summary_table holds event
# counts, not averages, so the title and axis label say "Gols" rather than
# the original "Média de Gols".
p1 <- ggplot(summary_table, aes(x = values, y = Goal, fill = Goal)) +
  geom_col() +
  coord_flip() +
  # Bars fade from the background colour to orange as the count grows.
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  labs(title = "Gols por Jogador", x = "Jogador", y = "Gols") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 10, face = "bold", color = colors$title),
    plot.background = element_rect(fill = colors$background),
    panel.background = element_rect(fill = colors$background),
    axis.text = element_text(color = colors$text),
    axis.title = element_text(color = colors$text)
  )
# Horizontal bar chart of SubstitutionOn events per player; bar fill follows
# the same value on a background-to-orange gradient.
p2 <- ggplot(
  summary_table,
  aes(x = values, y = SubstitutionOn, fill = SubstitutionOn)
) +
  geom_col() +
  labs(title = "Substituições por Jogador", x = "Jogador", y = "Substituições") +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title = element_text(colour = colors$text),
    axis.text = element_text(colour = colors$text),
    panel.background = element_rect(fill = colors$background),
    plot.background = element_rect(fill = colors$background),
    plot.title = element_text(size = 10, face = "bold", colour = colors$title)
  )
# Scatter plot of MissedShots against BlockedPass, one point per player, with
# point size also following MissedShots.
# The original mapped colour to the player name while supplying a manual
# palette named "Time A"/"Time B"; no player name matches those keys, so every
# point fell back to the scale's NA colour. summary_table only contains one
# team, so the points are drawn in a single team colour instead.
p3 <- ggplot(
  summary_table,
  aes(x = MissedShots, y = BlockedPass, size = MissedShots)
) +
  geom_point(alpha = 0.7, color = colors$palette[["Time A"]]) +
  # Many players share the same coordinates; max.overlaps = Inf stops ggrepel
  # from silently dropping their labels.
  geom_text_repel(
    aes(label = values),
    size = 4,
    color = "grey30",
    max.overlaps = Inf
  ) +
  labs(
    title = "Distribuição de Chutes Perdidos e Passes Bloqueados",
    x = "Chutes Perdidos",
    y = "Passes Bloqueados"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title),
    panel.background = element_rect(fill = colors$background),
    axis.title = element_text(color = colors$text),
    axis.text = element_text(color = colors$text)
  )
# Assemble the figure with patchwork: p1 and p2 side by side on the top row,
# p3 underneath, plus an overall title; then display it.
fig_13 <- ((p1 | p2) / p3) +
  plot_annotation(
    theme = theme(
      plot.title = element_text(size = 14, face = "bold", colour = colors$title)
    ),
    title = "Análise de Futebol - Desempenho dos Jogadores"
  )
fig_13
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Shared colours for the combined figure: page background, body text, titles,
# and a named two-entry palette ("Time A" / "Time B").
colors <- list()
colors$background <- "#f5f5f5"
colors$text <- "#222222"
colors$title <- "#333333"
colors$palette <- setNames(c("#f04b21", "grey50"), c("Time A", "Time B"))
# Horizontal bar chart of Interception events per player; bar fill follows the
# same value on a background-to-orange gradient.
p1 <- ggplot(
  summary_table,
  aes(x = values, y = Interception, fill = Interception)
) +
  geom_col() +
  labs(title = "Interception", x = "Jogador", y = "Interception") +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title = element_text(colour = colors$text),
    axis.text = element_text(colour = colors$text),
    panel.background = element_rect(fill = colors$background),
    plot.background = element_rect(fill = colors$background),
    plot.title = element_text(size = 10, face = "bold", colour = colors$title)
  )
# Horizontal bar chart of Pass events per player; bar fill follows the same
# value on a background-to-orange gradient.
p2 <- ggplot(
  summary_table,
  aes(x = values, y = Pass, fill = Pass)
) +
  geom_col() +
  labs(title = "Pass", x = "Jogador", y = "Pass") +
  scale_fill_gradient(low = "#f5f5f5", high = "#f04b21") +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title = element_text(colour = colors$text),
    axis.text = element_text(colour = colors$text),
    panel.background = element_rect(fill = colors$background),
    plot.background = element_rect(fill = colors$background),
    plot.title = element_text(size = 10, face = "bold", colour = colors$title)
  )
# Scatter plot of Aerial against BlockedPass, one point per player.
# The original mapped colour to the player name while supplying a manual
# palette named "Time A"/"Time B"; no player name matches those keys, so every
# point fell back to the scale's NA colour. summary_table only contains one
# team, so the points are drawn in a single team colour instead.
# NOTE(review): point size still follows MissedShots, as in the original,
# although neither axis shows it - confirm that this is intended.
p3 <- ggplot(
  summary_table,
  aes(x = Aerial, y = BlockedPass, size = MissedShots)
) +
  geom_point(alpha = 0.7, color = colors$palette[["Time A"]]) +
  # Many players share the same coordinates; max.overlaps = Inf stops ggrepel
  # from silently dropping their labels.
  geom_text_repel(
    aes(label = values),
    size = 4,
    color = "grey30",
    max.overlaps = Inf
  ) +
  labs(
    title = "Distribuição de Aerial e Passes Bloqueados",
    x = "Aerial",
    y = "Passes Bloqueados"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", color = colors$title),
    panel.background = element_rect(fill = colors$background),
    axis.title = element_text(color = colors$text),
    axis.text = element_text(color = colors$text)
  )
# Assemble the figure with patchwork: p1 and p2 side by side on the top row,
# p3 underneath, plus an overall title; then display it.
fig_13 <- ((p1 | p2) / p3) +
  plot_annotation(
    theme = theme(
      plot.title = element_text(size = 14, face = "bold", colour = colors$title)
    ),
    title = "Análise de Futebol - Desempenho dos Jogadores"
  )
fig_13
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
