EXERCICIO

library(rvest)
library(stringr)
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ purrr::flatten()        masks jsonlite::flatten()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggsoccer)
library(ggrepel)
# Path of the saved match page; as a bare top-level expression it is only echoed.
"~/Downloads/Wolves 0-3 Nottingham Forest - Premier League 2024:2025 Live.html"
## [1] "~/Downloads/Wolves 0-3 Nottingham Forest - Premier League 2024:2025 Live.html"
# Folder that is scanned for saved HTML pages.
diretorio <- "~/Downloads/"

# Collect the full path of every file whose name ends in ".html".
arquivos_html <- list.files(
  diretorio,
  pattern = "\\.html$",
  full.names = TRUE
)

# Show the files that were found.
print(arquivos_html)
##  [1] "/Users/christiantabilojr/Downloads//aula-1-10.html"                                                   
##  [2] "/Users/christiantabilojr/Downloads//aula-1-11.html"                                                   
##  [3] "/Users/christiantabilojr/Downloads//aula-1-12.html"                                                   
##  [4] "/Users/christiantabilojr/Downloads//aula-1-13.html"                                                   
##  [5] "/Users/christiantabilojr/Downloads//aula-1-14.html"                                                   
##  [6] "/Users/christiantabilojr/Downloads//aula-1-2.html"                                                    
##  [7] "/Users/christiantabilojr/Downloads//aula-1-3.html"                                                    
##  [8] "/Users/christiantabilojr/Downloads//aula-1-4.html"                                                    
##  [9] "/Users/christiantabilojr/Downloads//aula-1-5.html"                                                    
## [10] "/Users/christiantabilojr/Downloads//aula-1-6.html"                                                    
## [11] "/Users/christiantabilojr/Downloads//aula-1-7.html"                                                    
## [12] "/Users/christiantabilojr/Downloads//aula-1-8.html"                                                    
## [13] "/Users/christiantabilojr/Downloads//aula-1-9.html"                                                    
## [14] "/Users/christiantabilojr/Downloads//aula-1.html"                                                      
## [15] "/Users/christiantabilojr/Downloads//nose.html"                                                        
## [16] "/Users/christiantabilojr/Downloads//plano-de-ensino.html"                                             
## [17] "/Users/christiantabilojr/Downloads//principais-funções-R.html"                                        
## [18] "/Users/christiantabilojr/Downloads//tra vs rsj.html"                                                  
## [19] "/Users/christiantabilojr/Downloads//Wolves 0-3 Nottingham Forest - Premier League 2024:2025 Live.html"
# Match 1 | one of the downloaded matches.
# Path to the saved match page.
html_path <- "/Users/christiantabilojr/Downloads//Wolves 0-3 Nottingham Forest - Premier League 2024:2025 Live.html"
# Read the HTML file.
html_content <- read_html(html_path)
# Extract the raw text content of the page.
html_text <- html_content %>% html_text()
# Find the object assigned to require.config.params["args"]; the lazy match
# stops at the first ";" after the assignment.
regex_pattern <- '(?<=require\\.config\\.params\\["args"\\].=.)[\\s\\S]*?;'
data_txt <- str_extract(html_text, regex_pattern)
# str_extract() returns NA when nothing matches; stop here with a clear
# message instead of letting fromJSON() fail on NA further down.
if (is.na(data_txt)) {
  stop(
    "Could not find require.config.params[\"args\"] in ", html_path,
    call. = FALSE
  )
}
# The top-level keys are unquoted in the page, so quote them to make the text
# parseable as JSON. Same order and patterns as one gsub() call per key.
json_keys <- c(
  "matchId",
  "matchCentreData",
  "matchCentreEventTypeJson",
  "formationIdNameMappings"
)
for (json_key in json_keys) {
  data_txt <- gsub(json_key, paste0('"', json_key, '"'), data_txt)
}
# Replace the trailing "};" with "}".
data_txt <- gsub("};$", '}', data_txt)
# Convert the JSON text into an R object.
data_list <- fromJSON(data_txt)
# Pull out the events and the player id -> name dictionary.
events <- data_list$matchCentreData$events
players <- data_list$matchCentreData$playerIdNameDictionary
# Convert the events to a data frame.
events_df <- as.data.frame(events)
# stack() turns the dictionary into a two-column data frame:
# `values` (the entries) and `ind` (their names, i.e. the player ids).
players_df <- stack(players)
# Attach the player name (`values`) next to playerId; all.x = TRUE keeps events
# that have no matching player. NOTE: merge() sorts the result by the key by
# default, so rows are no longer in their original order.
events_df <- merge(events_df, players_df, by.x = "playerId", by.y = "ind", all.x = TRUE)

# names(events_df) # names() lists the columns available in the events data
# events_df # shows just the specific table we are looking for

El análisis de datos es la información que se consigue a través de bases de datos y que se va desarrollando mediante ejecuciones sobre las mismas, para obtener o recopilar información sobre un tema que queremos aprender o para formular una hipótesis.

# Keep only the events whose teamId is "174"; this selects the events (and
# therefore the players) of one side, home or away.
# NOTE(review): 174 appears to be the away team here - confirm.
visitante <- filter(events_df, teamId == "174")
# names(visitante)

# Event-type label of every kept event, read from the nested `type` column.
EAD <- visitante[["type"]][["displayName"]]

# Frequency of each event type.
table(EAD)
## EAD
##          Aerial    BallRecovery       BallTouch     BlockedPass            Card 
##              26              42              33               4               2 
##       Challenge       Clearance   CornerAwarded    Dispossessed             End 
##               4              35               8               6               3 
##           Error FormationChange    FormationSet            Foul            Goal 
##               2               2               1              18               3 
##    Interception    KeeperPickup     MissedShots    OffsideGiven     OffsidePass 
##              10               7               6               4               4 
## OffsideProvoked            Pass           Punch            Save       SavedShot 
##               1             389               3              11               2 
##   ShieldBallOpp         Smother           Start SubstitutionOff  SubstitutionOn 
##               1               1               2               5               5 
##          Tackle          TakeOn 
##              18              13
# Copy the nested event-type label into a plain top-level column so it can be
# used as a grouping variable.
visitante <- mutate(visitante, displayName = unlist(type$displayName))

# Summary table per player: count the events for every (player, event type)
# pair, then spread the event types into columns, filling absent pairs with 0.
summary_table <- visitante %>%
  group_by(values, displayName) %>%
  summarise(
    event_count = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = displayName,
    values_from = event_count,
    values_fill = 0
  )

# Show the table.
print(summary_table)
## # A tibble: 17 × 33
##    values   Aerial BallRecovery BallTouch BlockedPass CornerAwarded Dispossessed
##    <chr>     <int>        <int>     <int>       <int>         <int>        <int>
##  1 Anthony…      1            1         3           1             1            2
##  2 Callum …      1            6         3           0             0            0
##  3 Chris W…      3            0         2           0             0            0
##  4 Elliot …      2            9         5           0             2            0
##  5 James W…      1            0         0           0             0            0
##  6 Jota Si…      1            0         2           0             0            0
##  7 Matz Se…      1            0         0           0             2            0
##  8 Morato        1            0         0           0             0            0
##  9 Morgan …      5            2         4           0             0            2
## 10 Murillo       0            4         3           0             2            1
## 11 Neco Wi…      1            5         5           1             0            0
## 12 Nicolás…      0            2         1           2             0            0
## 13 Nikola …      5            2         0           0             0            0
## 14 Ola Aina      1            9         3           0             1            0
## 15 Ryan Ya…      0            2         0           0             0            1
## 16 Taiwo A…      3            0         2           0             0            0
## 17 <NA>          0            0         0           0             0            0
## # ℹ 26 more variables: Foul <int>, OffsideGiven <int>, Pass <int>,
## #   SubstitutionOff <int>, Tackle <int>, TakeOn <int>, MissedShots <int>,
## #   Goal <int>, OffsidePass <int>, Card <int>, Clearance <int>, Error <int>,
## #   Interception <int>, SavedShot <int>, SubstitutionOn <int>, Challenge <int>,
## #   KeeperPickup <int>, Punch <int>, Save <int>, Smother <int>,
## #   OffsideProvoked <int>, ShieldBallOpp <int>, End <int>,
## #   FormationChange <int>, FormationSet <int>, Start <int>
# Histogram of the Aerial column of summary_table (one count per row/player).
# The call is left exactly as written: hist() derives its default title from
# the argument expression.
hist(summary_table$Aerial)

# List the column names of summary_table: `values` plus one column per event type.
names(summary_table)
##  [1] "values"          "Aerial"          "BallRecovery"    "BallTouch"      
##  [5] "BlockedPass"     "CornerAwarded"   "Dispossessed"    "Foul"           
##  [9] "OffsideGiven"    "Pass"            "SubstitutionOff" "Tackle"         
## [13] "TakeOn"          "MissedShots"     "Goal"            "OffsidePass"    
## [17] "Card"            "Clearance"       "Error"           "Interception"   
## [21] "SavedShot"       "SubstitutionOn"  "Challenge"       "KeeperPickup"   
## [25] "Punch"           "Save"            "Smother"         "OffsideProvoked"
## [29] "ShieldBallOpp"   "End"             "FormationChange" "FormationSet"   
## [33] "Start"

# First event plotted: BallRecovery. This is a note, not an assignment:
# `BallRecovery` is not an object, and assigning to `events` would overwrite
# the events data.

# Scatter plot of one event-count column per player.
#
# data        Data frame with a `values` column (player name) and one count
#             column per event type, e.g. summary_table.
# event       Unquoted name of the count column to put on the x axis.
# with_labels If TRUE, print each count above its point and add the match
#             title, subtitle and caption.
#
# Returns the ggplot object (it is drawn when printed at top level).
plot_event_by_player <- function(data, event, with_labels = TRUE) {
  p <- ggplot(data, aes(x = {{ event }}, y = values)) +
    geom_point(color = "purple", size = 4)

  if (with_labels) {
    p <- p +
      # vjust = -1 lifts the number above the point it belongs to.
      geom_text(aes(label = {{ event }}), vjust = -1, size = 3) +
      labs(
        title = "Wolves 0-3 Nottingham Forest",
        subtitle = "Partido Premier League",
        caption = "Christian Tabilo"
      )
  }

  p + theme_minimal()
}

# Ball recoveries per player (plain plot, no labels or titles).
plot_event_by_player(summary_table, BallRecovery, with_labels = FALSE)

# Interceptions per player.
plot_event_by_player(summary_table, Interception)

# Goals per player.
plot_event_by_player(summary_table, Goal)