library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.2.3
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend
library(cluster)
library(purrr)
library(dplyr)
library(dendextend)
## Warning: package 'dendextend' was built under R version 4.2.3
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## 
## Attaching package: 'dendextend'
## 
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## 
## The following object is masked from 'package:stats':
## 
##     cutree

Cluster Analysis

Tenemos información de las posiciones en el campo de juego de 12 jugadores al inicio de un partido de soccer de 6v6. El objetivo es inferir qué jugadores pertenecen a cada equipo usando conglomerados jerárquicos.

# Load the starting positions (x, y) of the 12 players.
# The data location can be overridden with the LINEUP_RDS environment variable
# so the analysis is not tied to one user's machine; when the variable is
# unset, the original path is used.
lineup_path <- Sys.getenv(
  "LINEUP_RDS",
  unset = "C:/Users/sbadi/Downloads/lineup.rds"
)
# Fail early with an explicit message instead of a generic connection error.
if (!file.exists(lineup_path)) {
  stop("Lineup data file not found: ", lineup_path, call. = FALSE)
}
lineup <- readRDS(lineup_path)
lineup
## # A tibble: 12 × 2
##        x     y
##    <dbl> <dbl>
##  1    -1     1
##  2    -2    -3
##  3     8     6
##  4     7    -8
##  5   -12     8
##  6   -15     0
##  7   -13   -10
##  8    15    16
##  9    21     2
## 10    12   -15
## 11   -25     1
## 12    26     0

Primero calcularemos las distancias euclidianas entre cada par de jugadores según su posición inicial en el partido.

# Pairwise Euclidean distances between the players' starting positions.
dist_plauers <- dist(x = lineup, method = "euclidean")
# Hierarchical clustering of those distances using complete linkage.
hc_players <- hclust(d = dist_plauers, method = "complete")
# cutree() tells us which cluster each observation belongs to.
# We ask for 2 clusters because we know there are two teams.
cluster_assigments <- cutree(hc_players, k = 2)
print(cluster_assigments)
##  [1] 1 1 2 2 1 1 1 2 2 2 1 2
# Attach each player's cluster label to the position data as a new column.
players_clustered <- lineup %>%
  mutate(cluster = cluster_assigments)
print(players_clustered)
## # A tibble: 12 × 3
##        x     y cluster
##    <dbl> <dbl>   <int>
##  1    -1     1       1
##  2    -2    -3       1
##  3     8     6       2
##  4     7    -8       2
##  5   -12     8       1
##  6   -15     0       1
##  7   -13   -10       1
##  8    15    16       2
##  9    21     2       2
## 10    12   -15       2
## 11   -25     1       1
## 12    26     0       2

Ahora veamos representaciones de lo que hemos calculado. Primero observaremos cuántos jugadores se han asignado a cada cluster y después una representación gráfica de las posiciones en el campo de juego.

# Count how many players were assigned to each cluster.
players_clustered %>%
  count(cluster)
## # A tibble: 2 × 2
##   cluster     n
##     <int> <int>
## 1       1     6
## 2       2     6
# Visualize the result: plot each player's position, colored by cluster.
ggplot(
  data = players_clustered,
  mapping = aes(x = x, y = y, color = factor(cluster))
) +
  geom_point()

Ahora veamos el dendrograma de los conglomerados que armamos de cada equipo.

# Convert the hclust result to a dendrogram object so its branches can be styled.
dend_players <- hc_players %>%
  as.dendrogram()
# Color the branches according to the same 2-cluster cut used above.
dend_colored <- dend_players %>%
  color_branches(k = 2)
plot(dend_colored)