library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.2.3
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:cowplot':
##
## get_legend
library(cluster)
library(purrr)
library(dplyr)
library(dendextend)
## Warning: package 'dendextend' was built under R version 4.2.3
##
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
##
## Attaching package: 'dendextend'
##
## The following object is masked from 'package:ggpubr':
##
## rotate
##
## The following object is masked from 'package:stats':
##
## cutree
Tenemos información de las posiciones en el campo de juego de 12 jugadores al inicio de un partido de soccer de 6v6. El objetivo es inferir qué jugadores pertenecen a cada equipo usando conglomerados jerárquicos.
# Load the player-position table from its serialized RDS file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
lineup <- readRDS(file = "C:/Users/sbadi/Downloads/lineup.rds")
# Auto-print the table to inspect the loaded data.
lineup
## # A tibble: 12 × 2
## x y
## <dbl> <dbl>
## 1 -1 1
## 2 -2 -3
## 3 8 6
## 4 7 -8
## 5 -12 8
## 6 -15 0
## 7 -13 -10
## 8 15 16
## 9 21 2
## 10 12 -15
## 11 -25 1
## 12 26 0
Primero calcularemos las distancias entre los jugadores según su posición inicial en el partido.
# Pairwise Euclidean distances between the rows of `lineup`.
dist_plauers <- dist(x = lineup, method = "euclidean")

# Hierarchical clustering of those distances with complete linkage.
hc_players <- hclust(d = dist_plauers, method = "complete")

# cutree() tells us which cluster each observation belongs to.
# We ask for 2 clusters because we know there are two teams.
cluster_assigments <- cutree(tree = hc_players, k = 2)
print(cluster_assigments)
## [1] 1 1 2 2 1 1 1 2 2 2 1 2
# Add the cluster label of each row to the position table as column `cluster`.
players_clustered <- lineup |>
  mutate(cluster = cluster_assigments)
print(players_clustered)
## # A tibble: 12 × 3
## x y cluster
## <dbl> <dbl> <int>
## 1 -1 1 1
## 2 -2 -3 1
## 3 8 6 2
## 4 7 -8 2
## 5 -12 8 1
## 6 -15 0 1
## 7 -13 -10 1
## 8 15 16 2
## 9 21 2 2
## 10 12 -15 2
## 11 -25 1 1
## 12 26 0 2
Ahora veamos representaciones de lo que hemos calculado. Primero observaremos cuántos jugadores se han asignado a cada cluster y después una representación gráfica de las posiciones en el campo de juego.
# Count how many players were assigned to each cluster.
players_clustered |>
  count(cluster)
## # A tibble: 2 × 2
## cluster n
## <int> <int>
## 1 1 6
## 2 2 6
# Visualize the result: scatter plot of the positions, with points colored
# by the assigned cluster (converted to a factor so the colors are discrete).
ggplot(
  data = players_clustered,
  mapping = aes(x = x, y = y, color = factor(cluster))
) +
  geom_point()
Ahora veamos el dendrograma de los conglomerados que armamos para cada
equipo.
# Convert the hclust result into a dendrogram object.
dend_players <- as.dendrogram(object = hc_players)

# Color the branches by the two-cluster cut (k = 2, one color per team).
dend_colored <- color_branches(dend = dend_players, k = 2)

# Draw the colored dendrogram.
plot(dend_colored)