## 0.1 Cargamos las librerías
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(cluster)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v stringr 1.4.0
## v tidyr 1.2.0 v forcats 0.5.1
## v readr 2.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.16.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
##
## Attaching package: 'dendextend'
##
## The following object is masked from 'package:stats':
##
## cutree
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(ggdendro)
##
## Attaching package: 'ggdendro'
##
## The following object is masked from 'package:dendextend':
##
## theme_dendro
## 0.2 Load the raw data set
pokemon_raw <- read.csv(
  "https://assets.datacamp.com/production/course_6430/datasets/Pokemon.csv"
)
# Use the second column as row names so every observation carries an ID.
rownames(pokemon_raw) <- pokemon_raw[[2]]
## 0.3 Select the variables
# Drop rows whose Name contains "desconocido", keep only Generation 6, and
# retain columns 6 through 11 (HitPoints, Attack, Defense, SpecialAttack,
# SpecialDefense and Speed).
pokemon <- pokemon_raw %>%
  filter(!grepl("desconocido", Name), Generation == 6) %>%
  select(6:11)
## 0.4 Inspect the data set
# First rows; the row names are the Pokemon names assigned while loading.
head(pokemon)
## HitPoints Attack Defense SpecialAttack SpecialDefense Speed
## Chespin 56 61 65 48 45 38
## Quilladin 61 78 95 56 58 57
## Chesnaught 88 107 122 74 75 64
## Fennekin 40 45 40 62 60 60
## Braixen 59 59 58 90 70 73
## Delphox 75 69 72 114 100 104
# Dimensions and column types (82 observations of 6 integer variables in the
# recorded run below).
str(pokemon)
## 'data.frame': 82 obs. of 6 variables:
## $ HitPoints : int 56 61 88 40 59 75 41 54 72 38 ...
## $ Attack : int 61 78 107 45 59 69 56 63 95 36 ...
## $ Defense : int 65 95 122 40 58 72 40 52 67 38 ...
## $ SpecialAttack : int 48 56 74 62 90 114 62 83 103 32 ...
## $ SpecialDefense: int 45 58 75 60 70 100 44 56 71 36 ...
## $ Speed : int 38 57 64 60 73 104 71 97 122 57 ...
# Plotting the whole data frame draws a scatterplot matrix of its columns.
plot(pokemon)

# 1 K-means
## 1.1 Fit the clustering (k = 3)
# kmeans() starts from randomly chosen centers. Fix the RNG seed so the
# cluster labels stored below are reproducible between runs, and try several
# random starts (nstart) so the best solution is kept instead of whatever
# local optimum a single start happens to reach.
set.seed(123)
poke_km01 <- kmeans(pokemon, centers = 3, nstart = 25)
## 1.2 Extract the cluster assignment of every observation
clust_km1 <- poke_km01$cluster
## 1.3 Add the assignment as a new variable
pokemon_CL <- mutate(pokemon, Cluster_01 = clust_km1)
## 1.4 Plot the clusters
# labels = 2 labels every point and ellipse; lines = 0 draws no distance
# lines between the ellipses.
clusplot(
  pokemon,
  clust_km1,
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

## 1.5 Choosing the number of clusters
# fviz_nbclust() refits kmeans() for every candidate k. Each fit starts from
# random centers, so seed the RNG before each call and forward nstart to
# kmeans() (through `...`) to keep the curves stable between runs.

# Scree plot: total within-cluster sum of squares for each k.
set.seed(123)
fviz_nbclust(pokemon, kmeans, method = "wss", nstart = 25)

# Average silhouette width for each k.
set.seed(123)
fviz_nbclust(pokemon, kmeans, method = "silhouette", nstart = 25)

## 1.6 Fit the clustering (k = 2)
# As with any kmeans() fit, the result depends on the random initial centers:
# seed the RNG for reproducible labels and use several random starts so the
# best solution found is the one kept.
set.seed(123)
poke_km02 <- kmeans(pokemon, centers = 2, nstart = 25)
## 1.7 Extract the cluster assignment of every observation
clust_km2 <- poke_km02$cluster
## 1.8 Add the assignment as a new variable
pokemon_CL <- mutate(pokemon_CL, Cluster_02 = clust_km2)
## 1.9 Plot the clusters
# labels = 2 labels every point and ellipse; lines = 0 draws no distance
# lines between the ellipses.
clusplot(
  pokemon,
  clust_km2,
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

# 2.1 Build the distance matrix between observations.
poke_dist <- dist(pokemon)
# 2.2 Build one tree per linkage criterion, all from the same distances.
Hclust_01 <- hclust(d = poke_dist, method = "complete")
Hclust_02 <- hclust(d = poke_dist, method = "average")
Hclust_03 <- hclust(d = poke_dist, method = "single")

# Plain dendrograms, one per linkage.
plot(Hclust_01)

plot(Hclust_02)

plot(Hclust_03)

# The same trees with branches colored by a five-cluster cut.
Hclust_01 %>% color_branches(k = 5) %>% plot()

Hclust_02 %>% color_branches(k = 5) %>% plot()

Hclust_03 %>% color_branches(k = 5) %>% plot()

# ggplot2 rendering of the complete-linkage tree, drawn rotated.
ggdendrogram(
  Hclust_01,
  rotate = TRUE,
  theme_dendro = FALSE
)

# Cut the complete-linkage tree into groups in two different ways.
# Hclust_01.5: exactly five groups (k = 5).
Hclust_01.5 <- cutree(Hclust_01, k = 5)
# Hclust_01.4: cut at height 150. The number of groups is whatever the tree
# yields at that height, regardless of the ".4" suffix in the name.
Hclust_01.4 <- cutree(Hclust_01, h = 150)
# Store both assignments next to the k-means labels.
pokemon_CL <- pokemon_CL %>%
  mutate(
    Cluster03 = Hclust_01.5,
    Cluster04 = Hclust_01.4
  )