## 0.1 Load the libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cluster)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v stringr 1.4.0
## v tidyr   1.2.0     v forcats 0.5.1
## v readr   2.1.2     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.16.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## 
## Attaching package: 'dendextend'
## 
## The following object is masked from 'package:stats':
## 
##     cutree
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(ggdendro)
## 
## Attaching package: 'ggdendro'
## 
## The following object is masked from 'package:dendextend':
## 
##     theme_dendro
## 0.2 Load the raw data
pokemon_raw <- read.csv("https://assets.datacamp.com/production/course_6430/datasets/Pokemon.csv")
# Use each Pokemon's name as the row name so that later plots and dendrograms
# label observations by name. The column is addressed by name (it was
# position 2) so a change in the CSV column order fails loudly instead of
# silently picking a different column.
rownames(pokemon_raw) <- pokemon_raw[, "Name"]

## 0.3 Select the observations and variables to cluster
# Keep generation-6 Pokemon whose name does not contain 'desconocido', and
# keep only the six base-stat columns. These were previously selected by
# position (6:11); selecting them by name guards against layout changes in
# the source file, and all_of() raises an error if any column is missing.
pokemon <- pokemon_raw %>%
  filter(!grepl("desconocido", Name) & Generation == 6) %>%
  select(all_of(c(
    "HitPoints", "Attack", "Defense",
    "SpecialAttack", "SpecialDefense", "Speed"
  )))

## 0.4 Inspect the prepared data
# First rows of the selected stat columns (row names are the Pokemon names).
pokemon %>% head()
##            HitPoints Attack Defense SpecialAttack SpecialDefense Speed
## Chespin           56     61      65            48             45    38
## Quilladin         61     78      95            56             58    57
## Chesnaught        88    107     122            74             75    64
## Fennekin          40     45      40            62             60    60
## Braixen           59     59      58            90             70    73
## Delphox           75     69      72           114            100   104
# Column types and dimensions.
pokemon %>% str()
## 'data.frame':    82 obs. of  6 variables:
##  $ HitPoints     : int  56 61 88 40 59 75 41 54 72 38 ...
##  $ Attack        : int  61 78 107 45 59 69 56 63 95 36 ...
##  $ Defense       : int  65 95 122 40 58 72 40 52 67 38 ...
##  $ SpecialAttack : int  48 56 74 62 90 114 62 83 103 32 ...
##  $ SpecialDefense: int  45 58 75 60 70 100 44 56 71 36 ...
##  $ Speed         : int  38 57 64 60 73 104 71 97 122 57 ...
# Scatterplot matrix of every pair of stats.
pokemon %>% plot()

# 1 K-means

# k-means starts from randomly chosen centers. Fix the RNG seed so that the
# cluster labels stored in pokemon_CL and the diagnostic plots below are
# reproducible from one run of the script to the next.
set.seed(123)

## 1.1 Fit k-means with 3 clusters
# nstart = 25 tries 25 random sets of starting centers and keeps the best
# solution, instead of relying on a single (possibly poor) random start.
poke_km01 <- kmeans(pokemon, centers = 3, nstart = 25)

## 1.2 Extract the cluster assignment of each observation
clust_km1 <- poke_km01$cluster

## 1.3 Add the assignment as a new variable
pokemon_CL <- mutate(pokemon, Cluster_01 = clust_km1)

## 1.4 Plot the 3-cluster solution
# labels = 2 labels all points and ellipses; lines = 0 draws no distance
# lines between the ellipses.
clusplot(pokemon,
         poke_km01$cluster,
         color = TRUE,
         shade = TRUE,
         labels = 2,
         lines = 0)

## 1.5 Choose the number of clusters
# Elbow (scree) plot of the total within-cluster sum of squares.
fviz_nbclust(pokemon, kmeans, method = "wss")

# Average silhouette width for each candidate number of clusters.
fviz_nbclust(pokemon, kmeans, method = "silhouette")

## 1.6 Fit k-means with 2 clusters
poke_km02 <- kmeans(pokemon, centers = 2, nstart = 25)

## 1.7 Extract the cluster assignment of each observation
clust_km2 <- poke_km02$cluster

## 1.8 Add the assignment as a new variable
pokemon_CL <- mutate(pokemon_CL, Cluster_02 = clust_km2)

## 1.9 Plot the 2-cluster solution
clusplot(pokemon,
         poke_km02$cluster,
         color = TRUE,
         shade = TRUE,
         labels = 2,
         lines = 0)

# 2.1 Build the pairwise distance matrix between observations
# (dist() is called with its default distance method).
poke_dist <- dist(x = pokemon)

# 2.2 Build one hierarchical clustering per linkage method.
# hclust() is called directly on poke_dist (not through a pipe) because
# plot.hclust() derives its x-axis label from the stored call.
Hclust_01 <- hclust(d = poke_dist, method = "complete")
Hclust_02 <- hclust(d = poke_dist, method = "average")
Hclust_03 <- hclust(d = poke_dist, method = "single")

# Plain dendrograms, in order: complete, average, single linkage.
invisible(lapply(list(Hclust_01, Hclust_02, Hclust_03), plot))

# The same three dendrograms with branches coloured by a 5-cluster cut.
invisible(lapply(
  list(Hclust_01, Hclust_02, Hclust_03),
  function(tree) plot(color_branches(tree, k = 5))
))

# ggplot2 version of the complete-linkage dendrogram, drawn horizontally and
# without the blank dendrogram theme.
ggdendrogram(Hclust_01, rotate = TRUE, theme_dendro = FALSE)

# Cut the complete-linkage tree: once into exactly 5 clusters, once at
# height 150.
# NOTE(review): the name Hclust_01.4 suggests 4 clusters, but the cut is
# defined by height (h = 150), not by k -- confirm the intended cut.
Hclust_01.5 <- cutree(Hclust_01, k = 5)
Hclust_01.4 <- cutree(Hclust_01, h = 150)

# Store both hierarchical assignments next to the k-means ones.
pokemon_CL <- pokemon_CL %>%
  mutate(Cluster03 = Hclust_01.5, Cluster04 = Hclust_01.4)