## 0.1 Cargamos las librerias
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cluster)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v stringr 1.4.0
## v tidyr   1.2.0     v forcats 0.5.1
## v readr   2.1.2     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.16.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## 
## Attaching package: 'dendextend'
## 
## The following object is masked from 'package:stats':
## 
##     cutree
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(ggdendro)
## 
## Attaching package: 'ggdendro'
## 
## The following object is masked from 'package:dendextend':
## 
##     theme_dendro
## 0.2 Load the raw data
# Read the Pokemon table from the course URL and use the values of its second
# column as row names, so every observation carries a readable label.
pokemon_raw <- read.csv(
  file = "https://assets.datacamp.com/production/course_6430/datasets/Pokemon.csv"
)
rownames(pokemon_raw) <- pokemon_raw[[2]]

## 0.3 Select observations and variables
# Keep rows whose Name does not contain "Mega" and whose Generation is 1,
# then keep only columns 6 through 11.
pokemon <- pokemon_raw %>%
  filter(!grepl("Mega", Name), Generation == 1) %>%
  select(6:11)

## 0.4 Inspect the prepared data
# First rows of the table (recorded output below).
pokemon %>% head()
##            HitPoints Attack Defense SpecialAttack SpecialDefense Speed
## Bulbasaur         45     49      49            65             65    45
## Ivysaur           60     62      63            80             80    60
## Venusaur          80     82      83           100            100    80
## Charmander        39     52      43            60             50    65
## Charmeleon        58     64      58            80             65    80
## Charizard         78     84      78           109             85   100
# Dimensions and column types (recorded output below).
pokemon %>% str()
## 'data.frame':    151 obs. of  6 variables:
##  $ HitPoints     : int  45 60 80 39 58 78 44 59 79 45 ...
##  $ Attack        : int  49 62 82 52 64 84 48 63 83 30 ...
##  $ Defense       : int  49 63 83 43 58 78 65 80 100 35 ...
##  $ SpecialAttack : int  65 80 100 60 80 109 50 65 85 20 ...
##  $ SpecialDefense: int  65 80 100 50 65 85 64 80 105 20 ...
##  $ Speed         : int  45 60 80 65 80 100 43 58 78 45 ...
# Plot the whole data frame for a visual overview of the six variables.
plot(x = pokemon)

# 1 K-means

## 1.1 Fit the first k-means solution (two clusters)
# kmeans() starts from randomly chosen centres. Without a fixed seed the
# cluster labels (and possibly the partition itself) change between runs, so
# fix the seed and keep the best of several random starts.
set.seed(123)
poke_km01 <- kmeans(pokemon, centers = 2, nstart = 25)

## 1.2 Extract the cluster assignment of each observation
clust_km1 <- poke_km01$cluster

## 1.3 Attach the assignment to the data as a new column
pokemon_CL <- mutate(pokemon, Cluster_01 = clust_km1)

## 1.4 Plot the clustering
clusplot(
  pokemon,
  poke_km01$cluster,
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

## 1.5 Scree plots to choose the number of clusters
# kmeans is passed as the clustering function and starts from random centres,
# so fix the seed before each call to make the plotted curves reproducible.
set.seed(123)
fviz_nbclust(pokemon, kmeans, method = "wss") # within sum of squares

set.seed(123)
fviz_nbclust(pokemon, kmeans, method = "silhouette") # silhouette

## 1.6 Fit the second k-means solution
# Same reproducibility concern as any kmeans() call: fix the seed and use
# several random starts so the result does not vary between runs.
# NOTE(review): centers is still 2, the same value as in section 1.1 —
# confirm whether a different k read off the scree plots was intended.
set.seed(123)
poke_km02 <- kmeans(pokemon, centers = 2, nstart = 25)

## 1.7 Extract the cluster assignment of each observation
clust_km2 <- poke_km02$cluster

## 1.8 Attach the assignment to the data as a new column
pokemon_CL <- mutate(pokemon_CL, Cluster_02 = clust_km2)

## 1.9 Plot the clustering
clusplot(
  pokemon,
  poke_km02$cluster,
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

# 2.1 Build the distance matrix between observations (dist() defaults).
poke_dist <- dist(x = pokemon)

# 2.2 Build one hierarchical clustering per linkage method, all from the
# same distance matrix.
Hclust_01 <- hclust(d = poke_dist, method = "complete")
Hclust_02 <- hclust(d = poke_dist, method = "average")
Hclust_03 <- hclust(d = poke_dist, method = "single")

# Plain dendrograms, in the order complete, average, single linkage.
invisible(lapply(list(Hclust_01, Hclust_02, Hclust_03), plot))

# The same three trees with branches coloured by a five-cluster cut.
invisible(lapply(
  list(Hclust_01, Hclust_02, Hclust_03),
  function(tree) plot(color_branches(tree, k = 5))
))

# ggplot-based, rotated dendrogram of the complete-linkage tree.
ggdendrogram(Hclust_01, rotate = TRUE, theme_dendro = FALSE)

# Cut the complete-linkage tree into exactly five groups.
Hclust_01.5 <- cutree(Hclust_01, k = 5)
# Cut the same tree at height 150; the number of groups follows from the tree.
# NOTE(review): the ".4" suffix suggests four groups — confirm h = 150 gives that.
Hclust_01.4 <- cutree(Hclust_01, h = 150)

# Attach both assignments to the table that already holds the k-means columns.
pokemon_CL <- pokemon_CL %>%
  mutate(Cluster03 = Hclust_01.5, Cluster04 = Hclust_01.4)