library(rio)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(polycor)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(factoextra)
Preparación de datos
# Load the source spreadsheet through rio.
data2 <- import("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
# Split the combined `key` column on the literal "+" into two new columns.
data2 <- separate(
  data2,
  key,
  into = c("Departamento", "Provincia"),
  sep = "\\+"
)
# Drop row 135 (presumably Lima, judging by the object name -- confirm) and
# keep columns 32 and 47-50 in a single subsetting step.
data2_sin_lima <- data2[-135, c(32, 47, 48, 49, 50)]
# Ratio of the Keiko column to the Castillo column.
data2_sin_lima$razónKC <- data2_sin_lima$Keiko / data2_sin_lima$Castillo
# Keep positions 1, 5 and the newly added 6 (i.e. discard positions 2 to 4).
data2_sin_lima <- data2_sin_lima[, c(1, 5, 6)]
# Centre and scale the remaining columns for clustering.
data2_escalada <- scale(data2_sin_lima)
Con los datos listos, sigue el análisis de clusters
PAM -> 1
# Fix the RNG seed for reproducibility.
set.seed(123)
# Partitioning around medoids on the scaled data, requesting a single cluster.
modelo_pam <- pam(x = data2_escalada, k = 1)
# Show the result.
print(modelo_pam)
## Medoids:
## ID agua1_Red covidPositivos razónKC
## 112 112 -0.3311354 -0.3109417 -0.2646378
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 136 137 138 139 140 141
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Objective function:
## build swap
## 1.132889 1.132889
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Cluster assignment of every observation, named by row id.
modelo_pam[["clustering"]]
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 136 137 138 139 140 141
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Scaled coordinates of the medoid observation.
modelo_pam[["medoids"]]
## agua1_Red covidPositivos razónKC
## 112 -0.3311354 -0.3109417 -0.2646378
# Plot the PAM partition against the scaled data.
fviz_cluster(object = modelo_pam, data = data2_escalada)

AGNES
# Agglomerative hierarchical clustering (AGNES) on the scaled data, using the
# "ward" linkage method.
modelo_agnes <- agnes(x = data2_escalada, method = "ward")
# Dendrogram of the agglomerative tree; rect = TRUE requests rectangles
# around the groups.
fviz_dend(x = modelo_agnes, rect = TRUE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

DIANA
# Divisive hierarchical clustering (DIANA) on the scaled data.
modelo_diana <- diana(x = data2_escalada)
# Dendrogram of the divisive tree; rect = TRUE requests rectangles around the
# groups.
fviz_dend(x = modelo_diana, rect = TRUE)

Esto sí, pero no
# Estimate the number of clusters for PAM with the gap statistic, evaluating
# up to 10 clusters on the scaled data.
#
# The previous `diss = g.dist` argument was dropped: `g.dist` is never defined
# in this script, and fviz_nbclust() only consults `diss` for the "silhouette"
# and "wss" methods, so with method = "gap_stat" it was a dead reference to an
# undefined object. `verbose` now uses FALSE instead of the reassignable `F`.
fviz_nbclust(
  data2_escalada,
  FUNcluster = pam,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)
