plot(res)
library(car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Work on a data-frame copy of the built-in iris data set.
df <- data.frame(iris)
# Preview a sample of rows with car::some().
car::some(df)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 4 4.6 3.1 1.5 0.2 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 60 5.2 2.7 3.9 1.4 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 142 6.9 3.1 5.1 2.3 virginica
# Measurement columns only: everything except the fifth column (Species).
iris.2 <- iris[, -5]
# The fifth column (Species) on its own.
species <- iris[[5]]
# Same rows as `df`, with the species names shortened to two-letter codes.
di <- df %>%
  mutate(
    Species = dplyr::recode(
      Species,
      setosa = "st",
      versicolor = "vs",
      virginica = "vg"
    )
  )
# Scatter-plot matrix of the four measurements, one colour per species.
# Colours are palette indices 1..k in factor-level order, so the points and
# the legend swatches always agree.
pairs(di[, 1:4], col = as.integer(df$Species), lower.panel = NULL)
# Allow drawing outside the plot region for the legend only, then restore the
# previous clipping setting; forcing xpd = NA afterwards would leave clipping
# relaxed for every later base-graphics plot.
old_par <- par(xpd = TRUE)
legend(
  x = 0.05, y = 0.4, cex = 2,
  legend = levels(df$Species),
  fill = seq_along(levels(df$Species))
)
par(old_par)
Análisis de conglomerados por el método k-means
# Seed the RNG so the random starting centres are reproducible.
set.seed(20)
# Partition the four measurement columns into 3 clusters, using 10 random
# starts.
k.means.fit <- kmeans(x = di[, 1:4], centers = 3, nstart = 10)
k.means.fit
## K-means clustering with 3 clusters of sizes 50, 62, 38
##
## Cluster means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.006000 3.428000 1.462000 0.246000
## 2 5.901613 2.748387 4.393548 1.433871
## 3 6.850000 3.073684 5.742105 2.071053
##
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
##
## Within cluster sum of squares by cluster:
## [1] 15.15100 39.82097 23.87947
## (between_SS / total_SS = 88.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Centre of each cluster, one row per cluster and one column per measurement.
k.means.fit[["centers"]]
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.006000 3.428000 1.462000 0.246000
## 2 5.901613 2.748387 4.393548 1.433871
## 3 6.850000 3.073684 5.742105 2.071053
# Fault indicator reported by kmeans (0 in the run recorded below).
k.means.fit[["ifault"]]
## [1] 0
# Cluster assignment of every observation, cross-tabulated against species.
grupos <- k.means.fit[["cluster"]]
table(di$Species, grupos)
## grupos
## 1 2 3
## st 50 0 0
## vs 0 48 2
## vg 0 14 36
# Label each cluster with the species that is most frequent inside it.
# k-means numbers its clusters arbitrarily, so a hard-coded number -> species
# map breaks silently: the previous one (3 = "st", 1 = "vg") labelled the pure
# "st" cluster as "vg" and produced a scrambled comparison table.
# Caveat: two clusters dominated by the same species receive the same label.
cluster_counts <- table(grupos, di$Species)
cluster_label <- colnames(cluster_counts)[apply(cluster_counts, 1, which.max)]
names(cluster_label) <- rownames(cluster_counts)
# `grupos` stays a character column, as before, so downstream code sees the
# same type.
dif <- data.frame(di, grupos) %>%
  mutate(grupos = unname(cluster_label[as.character(grupos)]))
# Rows: label assigned to the cluster; columns: true species.
table(dif$grupos, dif$Species)
##
## st vs vg
## st 50 0 0
## vg 0 2 36
## vs 0 48 14
Presentación de análisis de conglomerados en gráfico 2D
# Standardise the four measurement columns (centre and scale each one).
d2 <- scale(di[, 1:4])
# Use each observation's species code as its row name.
rownames(d2) <- di$Species
# wss: total within-cluster sum of squares, plotted for up to 15 clusters.
fviz_nbclust(
  x = d2,
  FUNcluster = kmeans,
  method = "wss",
  k.max = 15,
  diss = get_dist(d2, method = "euclidean"),
  nstart = 50
)
# Data-frame version of the standardised matrix.
d2f <- data.frame(d2)
# Seed the RNG so the 50 random starts below are reproducible.
set.seed(123)
km_clusters <- kmeans(x = d2f, centers = 3, nstart = 50)
# factoextra functions use the row names of the data frame that holds the
# data as observation identifiers, which makes it possible to add labels to
# the plots.
fviz_cluster(
  object = km_clusters,
  data = d2f,
  show.clust.cent = TRUE,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  pointsize = 0.5,
  outlier.color = "darkred"
) +
  labs(title = "Resultados clustering K-means") +
  theme_bw() +
  theme(legend.position = "none")
Datos originales
# Interactive 3D scatter of the original measurements: sepal length (x),
# sepal width (y) and petal length (z); marker size follows petal width and
# colour follows species.
# NOTE(review): `symbols` is documented as a palette of marker symbols, not a
# data mapping (`symbol`); left as-is to keep the rendered plot unchanged.
plot_ly(
  data = di,
  x = di$Sepal.Length, y = di$Sepal.Width, z = di$Petal.Length,
  size = di$Petal.Width, color = di$Species, symbols = di$Species
) %>%
  layout(
    scene = list(
      xaxis = list(title = "Longitud sepalo"),
      yaxis = list(title = "Ancho sepalo"),
      # z carries Petal.Length; the title previously repeated the sepal label.
      zaxis = list(title = "Longitud petalo")
    )
  )
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
Datos escalados
# Interactive 3D scatter of the standardised measurements: sepal length (x),
# sepal width (y) and petal length (z); marker size follows ten times the
# standardised petal width and colour follows species.
# NOTE(review): `symbols` is documented as a palette of marker symbols, not a
# data mapping (`symbol`); left as-is to keep the rendered plot unchanged.
plot_ly(
  data = d2f,
  x = d2f$Sepal.Length, y = d2f$Sepal.Width, z = d2f$Petal.Length,
  size = d2f$Petal.Width * 10, color = di$Species, symbols = di$Species
) %>%
  layout(
    scene = list(
      xaxis = list(title = "Longitud sepalo"),
      yaxis = list(title = "Ancho sepalo"),
      # z carries Petal.Length; the title previously repeated the sepal label.
      zaxis = list(title = "Longitud petalo")
    )
  )
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.