library(car)

## Loading required package: carData

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

df=data.frame(iris)
some(df)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 18           5.1         3.5          1.4         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 63           6.0         2.2          4.0         1.0 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 102          5.8         2.7          5.1         1.9  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 143          5.8         2.7          5.1         1.9  virginica

iris.2 <- iris[,-5]
species <- iris[,5]

di=data.frame(df) %>% mutate(Species=dplyr::recode(Species,
                               setosa="st",
                               versicolor="vs",
                               virginica="vg"))  
pairs(di[,1:4], col = df$Species,lower.panel = NULL)
par(xpd = TRUE)
legend(x = 0.05, y = 0.4, cex = 2,
       legend=as.character(levels(df$Species)),
       fill = unique(df$Species))

par(xpd = NA)

set.seed(20)
k.means.fit <-kmeans(di[,1:4], 3, nstart = 10)
k.means.fit

## K-means clustering with 3 clusters of sizes 50, 62, 38
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 15.15100 39.82097 23.87947
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

k.means.fit$centers

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053

k.means.fit$ifault

## [1] 0

grupos=k.means.fit$cluster
table(di$Species,grupos)

##     grupos
##       1  2  3
##   st 50  0  0
##   vs  0 48  2
##   vg  0 14 36

dif=data.frame(di,grupos)
dif=data.frame(dif) %>% 
  mutate(grupos=dplyr::recode(grupos,
                               "3"="st",
                               "2"="vs",
                               "1"="vg")) 
table(dif$grupos,dif$Species)

##     
##      st vs vg
##   st  0  2 36
##   vg 50  0  0
##   vs  0 48 14

#######visualizacion
library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

d2 <- scale(di[,1:4])
rownames(d2) <- di$Species
#www:for total within sum of square
fviz_nbclust(x = d2, FUNcluster = kmeans, method = "wss", k.max = 15, 
             diss = get_dist(d2, method = "euclidean"), nstart = 50)

set.seed(123)
d2f=data.frame(d2)
km_clusters <- kmeans(x = d2f, centers = 3, nstart = 50)

# Las funciones del paquete factoextra emplean el nombre de las filas del
# dataframe que contiene los datos como identificador de las observaciones.
# Esto permite aÃ±adir labels a los grÃ¡ficos.
fviz_cluster(object = km_clusters, data = d2f, show.clust.cent = TRUE,
             ellipse.type = "euclid", star.plot = TRUE, repel = TRUE,
             pointsize=0.5,outlier.color="darkred") +
  labs(title = "Resultados clustering K-means") +
  theme_bw() +  theme(legend.position = "none")

Diagrama de burbujas 4D con la libreria plotly

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

plot_ly(d2f, x=d2f$Sepal.Length, y=d2f$Sepal.Width, z=d2f$Petal.Length, size=d2f$Petal.Width, color = di$Species)

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

Clustering. Diagrama de burbujas librería plotly

Antonio Ortiz

8/12/2020

Diagrama de burbujas 4D con la libreria plotly