libreria plotly

plot(res)

library(car)

## Loading required package: carData

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

df=data.frame(iris)
some(df)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 4            4.6         3.1          1.5         0.2     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 60           5.2         2.7          3.9         1.4 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 142          6.9         3.1          5.1         2.3  virginica

iris.2 <- iris[,-5]
species <- iris[,5]

di=data.frame(df) %>% 
  mutate(Species=dplyr::recode(Species,
                               setosa="st",
                               versicolor="vs",
                               virginica="vg"))  
pairs(di[,1:4], col = df$Species,lower.panel = NULL)
par(xpd = TRUE)
legend(x = 0.05, y = 0.4, cex = 2,
       legend=as.character(levels(df$Species)),
       fill = unique(df$Species))

par(xpd = NA)

Analisis de conglomerados por metodo k-means

set.seed(20)
k.means.fit <-kmeans(di[,1:4], 3, nstart = 10)
k.means.fit

## K-means clustering with 3 clusters of sizes 50, 62, 38
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 15.15100 39.82097 23.87947
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

k.means.fit$centers

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053

k.means.fit$ifault

## [1] 0

grupos=k.means.fit$cluster
table(di$Species,grupos)

##     grupos
##       1  2  3
##   st 50  0  0
##   vs  0 48  2
##   vg  0 14 36

dif=data.frame(di,grupos)
dif=data.frame(dif) %>% 
  mutate(grupos=dplyr::recode(grupos,
                               "3"="st",
                               "2"="vs",
                               "1"="vg")) 
table(dif$grupos,dif$Species)

##     
##      st vs vg
##   st  0  2 36
##   vg 50  0  0
##   vs  0 48 14

Presentación de análisis de conglomerados en gráfico 2D

d2 <- scale(di[,1:4])
rownames(d2) <- di$Species
#www:for total within sum of square
fviz_nbclust(x = d2, FUNcluster = kmeans, method = "wss", k.max = 15, 
             diss = get_dist(d2, method = "euclidean"), nstart = 50)

set.seed(123)
d2f=data.frame(d2)
km_clusters <- kmeans(x = d2f, centers = 3, nstart = 50)

# Las funciones del paquete factoextra emplean el nombre de las filas del
# dataframe que contiene los datos como identificador de las observaciones.
# Esto permite aÃ±adir labels a los grÃ¡ficos.
fviz_cluster(object = km_clusters, data = d2f, show.clust.cent = TRUE,
             ellipse.type = "euclid", star.plot = TRUE, repel = TRUE,
             pointsize=0.5,outlier.color="darkred") +
  labs(title = "Resultados clustering K-means") +
  theme_bw() +  theme(legend.position = "none")

datos originales

plot_ly(data = di, x = di$Sepal.Length, y = di$Sepal.Width, z = di$Petal.Length,
        size = di$Petal.Width, color = di$Species, symbols = di$Species)%>%
  layout(
  scene = list(
    xaxis = list(title = "Longitud sepalo"),
    yaxis = list(title = "Ancho sepalo"),
    zaxis = list(title = "Longitud sepalo"))
  )

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

DAtos escalados

plot_ly(data = d2f, x = d2f$Sepal.Length, y = d2f$Sepal.Width, z = d2f$Petal.Length,
        size = d2f$Petal.Width*10, color = di$Species, symbols = di$Species)%>%
  layout(
  scene = list(
    xaxis = list(title = "Longitud sepalo"),
    yaxis = list(title = "Ancho sepalo"),
    zaxis = list(title = "Longitud sepalo"))
  )

## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d

## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

libreria plotly

Ruth Jeanneth Perez Vallejo

25/11/2020