K-means

library(knitr)
library(ggplot2)
library(factoextra)
library(FactoMineR)
library(vegan)
library(psych)
library(corrplot)
library(dplyr)

#' Biplot of a principal component analysis, drawn with ggplot2
#'
#' Observation scores are rescaled so that the farthest point lies at radius
#' 0.99, and variable directions are rescaled so that the longest one
#' (measured over all retained components) has length 1.
#'
#' @param pcobj An object of class `prcomp`, `princomp` or `PCA` (FactoMineR).
#' @param choices Which two components to plot.
#' @param scale Default for `var.scale`; `obs.scale` defaults to `1 - scale`.
#' @param pc.biplot If `TRUE`, scores are multiplied by the observation factor
#'   (before the final rescaling to radius 0.99).
#' @param obs.scale Exponent applied to the component standard deviations
#'   when scaling the observation scores.
#' @param var.scale Exponent applied to the component standard deviations
#'   when scaling the variable directions.
#' @param grupos Optional grouping vector, one value per observation, mapped
#'   to colour.
#' @param ellipse Draw one ellipse per group (requires `grupos`)?
#' @param ellipse.prob Probability used for the chi-square radius of the
#'   ellipses.
#' @param labels Optional observation labels; when given, text is drawn
#'   instead of points.
#' @param labels.size Text size of the observation labels.
#' @param alpha Transparency of the observation points.
#' @param var.axes Draw the variable arrows and their names?
#' @param circle Draw a circle of radius 1 (only when `var.axes` is `TRUE`)?
#' @param circle.prob Currently unused; kept for interface compatibility.
#' @param varname.size Text size of the variable names.
#' @param varname.adjust Controls the horizontal justification of the
#'   variable names relative to the arrow tips.
#' @param varname.abbrev Abbreviate the variable names?
#' @param ... Currently unused.
#' @return A ggplot object.
ggbiplot2 <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
                      obs.scale = 1 - scale, var.scale = scale,
                      grupos = NULL, ellipse = FALSE, ellipse.prob = 0.68,
                      labels = NULL, labels.size = 3, alpha = 1,
                      var.axes = TRUE,
                      circle = FALSE, circle.prob = 0.69,
                      varname.size = 3, varname.adjust = 1.5,
                      varname.abbrev = FALSE, ...) {
  # Packages are referenced through their namespaces instead of being attached
  # with library(): attaching plyr from inside the function would mask dplyr
  # verbs in the calling session.
  stopifnot(length(choices) == 2)

  # Recover the SVD pieces:
  #   d      - standard deviations of the retained components
  #   pc.var - variances of ALL components (for the explained-variance labels)
  #   u      - scores divided by d and by the observation factor
  #   v      - variable directions
  if (inherits(pcobj, "prcomp")) {
    nobs.factor <- sqrt(nrow(pcobj$x) - 1)
    pc.var <- pcobj$sdev^2
    # x may hold fewer columns than sdev (e.g. when `rank.` was used)
    d <- pcobj$sdev[seq_len(ncol(pcobj$x))]
    u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pcobj$rotation
  } else if (inherits(pcobj, "princomp")) {
    nobs.factor <- sqrt(pcobj$n.obs)
    pc.var <- pcobj$sdev^2
    d <- pcobj$sdev
    u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pcobj$loadings
  } else if (inherits(pcobj, "PCA")) {
    nobs.factor <- sqrt(nrow(pcobj$call$X))
    # First column of `eig` holds the eigenvalues; `[, 1]` works whether
    # `eig` is a matrix or a data frame.
    pc.var <- as.numeric(pcobj$eig[, 1])
    d <- sqrt(pc.var[seq_len(ncol(pcobj$ind$coord))])
    u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- sweep(pcobj$var$coord, 2,
               sqrt(pc.var[seq_len(ncol(pcobj$var$coord))]), FUN = "/")
  } else {
    stop("Expected a object of class prcomp, princomp or PCA")
  }

  # Scores
  df.u <- as.data.frame(
    sweep(u[, choices], 2, d[choices]^obs.scale, FUN = "*")
  )

  # Directions
  v <- sweep(v, 2, d^var.scale, FUN = "*")
  df.v <- as.data.frame(v[, choices])

  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)

  if (pc.biplot) {
    df.u <- df.u * nobs.factor
  }

  # Radius of the reference circle
  r <- 1

  # Scale directions so the longest variable vector has length 1
  v.scale <- rowSums(v^2)
  df.v <- df.v / sqrt(max(v.scale))

  # Scale scores so the farthest observation sits just inside the circle
  r.scale <- sqrt(max(df.u[, 1]^2 + df.u[, 2]^2))
  df.u <- .99 * df.u / r.scale

  # Axis labels
  if (obs.scale == 0) {
    u.axis.labs <- paste0("standardized PC", choices)
  } else {
    u.axis.labs <- paste0("Componente Principal ", choices)
  }

  # Append the proportion of explained variance to the axis labels.
  # pc.var is used (not pcobj$sdev) so that FactoMineR objects get it too.
  u.axis.labs <- paste(
    u.axis.labs,
    sprintf("(%0.1f%%)", 100 * pc.var[choices] / sum(pc.var))
  )

  # Score labels
  if (!is.null(labels)) {
    df.u$labels <- labels
  }

  # Grouping variable
  if (!is.null(grupos)) {
    df.u$grupos <- grupos
  }

  # Variable names
  if (varname.abbrev) {
    df.v$varname <- abbreviate(rownames(v))
  } else {
    df.v$varname <- rownames(v)
  }

  # Variables for text label placement
  df.v$angle <- with(df.v, (180 / pi) * atan(yvar / xvar))
  df.v$hjust <- with(df.v, (1 - varname.adjust * sign(xvar)) / 2)

  # Base plot
  g <- ggplot2::ggplot(data = df.u, ggplot2::aes(x = xvar, y = yvar)) +
    ggplot2::xlab(u.axis.labs[1]) +
    ggplot2::ylab(u.axis.labs[2]) +
    ggplot2::coord_equal()

  theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))

  if (var.axes) {
    # Draw circle
    if (circle) {
      circle.df <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
      g <- g + ggplot2::geom_path(data = circle.df,
                                  color = scales::muted("white"),
                                  size = 1 / 2, alpha = 1 / 3)
    }

    # Draw directions
    g <- g +
      ggplot2::geom_segment(
        data = df.v,
        ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
        arrow = grid::arrow(length = grid::unit(1 / 2, "picas")),
        color = scales::muted("red")
      )
  }

  # Draw either labels or points
  if (!is.null(df.u$labels)) {
    if (!is.null(df.u$grupos)) {
      g <- g + ggplot2::geom_text(
        ggplot2::aes(label = labels, color = grupos),
        size = labels.size
      )
    } else {
      g <- g + ggplot2::geom_text(ggplot2::aes(label = labels),
                                  size = labels.size)
    }
  } else {
    if (!is.null(df.u$grupos)) {
      g <- g + ggplot2::geom_point(ggplot2::aes(color = grupos),
                                   alpha = alpha)
    } else {
      g <- g + ggplot2::geom_point(alpha = alpha)
    }
  }

  # Overlay a concentration ellipse per group
  if (!is.null(df.u$grupos) && ellipse) {
    unit.circle <- cbind(cos(theta), sin(theta))
    ed <- sqrt(qchisq(ellipse.prob, df = 2))

    ell <- plyr::ddply(df.u, "grupos", function(x) {
      if (nrow(x) < 2) {
        return(NULL)
      }
      if (nrow(x) == 2) {
        # Two points give a rank-1 covariance matrix, which chol() cannot
        # factor; fall back to the axis-aligned (diagonal) form.
        sigma <- diag(c(var(x$xvar), var(x$yvar)))
      } else {
        sigma <- var(cbind(x$xvar, x$yvar))
      }
      # Degenerate groups (zero variance, collinear points) are skipped
      # instead of aborting the whole plot.
      sigma.root <- tryCatch(chol(sigma), error = function(e) NULL)
      if (is.null(sigma.root)) {
        return(NULL)
      }
      mu <- c(mean(x$xvar), mean(x$yvar))
      data.frame(sweep(unit.circle %*% sigma.root * ed, 2, mu, FUN = "+"),
                 grupos = x$grupos[1])
    })

    if (nrow(ell) > 0) {
      # ddply puts the grouping column first and names it "grupos", so the
      # two coordinate columns are selected by exclusion before renaming.
      names(ell)[names(ell) != "grupos"] <- c("xvar", "yvar")
      g <- g + ggplot2::geom_path(
        data = ell,
        ggplot2::aes(color = grupos, group = grupos)
      )
    }
  }

  # Label the variable axes
  if (var.axes) {
    g <- g +
      ggplot2::geom_text(
        data = df.v,
        ggplot2::aes(label = varname, x = xvar, y = yvar,
                     angle = angle, hjust = hjust),
        color = "darkred", size = varname.size
      )
  }

  # TODO: Add a second set of axes

  g
}

Carregamento dos dados

# Load the built-in USArrests data set and render it as a table.
data("USArrests")
kable(USArrests)

	Murder	Assault	UrbanPop	Rape
Alabama	13.2	236	58	21.2
Alaska	10.0	263	48	44.5
Arizona	8.1	294	80	31.0
Arkansas	8.8	190	50	19.5
California	9.0	276	91	40.6
Colorado	7.9	204	78	38.7
Connecticut	3.3	110	77	11.1
Delaware	5.9	238	72	15.8
Florida	15.4	335	80	31.9
Georgia	17.4	211	60	25.8
Hawaii	5.3	46	83	20.2
Idaho	2.6	120	54	14.2
Illinois	10.4	249	83	24.0
Indiana	7.2	113	65	21.0
Iowa	2.2	56	57	11.3
Kansas	6.0	115	66	18.0
Kentucky	9.7	109	52	16.3
Louisiana	15.4	249	66	22.2
Maine	2.1	83	51	7.8
Maryland	11.3	300	67	27.8
Massachusetts	4.4	149	85	16.3
Michigan	12.1	255	74	35.1
Minnesota	2.7	72	66	14.9
Mississippi	16.1	259	44	17.1
Missouri	9.0	178	70	28.2
Montana	6.0	109	53	16.4
Nebraska	4.3	102	62	16.5
Nevada	12.2	252	81	46.0
New Hampshire	2.1	57	56	9.5
New Jersey	7.4	159	89	18.8
New Mexico	11.4	285	70	32.1
New York	11.1	254	86	26.1
North Carolina	13.0	337	45	16.1
North Dakota	0.8	45	44	7.3
Ohio	7.3	120	75	21.4
Oklahoma	6.6	151	68	20.0
Oregon	4.9	159	67	29.3
Pennsylvania	6.3	106	72	14.9
Rhode Island	3.4	174	87	8.3
South Carolina	14.4	279	48	22.5
South Dakota	3.8	86	45	12.8
Tennessee	13.2	188	59	26.9
Texas	12.7	201	80	25.5
Utah	3.2	120	80	22.9
Vermont	2.2	48	32	11.2
Virginia	8.5	156	63	20.7
Washington	4.0	145	73	26.2
West Virginia	5.7	81	39	9.3
Wisconsin	2.6	53	66	10.8
Wyoming	6.8	161	60	15.6

Dimensionamento e padronização

# Standardise every column of USArrests (scale() defaults: centre and scale).
df <- scale(USArrests)

# Render the standardised matrix as a table.
kable(df)

	Murder	Assault	UrbanPop	Rape
Alabama	1.2425641	0.7828393	-0.5209066	-0.0034165
Alaska	0.5078625	1.1068225	-1.2117642	2.4842029
Arizona	0.0716334	1.4788032	0.9989801	1.0428784
Arkansas	0.2323494	0.2308680	-1.0735927	-0.1849166
California	0.2782682	1.2628144	1.7589234	2.0678203
Colorado	0.0257146	0.3988593	0.8608085	1.8649672
Connecticut	-1.0304190	-0.7290821	0.7917228	-1.0817408
Delaware	-0.4334739	0.8068381	0.4462940	-0.5799463
Florida	1.7476714	1.9707777	0.9989801	1.1389667
Georgia	2.2068599	0.4828549	-0.3827351	0.4877015
Hawaii	-0.5712305	-1.4970423	1.2062373	-0.1101813
Idaho	-1.1911350	-0.6090884	-0.7972496	-0.7507699
Illinois	0.5997002	0.9388312	1.2062373	0.2955249
Indiana	-0.1350014	-0.6930840	-0.0373063	-0.0247694
Iowa	-1.2829727	-1.3770485	-0.5899924	-1.0603878
Kansas	-0.4105145	-0.6690853	0.0317794	-0.3450638
Kentucky	0.4389842	-0.7410815	-0.9354212	-0.5265639
Louisiana	1.7476714	0.9388312	0.0317794	0.1033483
Maine	-1.3059321	-1.0530653	-1.0045069	-1.4340645
Maryland	0.8063350	1.5507995	0.1008652	0.7012311
Massachusetts	-0.7778653	-0.2611064	1.3444088	-0.5265639
Michigan	0.9900104	1.0108275	0.5844655	1.4806140
Minnesota	-1.1681755	-1.1850585	0.0317794	-0.6760346
Mississippi	1.9083874	1.0588250	-1.4881072	-0.4411521
Missouri	0.2782682	0.0868755	0.3081225	0.7439370
Montana	-0.4105145	-0.7410815	-0.8663354	-0.5158874
Nebraska	-0.8008247	-0.8250772	-0.2445636	-0.5052109
Nevada	1.0129698	0.9748294	1.0680658	2.6443501
New Hampshire	-1.3059321	-1.3650491	-0.6590781	-1.2525644
New Jersey	-0.0890826	-0.1411127	1.6207519	-0.2596519
New Mexico	0.8292944	1.3708088	0.3081225	1.1603196
New York	0.7604162	0.9988281	1.4134946	0.5197310
North Carolina	1.1966452	1.9947764	-1.4190215	-0.5479169
North Dakota	-1.6044046	-1.5090416	-1.4881072	-1.4874469
Ohio	-0.1120420	-0.6090884	0.6535513	0.0179365
Oklahoma	-0.2727580	-0.2371077	0.1699510	-0.1315342
Oregon	-0.6630682	-0.1411127	0.1008652	0.8613783
Pennsylvania	-0.3416362	-0.7770796	0.4462940	-0.6760346
Rhode Island	-1.0074596	0.0388780	1.4825804	-1.3806822
South Carolina	1.5180772	1.2988126	-1.2117642	0.1353777
South Dakota	-0.9156219	-1.0170672	-1.4190215	-0.9002406
Tennessee	1.2425641	0.2068693	-0.4518209	0.6051428
Texas	1.1277670	0.3628612	0.9989801	0.4556721
Utah	-1.0533784	-0.6090884	0.9989801	0.1780837
Vermont	-1.2829727	-1.4730435	-2.3171363	-1.0710643
Virginia	0.1634711	-0.1771108	-0.1754778	-0.0567989
Washington	-0.8697030	-0.3091039	0.5153798	0.5304074
West Virginia	-0.4793928	-1.0770641	-1.8335360	-1.2739174
Wisconsin	-1.1911350	-1.4130466	0.0317794	-1.1137702
Wyoming	-0.2268391	-0.1171139	-0.3827351	-0.6012993

Número ótimo de clusters

# k-means uses random starting centres and the gap statistic relies on
# bootstrap resampling, so a seed is fixed before each call to make the
# plots reproducible.

# Average silhouette width for k-means
set.seed(123)
fviz_nbclust(df, kmeans, method = "silhouette")

# Gap statistic
set.seed(123)
fviz_nbclust(df, kmeans, method = "gap_stat")

# Elbow method for k-means; the dashed line marks k = 4
set.seed(123)
fviz_nbclust(df, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2)

Clusterização k-means

# Fit k-means with 4 centres, keeping the best of 25 random starts.
# The seed is fixed so the cluster numbering is reproducible.
set.seed(123)
km.res <- kmeans(df, centers = 4, nstart = 25)
print(km.res)

## K-means clustering with 4 clusters of sizes 8, 13, 16, 13
## 
## Cluster means:
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3 
## 
## Within cluster sum of squares by cluster:
## [1]  8.316061 11.952463 16.212213 19.922437
##  (between_SS / total_SS =  71.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Criando novo banco de dados com cluster

# Mean of each original (unscaled) variable within each cluster.
aggregate(
  USArrests,
  by = list(cluster = km.res$cluster),
  FUN = mean
)

##   cluster   Murder   Assault UrbanPop     Rape
## 1       1 13.93750 243.62500 53.75000 21.41250
## 2       2  3.60000  78.53846 52.07692 12.17692
## 3       3  5.65625 138.87500 73.87500 18.78125
## 4       4 10.81538 257.38462 76.00000 33.19231

# Original data with the cluster assignment appended as a column.
dd <- cbind(USArrests, cluster = km.res$cluster)
head(dd, n = 6L)

##            Murder Assault UrbanPop Rape cluster
## Alabama      13.2     236       58 21.2       1
## Alaska       10.0     263       48 44.5       4
## Arizona       8.1     294       80 31.0       4
## Arkansas      8.8     190       50 19.5       1
## California    9.0     276       91 40.6       4
## Colorado      7.9     204       78 38.7       4

# Cluster assignment of each state.
km.res[["cluster"]]

##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3

# Number of observations in each cluster.
km.res[["size"]]

## [1]  8 13 16 13

# Cluster centres (in standardised units, since k-means was fit on df).
km.res[["centers"]]

##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964

Visualizando os clusters

# Cluster plot of the k-means result on the standardised data.
fviz_cluster(
  km.res,
  data = df,
  geom.ind = "text",
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  palette = "Dark2",
  ggtheme = theme_minimal()
)

Dendrograma

# Hierarchical clustering on Euclidean distances between the standardised
# observations. "ward.D2" is used because it squares the dissimilarities
# internally and therefore implements Ward's criterion on plain Euclidean
# distances; "ward.D" only does so when given squared distances.
dista <- dist(df, method = "euclidean")
dista.hc <- hclust(d = dista, method = "ward.D2")
fviz_dend(dista.hc, cex = 0.5)

PCA

# PCA with FactoMineR on the standardised data; no automatic plots.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
km.pca <- PCA(
  df,
  graph = FALSE,
  scale.unit = TRUE
)

# Eigenvalues and share of variance of each dimension.
eig.val <- get_eigenvalue(km.pca)
eig.val

##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.4802416        62.006039                    62.00604
## Dim.2  0.9897652        24.744129                    86.75017
## Dim.3  0.3565632         8.914080                    95.66425
## Dim.4  0.1734301         4.335752                   100.00000

# Scree plot with the percentage printed on each bar.
fviz_eig(
  km.pca,
  addlabels = TRUE
)

# Variable-level PCA results (coordinates, correlations, cos2, contributions).
var <- get_pca_var(km.pca)
var

## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"

# Coordinates of the variables
head(var[["coord"]])

##              Dim.1      Dim.2      Dim.3       Dim.4
## Murder   0.8439764 -0.4160354  0.2037600  0.27037052
## Assault  0.9184432 -0.1870211  0.1601192 -0.30959159
## UrbanPop 0.4381168  0.8683282  0.2257242  0.05575330
## Rape     0.8558394  0.1664602 -0.4883190  0.03707412

# Cos2: quality of representation on the factor map
head(var[["cos2"]])

##              Dim.1     Dim.2      Dim.3       Dim.4
## Murder   0.7122962 0.1730854 0.04151814 0.073100217
## Assault  0.8435380 0.0349769 0.02563817 0.095846950
## UrbanPop 0.1919463 0.7539938 0.05095143 0.003108430
## Rape     0.7324611 0.0277090 0.23845544 0.001374491

# Contributions of the variables to the principal components
head(var[["contrib"]])

##              Dim.1     Dim.2     Dim.3     Dim.4
## Murder   28.718825 17.487524 11.643977 42.149674
## Assault  34.010315  3.533859  7.190358 55.265468
## UrbanPop  7.739016 76.179065 14.289594  1.792325
## Rape     29.531844  2.799553 66.876071  0.792533

# Cos2 of the variables summed over the first two dimensions.
fviz_cos2(km.pca, choice = "var", axes = 1:2)

# Spearman correlation matrix of the standardised variables, shown as
# numbers in the upper triangle.
corrplot(
  cor(df, method = "spearman"),
  method = "number",
  type = "upper",
  tl.pos = "td"
)

# Full textual summary of the PCA.
summary(km.pca)

## 
## Call:
## PCA(X = df, scale.unit = TRUE, graph = F) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4
## Variance               2.480   0.990   0.357   0.173
## % of var.             62.006  24.744   8.914   4.336
## Cumulative % of var.  62.006  86.750  95.664 100.000
## 
## Individuals (the 10 first)
##                 Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## Alabama     |  1.574 |  0.986  0.783  0.392 | -1.133  2.596  0.518 |  0.444
## Alaska      |  3.051 |  1.950  3.067  0.409 | -1.073  2.327  0.124 | -2.040
## Arizona     |  2.089 |  1.763  2.507  0.712 |  0.746  1.124  0.127 | -0.055
## Arkansas    |  1.149 | -0.141  0.016  0.015 | -1.120  2.534  0.950 | -0.115
## California  |  3.037 |  2.524  5.137  0.690 |  1.543  4.811  0.258 | -0.599
## Colorado    |  2.114 |  1.515  1.850  0.513 |  0.988  1.971  0.218 | -1.095
## Connecticut |  1.860 | -1.359  1.489  0.534 |  1.089  2.396  0.343 |  0.643
## Delaware    |  1.184 |  0.048  0.002  0.002 |  0.325  0.214  0.075 |  0.719
## Florida     |  3.070 |  3.013  7.321  0.964 | -0.039  0.003  0.000 |  0.577
## Georgia     |  2.366 |  1.639  2.167  0.480 | -1.279  3.305  0.292 |  0.342
##                ctr   cos2  
## Alabama      1.107  0.080 |
## Alaska      23.343  0.447 |
## Arizona      0.017  0.001 |
## Arkansas     0.074  0.010 |
## California   2.010  0.039 |
## Colorado     6.726  0.268 |
## Connecticut  2.321  0.120 |
## Delaware     2.897  0.368 |
## Florida      1.866  0.035 |
## Georgia      0.658  0.021 |
## 
## Variables
##                Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## Murder      |  0.844 28.719  0.712 | -0.416 17.488  0.173 |  0.204 11.644
## Assault     |  0.918 34.010  0.844 | -0.187  3.534  0.035 |  0.160  7.190
## UrbanPop    |  0.438  7.739  0.192 |  0.868 76.179  0.754 |  0.226 14.290
## Rape        |  0.856 29.532  0.732 |  0.166  2.800  0.028 | -0.488 66.876
##               cos2  
## Murder       0.042 |
## Assault      0.026 |
## UrbanPop     0.051 |
## Rape         0.238 |

# Contributions of the variables to PC1
fviz_contrib(
  km.pca,
  choice = "var",
  axes = 1,
  top = 10
)

# Contributions of the variables to PC2
fviz_contrib(
  km.pca,
  choice = "var",
  axes = 2,
  top = 10
)

# Total contribution to PC1 and PC2
fviz_contrib(
  km.pca,
  choice = "var",
  axes = 1:2
)

# TRUE/FALSE are spelled out below: T and F are ordinary variables that can
# be reassigned.

# Biplot with state names; variables coloured by their contribution.
fviz_pca_biplot(
  km.pca,
  geom.ind = "text",
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "Contribuição",
  palette = "Dark2",
  repel = FALSE
)

# Individuals grouped by k-means cluster, with one ellipse per group.
fviz_pca_ind(
  km.pca,
  geom = "text",
  habillage = as.factor(dd$cluster),
  addEllipses = TRUE,
  palette = "Dark2"
)

# Same grouping, with a legend title and non-overlapping labels.
fviz_pca_ind(
  km.pca,
  geom.ind = "text",
  col.ind = as.factor(dd$cluster),
  addEllipses = TRUE,
  legend.title = "Grupos",
  repel = TRUE,
  palette = "Dark2"
)

# Scatter-plot matrix of the standardised variables with Spearman
# correlations. TRUE/FALSE replace T/F, which are reassignable variables.
pairs.panels(
  df,
  show.points = TRUE,
  method = "spearman",
  gap = 0,
  stars = TRUE,
  ci = FALSE,
  alpha = 0.05,
  cex.cor = 1,
  cex = 1.0,
  breaks = "Sturges",
  rug = FALSE,
  density = FALSE,
  hist.col = "darkgreen",
  factor = 5,
  digits = 2,
  ellipses = FALSE,
  scale = FALSE,
  smooth = TRUE,
  lm = TRUE,
  cor = TRUE
)

# PCA with base R. The argument is spelled `scale.` (its full name) rather
# than relying on partial matching, and TRUE replaces the reassignable T.
dd.pca <- prcomp(df, scale. = TRUE)

# Custom biplot coloured by k-means cluster, with group ellipses, the
# reference circle and abbreviated variable names.
ggbiplot2(
  dd.pca,
  obs.scale = 1,
  var.scale = 1,
  ellipse = TRUE,
  circle = TRUE,
  varname.abbrev = TRUE,
  grupos = as.factor(dd$cluster)
) +
  theme_minimal() +
  scale_color_brewer(palette = "Dark2")

# Individual-level PCA results (coordinates, cos2, contributions).
ind <- get_pca_ind(km.pca)
ind

## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"

# Coordinates of the individuals
head(ind[["coord"]])

##                 Dim.1      Dim.2       Dim.3        Dim.4
## Alabama     0.9855659 -1.1333924  0.44426879  0.156267145
## Alaska      1.9501378 -1.0732133 -2.04000333 -0.438583440
## Arizona     1.7631635  0.7459568 -0.05478082 -0.834652924
## Arkansas   -0.1414203 -1.1197968 -0.11457369 -0.182810896
## California  2.5239801  1.5429340 -0.59855680 -0.341996478
## Colorado    1.5145629  0.9875551 -1.09500699  0.001464887

# Quality of representation (cos2) of the individuals
head(ind[["cos2"]])

##                 Dim.1     Dim.2        Dim.3        Dim.4
## Alabama    0.39203099 0.5184533 0.0796600695 9.855631e-03
## Alaska     0.40854247 0.1237310 0.4470626440 2.066384e-02
## Arizona    0.71222383 0.1274849 0.0006875249 1.596038e-01
## Arkansas   0.01514565 0.9496046 0.0099410946 2.530862e-02
## California 0.69046522 0.2580267 0.0388311881 1.267690e-02
## Colorado   0.51338268 0.2182676 0.2683492506 4.802583e-07

# Contributions of the individuals
head(ind[["contrib"]])

##                 Dim.1    Dim.2       Dim.3        Dim.4
## Alabama    0.78326250 2.595723  1.10709555 0.2816053535
## Alaska     3.06666679 2.327394 23.34292392 2.2182475522
## Arizona    2.50680877 1.124411  0.01683258 8.0337329373
## Arkansas   0.01612722 2.533823  0.07363144 0.3853982229
## California 5.13697999 4.810526  2.00957508 1.3488039169
## Colorado   1.84973970 1.970700  6.72554194 0.0000247465

# Default plot of the individuals.
fviz_pca_ind(km.pca)

# Individuals coloured by their cos2, with non-overlapping labels.
fviz_pca_ind(
  km.pca,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Contribution of each individual to the first two dimensions.
fviz_contrib(
  km.pca,
  choice = "ind",
  axes = 1:2
)

# Colour the individuals by an arbitrary continuous variable (random here).
# Its length is taken from the data instead of being hard-coded, because it
# must supply one value per individual in the PCA.
set.seed(123)
my.cont.var <- rnorm(nrow(df))

fviz_pca_ind(km.pca, col.ind = my.cont.var,
             gradient.cols = c("blue", "yellow", "red"),
             legend.title = "Cont.Var")

# Individuals as points, coloured by k-means cluster, with group ellipses.
fviz_pca_ind(
  km.pca,
  geom.ind = "point",
  col.ind = as.factor(dd$cluster),
  addEllipses = TRUE,
  palette = "Dark2",
  legend.title = "Grupos"
)

# Biplot: individuals as filled points by cluster; variables coloured and
# faded by their contribution.
fviz_pca_biplot(
  km.pca,
  # Individuals
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  fill.ind = as.factor(dd$cluster),
  col.ind = "black",
  palette = "jco",
  addEllipses = TRUE,
  # Variables
  col.var = "contrib",
  alpha.var = "contrib",
  gradient.cols = "Set2",
  # Legends
  legend.title = list(
    fill = "Cluster",
    color = "Contrib",
    alpha = "Contrib"
  )
)

K-means

Cid Póvoas

2020-07-08