library(knitr)
library(ggplot2)
library(factoextra)
library(FactoMineR)
library(vegan)
library(psych)
library(corrplot)
library(dplyr)

ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, 
                   obs.scale = 1 - scale, var.scale = scale, 
                   grupos = NULL, ellipse = FALSE, ellipse.prob = 0.68, 
                   labels = NULL, labels.size = 3, alpha = 1, 
                   var.axes = TRUE, 
                   circle = FALSE, circle.prob = 0.69, 
                   varname.size = 3, varname.adjust = 1.5, 
                   varname.abbrev = FALSE, ...)
{
  library(ggplot2)
  library(plyr)
  library(scales)
  library(grid)
  
  stopifnot(length(choices) == 2)
  
  # Recover the SVD
  if(inherits(pcobj, 'prcomp')){
    nobs.factor <- sqrt(nrow(pcobj$x) - 1)
    d <- pcobj$sdev
    u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
    v <- pcobj$rotation
  } else if(inherits(pcobj, 'princomp')) {
    nobs.factor <- sqrt(pcobj$n.obs)
    d <- pcobj$sdev
    u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
    v <- pcobj$loadings
  } else if(inherits(pcobj, 'PCA')) {
    nobs.factor <- sqrt(nrow(pcobj$call$X))
    d <- unlist(sqrt(pcobj$eig)[1])
    u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
    v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
  } else {
    stop('Expected a object of class prcomp, princomp or PCA')
  }
  
  # Scores
  df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))
  
  # Directions
  v <- sweep(v, 2, d^var.scale, FUN='*')
  df.v <- as.data.frame(v[, choices])
  
  names(df.u) <- c('xvar', 'yvar')
  names(df.v) <- names(df.u)
  
  if(pc.biplot) {
    df.u <- df.u * nobs.factor
  }
  
  # Scale the radius of the correlation circle so that it corresponds to 
  # a data ellipse for the standardized PC scores
  r <- 1
  
  # Scale directions
  v.scale <- rowSums(v^2)
  df.v <- df.v / sqrt(max(v.scale))
  
  ## Scale Scores
  r.scale=sqrt(max(df.u[,1]^2+df.u[,2]^2))
  df.u=.99*df.u/r.scale
  
  # Change the labels for the axes
  if(obs.scale == 0) {
    u.axis.labs <- paste('standardized PC', choices, sep='')
  } else {
    u.axis.labs <- paste('Componente Principal ', choices, sep='')
  }
  
  # Append the proportion of explained variance to the axis labels
  u.axis.labs <- paste(u.axis.labs, 
                       sprintf('(%0.1f%%)', 
                               100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))
  
  # Score Labels
  if(!is.null(labels)) {
    df.u$labels <- labels
  }
  
  # Grouping variable
  if(!is.null(grupos)) {
    df.u$grupos <- grupos
  }
  
  # Variable Names
  if(varname.abbrev) {
    df.v$varname <- abbreviate(rownames(v))
  } else {
    df.v$varname <- rownames(v)
  }
  
  # Variables for text label placement
  df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
  df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
  
  # Base plot
  g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + 
    xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()
  
  if(var.axes) {
    # Draw circle
    if(circle) 
    {
      theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
      circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
      g <- g + geom_path(data = circle, color = muted('white'), 
                         size = 1/2, alpha = 1/3)
    }
    
    # Draw directions
    g <- g +
      geom_segment(data = df.v,
                   aes(x = 0, y = 0, xend = xvar, yend = yvar),
                   arrow = arrow(length = unit(1/2, 'picas')), 
                   color = muted('red'))
  }
  
  # Draw either labels or points
  if(!is.null(df.u$labels)) {
    if(!is.null(df.u$grupos)) {
      g <- g + geom_text(aes(label = labels, color = grupos), 
                         size = labels.size)
    } else {
      g <- g + geom_text(aes(label = labels), size = labels.size)      
    }
  } else {
    if(!is.null(df.u$grupos)) {
      g <- g + geom_point(aes(color = grupos), alpha = alpha)
    } else {
      g <- g + geom_point(alpha = alpha)      
    }
  }
  
  # Overlay a concentration ellipse if there are grupos
  if(!is.null(df.u$grupos) && ellipse) {
    theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
    circle <- cbind(cos(theta), sin(theta))
    
    ell <- ddply(df.u, 'grupos', function(x) {
      if(nrow(x) < 2) {
        return(NULL)
      } else if(nrow(x) == 2) {
        sigma <- var(cbind(x$xvar, x$yvar))
      } else {
        sigma <- diag(c(var(x$xvar), var(x$yvar)))
      }
      mu <- c(mean(x$xvar), mean(x$yvar))
      ed <- sqrt(qchisq(ellipse.prob, df = 2))
      data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'), 
                 grupos = x$grupos[1])
    })
    names(ell)[1:2] <- c('xvar', 'yvar')
    g <- g + geom_path(data = ell, aes(color = grupos, group = grupos))
  }
  
  # Label the variable axes
  if(var.axes) {
    g <- g + 
      geom_text(data = df.v, 
                aes(label = varname, x = xvar, y = yvar, 
                    angle = angle, hjust = hjust), 
                color = 'darkred', size = varname.size)
  }
  # Change the name of the legend for grupos
  # if(!is.null(grupos)) {
  #   g <- g + scale_color_brewer(name = deparse(substitute(grupos)), 
  #                               palette = 'Dark2')
  # }
  
  # TODO: Add a second set of axes
  
  return(g)
}

Carregamento dos dados

data("USArrests")      
USArrests %>% kable
Murder Assault UrbanPop Rape
Alabama 13.2 236 58 21.2
Alaska 10.0 263 48 44.5
Arizona 8.1 294 80 31.0
Arkansas 8.8 190 50 19.5
California 9.0 276 91 40.6
Colorado 7.9 204 78 38.7
Connecticut 3.3 110 77 11.1
Delaware 5.9 238 72 15.8
Florida 15.4 335 80 31.9
Georgia 17.4 211 60 25.8
Hawaii 5.3 46 83 20.2
Idaho 2.6 120 54 14.2
Illinois 10.4 249 83 24.0
Indiana 7.2 113 65 21.0
Iowa 2.2 56 57 11.3
Kansas 6.0 115 66 18.0
Kentucky 9.7 109 52 16.3
Louisiana 15.4 249 66 22.2
Maine 2.1 83 51 7.8
Maryland 11.3 300 67 27.8
Massachusetts 4.4 149 85 16.3
Michigan 12.1 255 74 35.1
Minnesota 2.7 72 66 14.9
Mississippi 16.1 259 44 17.1
Missouri 9.0 178 70 28.2
Montana 6.0 109 53 16.4
Nebraska 4.3 102 62 16.5
Nevada 12.2 252 81 46.0
New Hampshire 2.1 57 56 9.5
New Jersey 7.4 159 89 18.8
New Mexico 11.4 285 70 32.1
New York 11.1 254 86 26.1
North Carolina 13.0 337 45 16.1
North Dakota 0.8 45 44 7.3
Ohio 7.3 120 75 21.4
Oklahoma 6.6 151 68 20.0
Oregon 4.9 159 67 29.3
Pennsylvania 6.3 106 72 14.9
Rhode Island 3.4 174 87 8.3
South Carolina 14.4 279 48 22.5
South Dakota 3.8 86 45 12.8
Tennessee 13.2 188 59 26.9
Texas 12.7 201 80 25.5
Utah 3.2 120 80 22.9
Vermont 2.2 48 32 11.2
Virginia 8.5 156 63 20.7
Washington 4.0 145 73 26.2
West Virginia 5.7 81 39 9.3
Wisconsin 2.6 53 66 10.8
Wyoming 6.8 161 60 15.6

Dimensionamento e padronização

df <- scale(USArrests) 

df %>% kable()
Murder Assault UrbanPop Rape
Alabama 1.2425641 0.7828393 -0.5209066 -0.0034165
Alaska 0.5078625 1.1068225 -1.2117642 2.4842029
Arizona 0.0716334 1.4788032 0.9989801 1.0428784
Arkansas 0.2323494 0.2308680 -1.0735927 -0.1849166
California 0.2782682 1.2628144 1.7589234 2.0678203
Colorado 0.0257146 0.3988593 0.8608085 1.8649672
Connecticut -1.0304190 -0.7290821 0.7917228 -1.0817408
Delaware -0.4334739 0.8068381 0.4462940 -0.5799463
Florida 1.7476714 1.9707777 0.9989801 1.1389667
Georgia 2.2068599 0.4828549 -0.3827351 0.4877015
Hawaii -0.5712305 -1.4970423 1.2062373 -0.1101813
Idaho -1.1911350 -0.6090884 -0.7972496 -0.7507699
Illinois 0.5997002 0.9388312 1.2062373 0.2955249
Indiana -0.1350014 -0.6930840 -0.0373063 -0.0247694
Iowa -1.2829727 -1.3770485 -0.5899924 -1.0603878
Kansas -0.4105145 -0.6690853 0.0317794 -0.3450638
Kentucky 0.4389842 -0.7410815 -0.9354212 -0.5265639
Louisiana 1.7476714 0.9388312 0.0317794 0.1033483
Maine -1.3059321 -1.0530653 -1.0045069 -1.4340645
Maryland 0.8063350 1.5507995 0.1008652 0.7012311
Massachusetts -0.7778653 -0.2611064 1.3444088 -0.5265639
Michigan 0.9900104 1.0108275 0.5844655 1.4806140
Minnesota -1.1681755 -1.1850585 0.0317794 -0.6760346
Mississippi 1.9083874 1.0588250 -1.4881072 -0.4411521
Missouri 0.2782682 0.0868755 0.3081225 0.7439370
Montana -0.4105145 -0.7410815 -0.8663354 -0.5158874
Nebraska -0.8008247 -0.8250772 -0.2445636 -0.5052109
Nevada 1.0129698 0.9748294 1.0680658 2.6443501
New Hampshire -1.3059321 -1.3650491 -0.6590781 -1.2525644
New Jersey -0.0890826 -0.1411127 1.6207519 -0.2596519
New Mexico 0.8292944 1.3708088 0.3081225 1.1603196
New York 0.7604162 0.9988281 1.4134946 0.5197310
North Carolina 1.1966452 1.9947764 -1.4190215 -0.5479169
North Dakota -1.6044046 -1.5090416 -1.4881072 -1.4874469
Ohio -0.1120420 -0.6090884 0.6535513 0.0179365
Oklahoma -0.2727580 -0.2371077 0.1699510 -0.1315342
Oregon -0.6630682 -0.1411127 0.1008652 0.8613783
Pennsylvania -0.3416362 -0.7770796 0.4462940 -0.6760346
Rhode Island -1.0074596 0.0388780 1.4825804 -1.3806822
South Carolina 1.5180772 1.2988126 -1.2117642 0.1353777
South Dakota -0.9156219 -1.0170672 -1.4190215 -0.9002406
Tennessee 1.2425641 0.2068693 -0.4518209 0.6051428
Texas 1.1277670 0.3628612 0.9989801 0.4556721
Utah -1.0533784 -0.6090884 0.9989801 0.1780837
Vermont -1.2829727 -1.4730435 -2.3171363 -1.0710643
Virginia 0.1634711 -0.1771108 -0.1754778 -0.0567989
Washington -0.8697030 -0.3091039 0.5153798 0.5304074
West Virginia -0.4793928 -1.0770641 -1.8335360 -1.2739174
Wisconsin -1.1911350 -1.4130466 0.0317794 -1.1137702
Wyoming -0.2268391 -0.1171139 -0.3827351 -0.6012993

Número ótimo de clusters

# Silhueta mƩdia para kmeans
fviz_nbclust(df, kmeans, method = "silhouette")

# EstatĆ­stica de lacunas
fviz_nbclust(df, kmeans, method = "gap_stat")

# MƩtodo Elbow para kmeans
fviz_nbclust(df, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2)

Clusterização k-means

set.seed(123)
km.res=kmeans(df, 4, nstart=25)
print(km.res)
## K-means clustering with 4 clusters of sizes 8, 13, 16, 13
## 
## Cluster means:
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3 
## 
## Within cluster sum of squares by cluster:
## [1]  8.316061 11.952463 16.212213 19.922437
##  (between_SS / total_SS =  71.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Criando novo banco de dados com cluster

aggregate(USArrests, by=list(cluster=km.res$cluster), mean)
##   cluster   Murder   Assault UrbanPop     Rape
## 1       1 13.93750 243.62500 53.75000 21.41250
## 2       2  3.60000  78.53846 52.07692 12.17692
## 3       3  5.65625 138.87500 73.87500 18.78125
## 4       4 10.81538 257.38462 76.00000 33.19231
dd <- cbind(USArrests, cluster = km.res$cluster)
head(dd)
##            Murder Assault UrbanPop Rape cluster
## Alabama      13.2     236       58 21.2       1
## Alaska       10.0     263       48 44.5       4
## Arizona       8.1     294       80 31.0       4
## Arkansas      8.8     190       50 19.5       1
## California    9.0     276       91 40.6       4
## Colorado      7.9     204       78 38.7       4
km.res$cluster
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3
km.res$size
## [1]  8 13 16 13
km.res$centers
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964

Vizualizando os clusters

fviz_cluster(km.res, data=df,
             geom.ind = c("text"),
             ellipse.type="euclid",
             star.plot=TRUE,
             palette = "Dark2",
             repel=TRUE,
             ggtheme=theme_minimal()
)

Dendrograma

dista=dist(df, method="euclidean")
dista.hc=hclust(d=dista, method="ward.D")
fviz_dend(dista.hc, cex=0.5)

PCA

km.pca <- PCA(
  df,
  graph = F,
  scale.unit = TRUE)

eig.val <- get_eigenvalue(km.pca)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.4802416        62.006039                    62.00604
## Dim.2  0.9897652        24.744129                    86.75017
## Dim.3  0.3565632         8.914080                    95.66425
## Dim.4  0.1734301         4.335752                   100.00000
fviz_eig(km.pca, addlabels=TRUE)

var <- get_pca_var(km.pca)
var
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
#  Coordenadas
head(var$coord)
##              Dim.1      Dim.2      Dim.3       Dim.4
## Murder   0.8439764 -0.4160354  0.2037600  0.27037052
## Assault  0.9184432 -0.1870211  0.1601192 -0.30959159
## UrbanPop 0.4381168  0.8683282  0.2257242  0.05575330
## Rape     0.8558394  0.1664602 -0.4883190  0.03707412
# Cos2: qualidade no mapa do fator
head(var$cos2)
##              Dim.1     Dim.2      Dim.3       Dim.4
## Murder   0.7122962 0.1730854 0.04151814 0.073100217
## Assault  0.8435380 0.0349769 0.02563817 0.095846950
## UrbanPop 0.1919463 0.7539938 0.05095143 0.003108430
## Rape     0.7324611 0.0277090 0.23845544 0.001374491
# ContribuiƧƵes para os componentes principais
head(var$contrib)
##              Dim.1     Dim.2     Dim.3     Dim.4
## Murder   28.718825 17.487524 11.643977 42.149674
## Assault  34.010315  3.533859  7.190358 55.265468
## UrbanPop  7.739016 76.179065 14.289594  1.792325
## Rape     29.531844  2.799553 66.876071  0.792533
fviz_cos2(km.pca, choice = "var", axes = 1:2)

df %>% cor(method = "spearman") %>% corrplot(.,
                        method = "number",
                        type = "upper",
                        tl.pos = "td")

summary(km.pca)
## 
## Call:
## PCA(X = df, scale.unit = TRUE, graph = F) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4
## Variance               2.480   0.990   0.357   0.173
## % of var.             62.006  24.744   8.914   4.336
## Cumulative % of var.  62.006  86.750  95.664 100.000
## 
## Individuals (the 10 first)
##                 Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## Alabama     |  1.574 |  0.986  0.783  0.392 | -1.133  2.596  0.518 |  0.444
## Alaska      |  3.051 |  1.950  3.067  0.409 | -1.073  2.327  0.124 | -2.040
## Arizona     |  2.089 |  1.763  2.507  0.712 |  0.746  1.124  0.127 | -0.055
## Arkansas    |  1.149 | -0.141  0.016  0.015 | -1.120  2.534  0.950 | -0.115
## California  |  3.037 |  2.524  5.137  0.690 |  1.543  4.811  0.258 | -0.599
## Colorado    |  2.114 |  1.515  1.850  0.513 |  0.988  1.971  0.218 | -1.095
## Connecticut |  1.860 | -1.359  1.489  0.534 |  1.089  2.396  0.343 |  0.643
## Delaware    |  1.184 |  0.048  0.002  0.002 |  0.325  0.214  0.075 |  0.719
## Florida     |  3.070 |  3.013  7.321  0.964 | -0.039  0.003  0.000 |  0.577
## Georgia     |  2.366 |  1.639  2.167  0.480 | -1.279  3.305  0.292 |  0.342
##                ctr   cos2  
## Alabama      1.107  0.080 |
## Alaska      23.343  0.447 |
## Arizona      0.017  0.001 |
## Arkansas     0.074  0.010 |
## California   2.010  0.039 |
## Colorado     6.726  0.268 |
## Connecticut  2.321  0.120 |
## Delaware     2.897  0.368 |
## Florida      1.866  0.035 |
## Georgia      0.658  0.021 |
## 
## Variables
##                Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## Murder      |  0.844 28.719  0.712 | -0.416 17.488  0.173 |  0.204 11.644
## Assault     |  0.918 34.010  0.844 | -0.187  3.534  0.035 |  0.160  7.190
## UrbanPop    |  0.438  7.739  0.192 |  0.868 76.179  0.754 |  0.226 14.290
## Rape        |  0.856 29.532  0.732 |  0.166  2.800  0.028 | -0.488 66.876
##               cos2  
## Murder       0.042 |
## Assault      0.026 |
## UrbanPop     0.051 |
## Rape         0.238 |
# ContribuiƧƵes de variƔveis para PC1
fviz_contrib(km.pca, choice = "var", axes = 1, top = 10)

# ContribuiƧƵes de variƔveis para PC2
fviz_contrib(km.pca, choice = "var", axes = 2, top = 10)

# contribuição total para PC1 e PC2 

fviz_contrib(km.pca, choice = "var", axes = 1:2)

fviz_pca_biplot(
  km.pca,
  geom.ind = "text",
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  legend.title = "Contribuição",
  palette = "Dark2",
  repel = F
)

fviz_pca_ind(
  km.pca,
  geom = "text",
  habillage = as.factor(dd$cluster),
  addEllipses = TRUE,
  palette = "Dark2"
)

fviz_pca_ind(km.pca,
             geom.ind = "text",
             col.ind = as.factor(dd$cluster),
             addEllipses = TRUE, 
             legend.title = "Grupos",
             repel = T,
             palette = "Dark2"
)

df %>% pairs.panels(., 
                 show.points=TRUE, 
                 method = "spearman",
                 gap=0, 
                 stars=TRUE,
                 ci=FALSE,
                 alpha=0.05,
                 cex.cor=1,
                 cex=1.0,
                 breaks="Sturges",
                 rug=FALSE,
                 density=F,
                 hist.col="darkgreen",
                 factor=5,
                 digits=2,
                 ellipses=FALSE,
                 scale=FALSE,
                 smooth=TRUE,
                 lm=T,
                 cor=T
) 

dd.pca = prcomp(df, scale = T)


ggbiplot2(
  dd.pca,
  obs.scale = 1,
  var.scale = 1,
  ellipse = T,
  circle = T,
  varname.abbrev = T,
  grupos = as.factor(dd$cluster)
) + theme_minimal() + scale_color_brewer( palette = 'Dark2')

ind <- get_pca_ind(km.pca)
ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Coordenadas de indivĆ­duos
head(ind$coord)
##                 Dim.1      Dim.2       Dim.3        Dim.4
## Alabama     0.9855659 -1.1333924  0.44426879  0.156267145
## Alaska      1.9501378 -1.0732133 -2.04000333 -0.438583440
## Arizona     1.7631635  0.7459568 -0.05478082 -0.834652924
## Arkansas   -0.1414203 -1.1197968 -0.11457369 -0.182810896
## California  2.5239801  1.5429340 -0.59855680 -0.341996478
## Colorado    1.5145629  0.9875551 -1.09500699  0.001464887
# Qualidade dos indivĆ­duos
head(ind$cos2)
##                 Dim.1     Dim.2        Dim.3        Dim.4
## Alabama    0.39203099 0.5184533 0.0796600695 9.855631e-03
## Alaska     0.40854247 0.1237310 0.4470626440 2.066384e-02
## Arizona    0.71222383 0.1274849 0.0006875249 1.596038e-01
## Arkansas   0.01514565 0.9496046 0.0099410946 2.530862e-02
## California 0.69046522 0.2580267 0.0388311881 1.267690e-02
## Colorado   0.51338268 0.2182676 0.2683492506 4.802583e-07
# ContribuiƧƵes de indivƭduos
head(ind$contrib)
##                 Dim.1    Dim.2       Dim.3        Dim.4
## Alabama    0.78326250 2.595723  1.10709555 0.2816053535
## Alaska     3.06666679 2.327394 23.34292392 2.2182475522
## Arizona    2.50680877 1.124411  0.01683258 8.0337329373
## Arkansas   0.01612722 2.533823  0.07363144 0.3853982229
## California 5.13697999 4.810526  2.00957508 1.3488039169
## Colorado   1.84973970 1.970700  6.72554194 0.0000247465
fviz_pca_ind(km.pca)

fviz_pca_ind(km.pca, col.ind = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE
)

fviz_contrib(km.pca, choice = "ind", axes = 1:2)

set.seed(123)
my.cont.var <- rnorm(50)

fviz_pca_ind(km.pca, col.ind = my.cont.var,
             gradient.cols = c("blue", "yellow", "red"),
             legend.title = "Cont.Var")

fviz_pca_ind(km.pca,
             geom.ind = "point",
             col.ind = as.factor(dd$cluster),
             palette = "Dark2",
             addEllipses = TRUE, 
             legend.title = "Grupos"
)

fviz_pca_biplot(km.pca, 
                geom.ind = "point",
                fill.ind = as.factor(dd$cluster), col.ind = "black",
                pointshape = 21, 
                pointsize = 2,
                palette = "jco",
                addEllipses = TRUE,
                alpha.var ="contrib", 
                col.var = "contrib",
                gradient.cols = "Set2",
                legend.title = list(fill = "Cluster",
                                    color = "Contrib",
                                    alpha = "Contrib")
)