library(knitr)
library(ggplot2)
library(factoextra)
library(FactoMineR)
library(vegan)
library(psych)
library(corrplot)
library(dplyr)
ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
grupos = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
{
library(ggplot2)
library(plyr)
library(scales)
library(grid)
stopifnot(length(choices) == 2)
# Recover the SVD
if(inherits(pcobj, 'prcomp')){
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$rotation
} else if(inherits(pcobj, 'princomp')) {
nobs.factor <- sqrt(pcobj$n.obs)
d <- pcobj$sdev
u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
v <- pcobj$loadings
} else if(inherits(pcobj, 'PCA')) {
nobs.factor <- sqrt(nrow(pcobj$call$X))
d <- unlist(sqrt(pcobj$eig)[1])
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else {
stop('Expected a object of class prcomp, princomp or PCA')
}
# Scores
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))
# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
df.v <- as.data.frame(v[, choices])
names(df.u) <- c('xvar', 'yvar')
names(df.v) <- names(df.u)
if(pc.biplot) {
df.u <- df.u * nobs.factor
}
# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
r <- 1
# Scale directions
v.scale <- rowSums(v^2)
df.v <- df.v / sqrt(max(v.scale))
## Scale Scores
r.scale=sqrt(max(df.u[,1]^2+df.u[,2]^2))
df.u=.99*df.u/r.scale
# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
} else {
u.axis.labs <- paste('Componente Principal ', choices, sep='')
}
# Append the proportion of explained variance to the axis labels
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%%)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))
# Score Labels
if(!is.null(labels)) {
df.u$labels <- labels
}
# Grouping variable
if(!is.null(grupos)) {
df.u$grupos <- grupos
}
# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}
# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)
# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()
if(var.axes) {
# Draw circle
if(circle)
{
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
g <- g + geom_path(data = circle, color = muted('white'),
size = 1/2, alpha = 1/3)
}
# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
}
# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$grupos)) {
g <- g + geom_text(aes(label = labels, color = grupos),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$grupos)) {
g <- g + geom_point(aes(color = grupos), alpha = alpha)
} else {
g <- g + geom_point(alpha = alpha)
}
}
# Overlay a concentration ellipse if there are grupos
if(!is.null(df.u$grupos) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))
ell <- ddply(df.u, 'grupos', function(x) {
if(nrow(x) < 2) {
return(NULL)
} else if(nrow(x) == 2) {
sigma <- var(cbind(x$xvar, x$yvar))
} else {
sigma <- diag(c(var(x$xvar), var(x$yvar)))
}
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse.prob, df = 2))
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
grupos = x$grupos[1])
})
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = grupos, group = grupos))
}
# Label the variable axes
if(var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
}
# Change the name of the legend for grupos
# if(!is.null(grupos)) {
# g <- g + scale_color_brewer(name = deparse(substitute(grupos)),
# palette = 'Dark2')
# }
# TODO: Add a second set of axes
return(g)
}
Carregamento dos dados
data("USArrests")
USArrests %>% kable
| Murder | Assault | UrbanPop | Rape | |
|---|---|---|---|---|
| Alabama | 13.2 | 236 | 58 | 21.2 |
| Alaska | 10.0 | 263 | 48 | 44.5 |
| Arizona | 8.1 | 294 | 80 | 31.0 |
| Arkansas | 8.8 | 190 | 50 | 19.5 |
| California | 9.0 | 276 | 91 | 40.6 |
| Colorado | 7.9 | 204 | 78 | 38.7 |
| Connecticut | 3.3 | 110 | 77 | 11.1 |
| Delaware | 5.9 | 238 | 72 | 15.8 |
| Florida | 15.4 | 335 | 80 | 31.9 |
| Georgia | 17.4 | 211 | 60 | 25.8 |
| Hawaii | 5.3 | 46 | 83 | 20.2 |
| Idaho | 2.6 | 120 | 54 | 14.2 |
| Illinois | 10.4 | 249 | 83 | 24.0 |
| Indiana | 7.2 | 113 | 65 | 21.0 |
| Iowa | 2.2 | 56 | 57 | 11.3 |
| Kansas | 6.0 | 115 | 66 | 18.0 |
| Kentucky | 9.7 | 109 | 52 | 16.3 |
| Louisiana | 15.4 | 249 | 66 | 22.2 |
| Maine | 2.1 | 83 | 51 | 7.8 |
| Maryland | 11.3 | 300 | 67 | 27.8 |
| Massachusetts | 4.4 | 149 | 85 | 16.3 |
| Michigan | 12.1 | 255 | 74 | 35.1 |
| Minnesota | 2.7 | 72 | 66 | 14.9 |
| Mississippi | 16.1 | 259 | 44 | 17.1 |
| Missouri | 9.0 | 178 | 70 | 28.2 |
| Montana | 6.0 | 109 | 53 | 16.4 |
| Nebraska | 4.3 | 102 | 62 | 16.5 |
| Nevada | 12.2 | 252 | 81 | 46.0 |
| New Hampshire | 2.1 | 57 | 56 | 9.5 |
| New Jersey | 7.4 | 159 | 89 | 18.8 |
| New Mexico | 11.4 | 285 | 70 | 32.1 |
| New York | 11.1 | 254 | 86 | 26.1 |
| North Carolina | 13.0 | 337 | 45 | 16.1 |
| North Dakota | 0.8 | 45 | 44 | 7.3 |
| Ohio | 7.3 | 120 | 75 | 21.4 |
| Oklahoma | 6.6 | 151 | 68 | 20.0 |
| Oregon | 4.9 | 159 | 67 | 29.3 |
| Pennsylvania | 6.3 | 106 | 72 | 14.9 |
| Rhode Island | 3.4 | 174 | 87 | 8.3 |
| South Carolina | 14.4 | 279 | 48 | 22.5 |
| South Dakota | 3.8 | 86 | 45 | 12.8 |
| Tennessee | 13.2 | 188 | 59 | 26.9 |
| Texas | 12.7 | 201 | 80 | 25.5 |
| Utah | 3.2 | 120 | 80 | 22.9 |
| Vermont | 2.2 | 48 | 32 | 11.2 |
| Virginia | 8.5 | 156 | 63 | 20.7 |
| Washington | 4.0 | 145 | 73 | 26.2 |
| West Virginia | 5.7 | 81 | 39 | 9.3 |
| Wisconsin | 2.6 | 53 | 66 | 10.8 |
| Wyoming | 6.8 | 161 | 60 | 15.6 |
Dimensionamento e padronização
df <- scale(USArrests)
df %>% kable()
| Murder | Assault | UrbanPop | Rape | |
|---|---|---|---|---|
| Alabama | 1.2425641 | 0.7828393 | -0.5209066 | -0.0034165 |
| Alaska | 0.5078625 | 1.1068225 | -1.2117642 | 2.4842029 |
| Arizona | 0.0716334 | 1.4788032 | 0.9989801 | 1.0428784 |
| Arkansas | 0.2323494 | 0.2308680 | -1.0735927 | -0.1849166 |
| California | 0.2782682 | 1.2628144 | 1.7589234 | 2.0678203 |
| Colorado | 0.0257146 | 0.3988593 | 0.8608085 | 1.8649672 |
| Connecticut | -1.0304190 | -0.7290821 | 0.7917228 | -1.0817408 |
| Delaware | -0.4334739 | 0.8068381 | 0.4462940 | -0.5799463 |
| Florida | 1.7476714 | 1.9707777 | 0.9989801 | 1.1389667 |
| Georgia | 2.2068599 | 0.4828549 | -0.3827351 | 0.4877015 |
| Hawaii | -0.5712305 | -1.4970423 | 1.2062373 | -0.1101813 |
| Idaho | -1.1911350 | -0.6090884 | -0.7972496 | -0.7507699 |
| Illinois | 0.5997002 | 0.9388312 | 1.2062373 | 0.2955249 |
| Indiana | -0.1350014 | -0.6930840 | -0.0373063 | -0.0247694 |
| Iowa | -1.2829727 | -1.3770485 | -0.5899924 | -1.0603878 |
| Kansas | -0.4105145 | -0.6690853 | 0.0317794 | -0.3450638 |
| Kentucky | 0.4389842 | -0.7410815 | -0.9354212 | -0.5265639 |
| Louisiana | 1.7476714 | 0.9388312 | 0.0317794 | 0.1033483 |
| Maine | -1.3059321 | -1.0530653 | -1.0045069 | -1.4340645 |
| Maryland | 0.8063350 | 1.5507995 | 0.1008652 | 0.7012311 |
| Massachusetts | -0.7778653 | -0.2611064 | 1.3444088 | -0.5265639 |
| Michigan | 0.9900104 | 1.0108275 | 0.5844655 | 1.4806140 |
| Minnesota | -1.1681755 | -1.1850585 | 0.0317794 | -0.6760346 |
| Mississippi | 1.9083874 | 1.0588250 | -1.4881072 | -0.4411521 |
| Missouri | 0.2782682 | 0.0868755 | 0.3081225 | 0.7439370 |
| Montana | -0.4105145 | -0.7410815 | -0.8663354 | -0.5158874 |
| Nebraska | -0.8008247 | -0.8250772 | -0.2445636 | -0.5052109 |
| Nevada | 1.0129698 | 0.9748294 | 1.0680658 | 2.6443501 |
| New Hampshire | -1.3059321 | -1.3650491 | -0.6590781 | -1.2525644 |
| New Jersey | -0.0890826 | -0.1411127 | 1.6207519 | -0.2596519 |
| New Mexico | 0.8292944 | 1.3708088 | 0.3081225 | 1.1603196 |
| New York | 0.7604162 | 0.9988281 | 1.4134946 | 0.5197310 |
| North Carolina | 1.1966452 | 1.9947764 | -1.4190215 | -0.5479169 |
| North Dakota | -1.6044046 | -1.5090416 | -1.4881072 | -1.4874469 |
| Ohio | -0.1120420 | -0.6090884 | 0.6535513 | 0.0179365 |
| Oklahoma | -0.2727580 | -0.2371077 | 0.1699510 | -0.1315342 |
| Oregon | -0.6630682 | -0.1411127 | 0.1008652 | 0.8613783 |
| Pennsylvania | -0.3416362 | -0.7770796 | 0.4462940 | -0.6760346 |
| Rhode Island | -1.0074596 | 0.0388780 | 1.4825804 | -1.3806822 |
| South Carolina | 1.5180772 | 1.2988126 | -1.2117642 | 0.1353777 |
| South Dakota | -0.9156219 | -1.0170672 | -1.4190215 | -0.9002406 |
| Tennessee | 1.2425641 | 0.2068693 | -0.4518209 | 0.6051428 |
| Texas | 1.1277670 | 0.3628612 | 0.9989801 | 0.4556721 |
| Utah | -1.0533784 | -0.6090884 | 0.9989801 | 0.1780837 |
| Vermont | -1.2829727 | -1.4730435 | -2.3171363 | -1.0710643 |
| Virginia | 0.1634711 | -0.1771108 | -0.1754778 | -0.0567989 |
| Washington | -0.8697030 | -0.3091039 | 0.5153798 | 0.5304074 |
| West Virginia | -0.4793928 | -1.0770641 | -1.8335360 | -1.2739174 |
| Wisconsin | -1.1911350 | -1.4130466 | 0.0317794 | -1.1137702 |
| Wyoming | -0.2268391 | -0.1171139 | -0.3827351 | -0.6012993 |
Número ótimo de clusters
# Silhueta mƩdia para kmeans
fviz_nbclust(df, kmeans, method = "silhouette")
# EstatĆstica de lacunas
fviz_nbclust(df, kmeans, method = "gap_stat")
# MƩtodo Elbow para kmeans
fviz_nbclust(df, kmeans, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
Clusterização k-means
set.seed(123)
km.res=kmeans(df, 4, nstart=25)
print(km.res)
## K-means clustering with 4 clusters of sizes 8, 13, 16, 13
##
## Cluster means:
## Murder Assault UrbanPop Rape
## 1 1.4118898 0.8743346 -0.8145211 0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001 0.5758298 -0.26165379
## 4 0.6950701 1.0394414 0.7226370 1.27693964
##
## Clustering vector:
## Alabama Alaska Arizona Arkansas California
## 1 4 4 1 4
## Colorado Connecticut Delaware Florida Georgia
## 4 3 3 4 1
## Hawaii Idaho Illinois Indiana Iowa
## 3 2 4 3 2
## Kansas Kentucky Louisiana Maine Maryland
## 3 2 1 2 4
## Massachusetts Michigan Minnesota Mississippi Missouri
## 3 4 2 1 4
## Montana Nebraska Nevada New Hampshire New Jersey
## 2 2 4 2 3
## New Mexico New York North Carolina North Dakota Ohio
## 4 4 1 2 3
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 3 3 3 3 1
## South Dakota Tennessee Texas Utah Vermont
## 2 1 4 3 2
## Virginia Washington West Virginia Wisconsin Wyoming
## 3 3 2 2 3
##
## Within cluster sum of squares by cluster:
## [1] 8.316061 11.952463 16.212213 19.922437
## (between_SS / total_SS = 71.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
Criando novo banco de dados com cluster
aggregate(USArrests, by=list(cluster=km.res$cluster), mean)
## cluster Murder Assault UrbanPop Rape
## 1 1 13.93750 243.62500 53.75000 21.41250
## 2 2 3.60000 78.53846 52.07692 12.17692
## 3 3 5.65625 138.87500 73.87500 18.78125
## 4 4 10.81538 257.38462 76.00000 33.19231
dd <- cbind(USArrests, cluster = km.res$cluster)
head(dd)
## Murder Assault UrbanPop Rape cluster
## Alabama 13.2 236 58 21.2 1
## Alaska 10.0 263 48 44.5 4
## Arizona 8.1 294 80 31.0 4
## Arkansas 8.8 190 50 19.5 1
## California 9.0 276 91 40.6 4
## Colorado 7.9 204 78 38.7 4
km.res$cluster
## Alabama Alaska Arizona Arkansas California
## 1 4 4 1 4
## Colorado Connecticut Delaware Florida Georgia
## 4 3 3 4 1
## Hawaii Idaho Illinois Indiana Iowa
## 3 2 4 3 2
## Kansas Kentucky Louisiana Maine Maryland
## 3 2 1 2 4
## Massachusetts Michigan Minnesota Mississippi Missouri
## 3 4 2 1 4
## Montana Nebraska Nevada New Hampshire New Jersey
## 2 2 4 2 3
## New Mexico New York North Carolina North Dakota Ohio
## 4 4 1 2 3
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 3 3 3 3 1
## South Dakota Tennessee Texas Utah Vermont
## 2 1 4 3 2
## Virginia Washington West Virginia Wisconsin Wyoming
## 3 3 2 2 3
km.res$size
## [1] 8 13 16 13
km.res$centers
## Murder Assault UrbanPop Rape
## 1 1.4118898 0.8743346 -0.8145211 0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001 0.5758298 -0.26165379
## 4 0.6950701 1.0394414 0.7226370 1.27693964
Vizualizando os clusters
fviz_cluster(km.res, data=df,
geom.ind = c("text"),
ellipse.type="euclid",
star.plot=TRUE,
palette = "Dark2",
repel=TRUE,
ggtheme=theme_minimal()
)
Dendrograma
dista=dist(df, method="euclidean")
dista.hc=hclust(d=dista, method="ward.D")
fviz_dend(dista.hc, cex=0.5)
PCA
km.pca <- PCA(
df,
graph = F,
scale.unit = TRUE)
eig.val <- get_eigenvalue(km.pca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.4802416 62.006039 62.00604
## Dim.2 0.9897652 24.744129 86.75017
## Dim.3 0.3565632 8.914080 95.66425
## Dim.4 0.1734301 4.335752 100.00000
fviz_eig(km.pca, addlabels=TRUE)
var <- get_pca_var(km.pca)
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Coordenadas
head(var$coord)
## Dim.1 Dim.2 Dim.3 Dim.4
## Murder 0.8439764 -0.4160354 0.2037600 0.27037052
## Assault 0.9184432 -0.1870211 0.1601192 -0.30959159
## UrbanPop 0.4381168 0.8683282 0.2257242 0.05575330
## Rape 0.8558394 0.1664602 -0.4883190 0.03707412
# Cos2: qualidade no mapa do fator
head(var$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4
## Murder 0.7122962 0.1730854 0.04151814 0.073100217
## Assault 0.8435380 0.0349769 0.02563817 0.095846950
## UrbanPop 0.1919463 0.7539938 0.05095143 0.003108430
## Rape 0.7324611 0.0277090 0.23845544 0.001374491
# ContribuiƧƵes para os componentes principais
head(var$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4
## Murder 28.718825 17.487524 11.643977 42.149674
## Assault 34.010315 3.533859 7.190358 55.265468
## UrbanPop 7.739016 76.179065 14.289594 1.792325
## Rape 29.531844 2.799553 66.876071 0.792533
fviz_cos2(km.pca, choice = "var", axes = 1:2)
df %>% cor(method = "spearman") %>% corrplot(.,
method = "number",
type = "upper",
tl.pos = "td")
summary(km.pca)
##
## Call:
## PCA(X = df, scale.unit = TRUE, graph = F)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4
## Variance 2.480 0.990 0.357 0.173
## % of var. 62.006 24.744 8.914 4.336
## Cumulative % of var. 62.006 86.750 95.664 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
## Alabama | 1.574 | 0.986 0.783 0.392 | -1.133 2.596 0.518 | 0.444
## Alaska | 3.051 | 1.950 3.067 0.409 | -1.073 2.327 0.124 | -2.040
## Arizona | 2.089 | 1.763 2.507 0.712 | 0.746 1.124 0.127 | -0.055
## Arkansas | 1.149 | -0.141 0.016 0.015 | -1.120 2.534 0.950 | -0.115
## California | 3.037 | 2.524 5.137 0.690 | 1.543 4.811 0.258 | -0.599
## Colorado | 2.114 | 1.515 1.850 0.513 | 0.988 1.971 0.218 | -1.095
## Connecticut | 1.860 | -1.359 1.489 0.534 | 1.089 2.396 0.343 | 0.643
## Delaware | 1.184 | 0.048 0.002 0.002 | 0.325 0.214 0.075 | 0.719
## Florida | 3.070 | 3.013 7.321 0.964 | -0.039 0.003 0.000 | 0.577
## Georgia | 2.366 | 1.639 2.167 0.480 | -1.279 3.305 0.292 | 0.342
## ctr cos2
## Alabama 1.107 0.080 |
## Alaska 23.343 0.447 |
## Arizona 0.017 0.001 |
## Arkansas 0.074 0.010 |
## California 2.010 0.039 |
## Colorado 6.726 0.268 |
## Connecticut 2.321 0.120 |
## Delaware 2.897 0.368 |
## Florida 1.866 0.035 |
## Georgia 0.658 0.021 |
##
## Variables
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## Murder | 0.844 28.719 0.712 | -0.416 17.488 0.173 | 0.204 11.644
## Assault | 0.918 34.010 0.844 | -0.187 3.534 0.035 | 0.160 7.190
## UrbanPop | 0.438 7.739 0.192 | 0.868 76.179 0.754 | 0.226 14.290
## Rape | 0.856 29.532 0.732 | 0.166 2.800 0.028 | -0.488 66.876
## cos2
## Murder 0.042 |
## Assault 0.026 |
## UrbanPop 0.051 |
## Rape 0.238 |
# ContribuiƧƵes de variƔveis para PC1
fviz_contrib(km.pca, choice = "var", axes = 1, top = 10)
# ContribuiƧƵes de variƔveis para PC2
fviz_contrib(km.pca, choice = "var", axes = 2, top = 10)
# contribuição total para PC1 e PC2
fviz_contrib(km.pca, choice = "var", axes = 1:2)
fviz_pca_biplot(
km.pca,
geom.ind = "text",
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Contribuição",
palette = "Dark2",
repel = F
)
fviz_pca_ind(
km.pca,
geom = "text",
habillage = as.factor(dd$cluster),
addEllipses = TRUE,
palette = "Dark2"
)
fviz_pca_ind(km.pca,
geom.ind = "text",
col.ind = as.factor(dd$cluster),
addEllipses = TRUE,
legend.title = "Grupos",
repel = T,
palette = "Dark2"
)
df %>% pairs.panels(.,
show.points=TRUE,
method = "spearman",
gap=0,
stars=TRUE,
ci=FALSE,
alpha=0.05,
cex.cor=1,
cex=1.0,
breaks="Sturges",
rug=FALSE,
density=F,
hist.col="darkgreen",
factor=5,
digits=2,
ellipses=FALSE,
scale=FALSE,
smooth=TRUE,
lm=T,
cor=T
)
dd.pca = prcomp(df, scale = T)
ggbiplot2(
dd.pca,
obs.scale = 1,
var.scale = 1,
ellipse = T,
circle = T,
varname.abbrev = T,
grupos = as.factor(dd$cluster)
) + theme_minimal() + scale_color_brewer( palette = 'Dark2')
ind <- get_pca_ind(km.pca)
ind
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Coordenadas de indivĆduos
head(ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4
## Alabama 0.9855659 -1.1333924 0.44426879 0.156267145
## Alaska 1.9501378 -1.0732133 -2.04000333 -0.438583440
## Arizona 1.7631635 0.7459568 -0.05478082 -0.834652924
## Arkansas -0.1414203 -1.1197968 -0.11457369 -0.182810896
## California 2.5239801 1.5429340 -0.59855680 -0.341996478
## Colorado 1.5145629 0.9875551 -1.09500699 0.001464887
# Qualidade dos indivĆduos
head(ind$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4
## Alabama 0.39203099 0.5184533 0.0796600695 9.855631e-03
## Alaska 0.40854247 0.1237310 0.4470626440 2.066384e-02
## Arizona 0.71222383 0.1274849 0.0006875249 1.596038e-01
## Arkansas 0.01514565 0.9496046 0.0099410946 2.530862e-02
## California 0.69046522 0.2580267 0.0388311881 1.267690e-02
## Colorado 0.51338268 0.2182676 0.2683492506 4.802583e-07
# ContribuiƧƵes de indivĆduos
head(ind$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4
## Alabama 0.78326250 2.595723 1.10709555 0.2816053535
## Alaska 3.06666679 2.327394 23.34292392 2.2182475522
## Arizona 2.50680877 1.124411 0.01683258 8.0337329373
## Arkansas 0.01612722 2.533823 0.07363144 0.3853982229
## California 5.13697999 4.810526 2.00957508 1.3488039169
## Colorado 1.84973970 1.970700 6.72554194 0.0000247465
fviz_pca_ind(km.pca)
fviz_pca_ind(km.pca, col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_contrib(km.pca, choice = "ind", axes = 1:2)
set.seed(123)
my.cont.var <- rnorm(50)
fviz_pca_ind(km.pca, col.ind = my.cont.var,
gradient.cols = c("blue", "yellow", "red"),
legend.title = "Cont.Var")
fviz_pca_ind(km.pca,
geom.ind = "point",
col.ind = as.factor(dd$cluster),
palette = "Dark2",
addEllipses = TRUE,
legend.title = "Grupos"
)
fviz_pca_biplot(km.pca,
geom.ind = "point",
fill.ind = as.factor(dd$cluster), col.ind = "black",
pointshape = 21,
pointsize = 2,
palette = "jco",
addEllipses = TRUE,
alpha.var ="contrib",
col.var = "contrib",
gradient.cols = "Set2",
legend.title = list(fill = "Cluster",
color = "Contrib",
alpha = "Contrib")
)