head(jdat)
## # A tibble: 6 × 62
## STATE_ABBR statename state region division rz_ext rz_agr rz_cns rz_neu
## <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AK Alabama 1 3 6 -0.126 -0.105 -0.110 -0.0491
## 2 AL Alaska 2 4 9 0.0430 0.100 0.0806 -0.0152
## 3 AR Arizona 4 4 8 -0.00934 -0.0154 -0.0583 0.0765
## 4 AZ Arkansas 5 3 7 0.0122 -0.00702 0.00843 -0.0436
## 5 CA California 6 4 9 -0.0430 -0.0357 -0.0654 -0.0143
## 6 CO Colorado 8 4 8 -0.00489 -0.0658 -0.0221 -0.0451
## # … with 53 more variables: rz_opn <dbl>, mzextra <dbl>, mzagree <dbl>,
## # mzconsc <dbl>, mzneuro <dbl>, mzopen <dbl>, fzextra <dbl>, fzagree <dbl>,
## # fzconsc <dbl>, fzneuro <dbl>, fzopen <dbl>, E_LT30 <dbl>, A_LT30 <dbl>,
## # C_LT30 <dbl>, N_LT30 <dbl>, O_LT30 <dbl>, E_GT30 <dbl>, A_GT30 <dbl>,
## # C_GT30 <dbl>, N_GT30 <dbl>, O_GT30 <dbl>, GenD_E <dbl>, GenD_A <dbl>,
## # GenD_C <dbl>, GenD_N <dbl>, GenD_O <dbl>, AgeD_E <dbl>, AgeD_A <dbl>,
## # AgeD_C <dbl>, AgeD_N <dbl>, AgeD_O <dbl>, TFR <dbl>, alpha <dbl>, …
# changing from tibble to data frame
jdat <- as.data.frame(jdat)
head(jdat)
## STATE_ABBR statename state region division rz_ext rz_agr rz_cns
## 1 AK Alabama 1 3 6 -0.125762 -0.104588 -0.109966
## 2 AL Alaska 2 4 9 0.043042 0.100483 0.080650
## 3 AR Arizona 4 4 8 -0.009335 -0.015433 -0.058324
## 4 AZ Arkansas 5 3 7 0.012155 -0.007023 0.008430
## 5 CA California 6 4 9 -0.043034 -0.035667 -0.065377
## 6 CO Colorado 8 4 8 -0.004892 -0.065783 -0.022110
## rz_neu rz_opn mzextra mzagree mzconsc mzneuro mzopen fzextra
## 1 -0.049075 0.051630 -0.004242 0.412499 0.301710 -0.326062 0.427654 0.139469
## 2 -0.015238 -0.112345 0.112153 0.504055 0.357393 -0.292434 0.394611 0.250319
## 3 0.076501 -0.080592 0.090463 0.468918 0.313488 -0.242351 0.435278 0.200209
## 4 -0.043650 0.002879 0.100366 0.469782 0.343585 -0.307043 0.452290 0.219097
## 5 -0.014349 0.092209 0.058111 0.463655 0.294992 -0.271535 0.496399 0.187948
## 6 -0.045139 0.073840 0.084083 0.433223 0.310563 -0.315733 0.477640 0.211284
## fzagree fzconsc fzneuro fzopen E_LT30 A_LT30 C_LT30 N_LT30
## 1 0.534365 0.337558 -0.013555 0.419930 -0.140225 -0.133313 -0.142583 -0.041884
## 2 0.635946 0.440848 0.012200 0.300435 0.051194 0.140257 0.130334 -0.030951
## 3 0.574798 0.384941 0.061924 0.311068 -0.009714 -0.012163 -0.048157 0.072481
## 4 0.578078 0.413102 -0.013868 0.365238 -0.003936 0.007324 0.020526 -0.047188
## 5 0.557243 0.376993 -0.004257 0.411690 -0.070760 -0.023765 -0.059610 -0.013238
## 6 0.552301 0.406930 -0.010579 0.407388 -0.007388 -0.066719 -0.012239 -0.044091
## O_LT30 E_GT30 A_GT30 C_GT30 N_GT30 O_GT30 GenD_E
## 1 0.064560 -0.093362 -0.040240 -0.036899 -0.065184 0.022665 -0.143711
## 2 -0.109746 0.017092 -0.026130 -0.077508 0.034782 -0.120618 -0.138166
## 3 -0.036030 -0.008526 -0.022415 -0.080035 0.085087 -0.175739 -0.109746
## 4 0.014119 0.051829 -0.042397 -0.021394 -0.034928 -0.024835 -0.118731
## 5 0.082416 0.027033 -0.065745 -0.079949 -0.017157 0.116955 -0.129837
## 6 0.078307 0.001056 -0.063551 -0.045642 -0.047637 0.063192 -0.127201
## GenD_A GenD_C GenD_N GenD_O AgeD_E AgeD_A AgeD_C
## 1 -0.121866 -0.035848 -0.312507 0.007724 -0.046863 -0.093073 -0.105684
## 2 -0.131891 -0.083455 -0.304634 0.094176 0.034101 0.166388 0.207842
## 3 -0.105880 -0.071453 -0.304275 0.124210 -0.001189 0.010252 0.031878
## 4 -0.108296 -0.069517 -0.293175 0.087052 -0.055765 0.049721 0.041920
## 5 -0.093588 -0.082001 -0.267278 0.084709 -0.097794 0.041980 0.020339
## 6 -0.119078 -0.096367 -0.305154 0.070252 -0.008443 -0.003168 0.033403
## AgeD_N AgeD_O TFR alpha peak stop ageFB t_ageFM nevermar
## 1 0.023300 0.041896 2.3470 13.338400 23.90560 2.854100 24.3 25.85 0.316093
## 2 -0.065733 0.010872 1.8715 11.279618 24.63356 4.435501 23.6 26.60 0.291652
## 3 -0.012606 0.139709 2.0030 12.428397 23.17632 4.598583 23.0 25.65 0.263799
## 4 -0.012260 0.038953 2.0680 10.913909 25.19252 2.924860 24.0 26.80 0.316186
## 5 0.003918 -0.034539 1.9475 7.012688 28.85349 2.941833 25.6 28.30 0.360036
## 6 0.003546 0.015115 1.9240 6.950724 28.27968 3.288943 25.7 27.15 0.306618
## divorce cohabit abortion t_nmf unintprg famplnpw med_inc perAA perHisp
## 1 0.016276 8.2 12.0 31.9 53 147 64576 3.7 5.5
## 2 0.017909 4.8 12.0 45.0 55 147 40474 26.2 3.9
## 3 0.018978 5.3 8.7 39.1 56 152 38307 15.4 6.4
## 4 0.014391 7.7 15.2 41.3 51 151 46789 4.1 29.6
## 5 0.012347 8.0 27.6 33.9 56 245 57708 6.2 37.6
## 6 0.015716 8.1 15.7 29.8 48 80 54046 4.0 20.7
## perFem perBA perUrb voteO vryrel relcons
## 1 47.9 27.2 66.02 38.74 56.3 42.76
## 2 51.5 21.7 59.04 37.89 27.9 18.75
## 3 50.9 19.1 56.16 45.12 35.7 18.08
## 4 50.3 26.3 89.81 38.86 52.1 39.93
## 5 50.3 30.1 94.95 61.01 34.0 11.45
## 6 49.9 35.9 86.15 53.66 32.6 14.78
A Principal Components Analysis was conducted for five personality variables at the state level. The variables are Extraversion (rz_ext), Agreeableness (rz_agr), Conscientiousness (rz_cns), Neuroticism (rz_neu), and Openness (rz_opn).
# Personality variables
# c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")
jdat.pc <-prcomp(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], center = TRUE, scale = TRUE, retx = T)
summary(jdat.pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.7632 1.0031 0.68075 0.53194 0.37220
## Proportion of Variance 0.6218 0.2012 0.09268 0.05659 0.02771
## Cumulative Proportion 0.6218 0.8230 0.91570 0.97229 1.00000
PC1 explains 62% of variance and PC2 explains 20% of variance. Together the cumulative proportion of variance is 82%.
jdat.pc$rotation
## PC1 PC2 PC3 PC4 PC5
## rz_ext 0.4727570 -0.28812904 0.5841284 -0.31910990 -0.50044514
## rz_agr 0.5046479 0.05736142 -0.1897925 0.79135796 -0.28243859
## rz_cns 0.5265494 0.09646826 0.3029963 0.01126307 0.78835652
## rz_neu -0.2324309 -0.88700949 0.1094530 0.31628415 0.21719671
## rz_opn -0.4365237 0.34292763 0.7204069 0.41443808 -0.03320681
biplot(jdat.pc, scale = 0, xlabs=jdat[,1])
The biplot appears to show that Agreeableness and Conscientiousness are highly correlated.
#Screeplot
screeplot(jdat.pc, type = "l", main = "Scree Plot")
abline(h=1)
jdat.pc$sdev^2
## [1] 3.1088923 1.0061839 0.4634242 0.2829631 0.1385365
# Different function to obtain the eigenvalues
jdat.play <- PCA(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], scale.unit=T,graph=F)
eigenvalues <- jdat.play$eig
head(eigenvalues[,1:2])
## eigenvalue percentage of variance
## comp 1 3.1088923 62.177847
## comp 2 1.0061839 20.123677
## comp 3 0.4634242 9.268483
## comp 4 0.2829631 5.659262
## comp 5 0.1385365 2.770731
# Plot the first two components
hist(jdat.pc$x[,1])
hist(jdat.pc$x[,2])
cor(jdat.pc$x[,1],jdat.pc$x[,2])
## [1] -1.745457e-16
# Put scores in separate file so that I can merge them with my original data set
scores <- data.frame(jdat.pc$x)
jdat <- cbind(jdat, scores)
# now imposing USA regions on a biplot
ggplot(jdat, aes(PC1, PC2, col = Species, fill = region)) +
stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
geom_point(shape = 21, col = "black")
Region 1 is the north east. Region 2 is the midwest. Region 3 is the south. Region 4 is the west. All four regions overlap. Region 3 is wholly contained in Region 4. Region 1 diverges from the other regions to a greater extent than 2, 3, and 4.
It appears you would want “fill” to be a variable with only a few categorical values, such as region.
I tried mapping my division as well, but it isn’t clear what division means.
# now imposing USA divisions on a biplot
ggplot(jdat, aes(PC1, PC2, col = Species, fill = division)) +
stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
geom_point(shape = 21, col = "black")
## Too few points to calculate an ellipse
Graphic by division shows less difference than by region.
#correlations of original variables with PC scores
round(cor(jdat[,c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")]), 3)
## rz_ext rz_agr rz_cns rz_neu rz_opn
## rz_ext 1.000 0.622 0.772 -0.098 -0.581
## rz_agr 0.622 1.000 0.777 -0.363 -0.634
## rz_cns 0.772 0.777 1.000 -0.426 -0.582
## rz_neu -0.098 -0.363 -0.426 1.000 0.082
## rz_opn -0.581 -0.634 -0.582 0.082 1.000
# Plot a US map
# Get centroids
centroid_labels <- usmapdata::centroid_labels("states")
# Rename state abbreviation to match centroid_labels
jdat <- rename(jdat, "abbr" = STATE_ABBR)
# Join data to centroids
data_labels <- merge(centroid_labels, jdat, by = "abbr")
# producing map of PC1 values for States
map1dat <- data_labels %>%
select(fips, PC1)
plot_usmap(data = map1dat, values = "PC1")+
labs(title="Junkins Index",
subtitle="PC1",
caption = "Source: Junkins Data") +
theme(panel.background = element_rect(colour = "black"))+
scale_fill_continuous(low = "white", high ="darkred",
name = "Value on Index",label = scales::comma) +
theme(legend.position = "right") +
guides(fill = "none") +
geom_text(data = data_labels, ggplot2::aes(
x = x, y = y,
label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")
# producing map of PC2 values for States
map2dat <- data_labels %>%
select(fips, PC2)
plot_usmap(data = map2dat, values = "PC2")+
labs(title="Junkins Index",
subtitle="PC2",
caption = "Source: Junkins Data") +
theme(panel.background = element_rect(colour = "black"))+
scale_fill_continuous(low = "white", high ="darkred",
name = "Value on Index",label = scales::comma) +
theme(legend.position = "right") +
guides(fill = "none") +
geom_text(data = data_labels, ggplot2::aes(
x = x, y = y,
label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
# I need to center and scale my variables
# c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")
jdat$Extraversion <- scale(jdat$rz_ext, center = TRUE, scale = TRUE)
jdat$Agreeableness <- scale(jdat$rz_agr, center = TRUE, scale = TRUE)
jdat$Conscientiousness <- scale(jdat$rz_cns, center = TRUE, scale = TRUE)
jdat$Neuroticism <- scale(jdat$rz_neu, center = TRUE, scale = TRUE)
jdat$Openness <- scale(jdat$rz_opn, center = TRUE, scale = TRUE)
STATEDATA <- subset(jdat, select = c("Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"))
head(STATEDATA)
## Extraversion Agreeableness Conscientiousness Neuroticism Openness
## 1 -3.5516082 -1.8638115 -2.2141392 -1.0510473 1.1440545
## 2 1.2637407 1.8008589 1.6336407 -0.3856869 -1.4936254
## 3 -0.2303799 -0.2705892 -1.1716923 1.4182409 -0.9828509
## 4 0.3826497 -0.1203004 0.1758058 -0.9443718 0.3598524
## 5 -1.1916867 -0.6321759 -1.3140644 -0.3682058 1.7968028
## 6 -0.1036377 -1.1703564 -0.4406755 -0.9736510 1.5013215
#DETERMINE HOW MANY CLUSTERS IS OPTIMAL
#########################################
#plot number of clusters vs. total within sum of squares
fviz_nbclust(STATEDATA, kmeans, method = "wss")
#calculate gap statistic based on number of clusters
gap_stat <- clusGap(STATEDATA,
FUN = kmeans,
nstart = 25,
K.max = 10,
B = 50)
#plot number of clusters vs. gap statistic
fviz_gap_stat(gap_stat)
##########################################
#PERFORM K-MEANS CLUSTERING WITH OPTIMAL K
##########################################
#make this example reproducible
set.seed(1)
#perform k-means clustering with k = 3 clusters
km <- kmeans(STATEDATA, centers = 3, nstart = 25)
#view results
km
## K-means clustering with 3 clusters of sizes 24, 13, 13
##
## Cluster means:
## Extraversion Agreeableness Conscientiousness Neuroticism Openness
## 1 0.6809430 0.6450706 0.8641922 -0.3815411 -0.5791547
## 2 -0.9838597 -0.6627354 -0.7672295 -0.5966224 0.7617837
## 3 -0.2732658 -0.5281641 -0.8282022 1.3010059 0.3074249
##
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 2 1 3 1 2 2 3 1 1 1 2 1 1 1 1 1 1 3 3 2 3 1 1 1 1 2
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## 1 1 1 3 3 2 2 3 3 3 2 1 3 1 1 1 2 1 2 3 2 1 3 2
##
## Within cluster sum of squares by cluster:
## [1] 52.86982 43.97169 22.71133
## (between_SS / total_SS = 51.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
#plot results of final k-means model
fviz_cluster(km, data = STATEDATA)
#find mean of each cluster
aggregate(STATEDATA, by=list(cluster=km$cluster), mean)
## cluster Extraversion Agreeableness Conscientiousness Neuroticism Openness
## 1 1 0.6809430 0.6450706 0.8641922 -0.3815411 -0.5791547
## 2 2 -0.9838597 -0.6627354 -0.7672295 -0.5966224 0.7617837
## 3 3 -0.2732658 -0.5281641 -0.8282022 1.3010059 0.3074249
jdat <- cbind(jdat, cluster = km$cluster)
jdat$newcluster <- car::recode(jdat$cluster,"1=3;2=1;3=2")
boxplot(jdat$PC1 ~ jdat$newcluster,
col='steelblue',
xlab='Cluster',
ylab='Value on PC1')
boxplot(jdat$PC2 ~ jdat$newcluster,
col='steelblue',
xlab='Cluster',
ylab='Value on PC2')
#add cluster assigment to original data
final_data <- cbind(jdat, kcluster = km$cluster)
#view final data
head(final_data)
## abbr statename state region division rz_ext rz_agr rz_cns rz_neu
## 1 AK Alabama 1 3 6 -0.125762 -0.104588 -0.109966 -0.049075
## 2 AL Alaska 2 4 9 0.043042 0.100483 0.080650 -0.015238
## 3 AR Arizona 4 4 8 -0.009335 -0.015433 -0.058324 0.076501
## 4 AZ Arkansas 5 3 7 0.012155 -0.007023 0.008430 -0.043650
## 5 CA California 6 4 9 -0.043034 -0.035667 -0.065377 -0.014349
## 6 CO Colorado 8 4 8 -0.004892 -0.065783 -0.022110 -0.045139
## rz_opn mzextra mzagree mzconsc mzneuro mzopen fzextra fzagree
## 1 0.051630 -0.004242 0.412499 0.301710 -0.326062 0.427654 0.139469 0.534365
## 2 -0.112345 0.112153 0.504055 0.357393 -0.292434 0.394611 0.250319 0.635946
## 3 -0.080592 0.090463 0.468918 0.313488 -0.242351 0.435278 0.200209 0.574798
## 4 0.002879 0.100366 0.469782 0.343585 -0.307043 0.452290 0.219097 0.578078
## 5 0.092209 0.058111 0.463655 0.294992 -0.271535 0.496399 0.187948 0.557243
## 6 0.073840 0.084083 0.433223 0.310563 -0.315733 0.477640 0.211284 0.552301
## fzconsc fzneuro fzopen E_LT30 A_LT30 C_LT30 N_LT30 O_LT30
## 1 0.337558 -0.013555 0.419930 -0.140225 -0.133313 -0.142583 -0.041884 0.064560
## 2 0.440848 0.012200 0.300435 0.051194 0.140257 0.130334 -0.030951 -0.109746
## 3 0.384941 0.061924 0.311068 -0.009714 -0.012163 -0.048157 0.072481 -0.036030
## 4 0.413102 -0.013868 0.365238 -0.003936 0.007324 0.020526 -0.047188 0.014119
## 5 0.376993 -0.004257 0.411690 -0.070760 -0.023765 -0.059610 -0.013238 0.082416
## 6 0.406930 -0.010579 0.407388 -0.007388 -0.066719 -0.012239 -0.044091 0.078307
## E_GT30 A_GT30 C_GT30 N_GT30 O_GT30 GenD_E GenD_A
## 1 -0.093362 -0.040240 -0.036899 -0.065184 0.022665 -0.143711 -0.121866
## 2 0.017092 -0.026130 -0.077508 0.034782 -0.120618 -0.138166 -0.131891
## 3 -0.008526 -0.022415 -0.080035 0.085087 -0.175739 -0.109746 -0.105880
## 4 0.051829 -0.042397 -0.021394 -0.034928 -0.024835 -0.118731 -0.108296
## 5 0.027033 -0.065745 -0.079949 -0.017157 0.116955 -0.129837 -0.093588
## 6 0.001056 -0.063551 -0.045642 -0.047637 0.063192 -0.127201 -0.119078
## GenD_C GenD_N GenD_O AgeD_E AgeD_A AgeD_C AgeD_N
## 1 -0.035848 -0.312507 0.007724 -0.046863 -0.093073 -0.105684 0.023300
## 2 -0.083455 -0.304634 0.094176 0.034101 0.166388 0.207842 -0.065733
## 3 -0.071453 -0.304275 0.124210 -0.001189 0.010252 0.031878 -0.012606
## 4 -0.069517 -0.293175 0.087052 -0.055765 0.049721 0.041920 -0.012260
## 5 -0.082001 -0.267278 0.084709 -0.097794 0.041980 0.020339 0.003918
## 6 -0.096367 -0.305154 0.070252 -0.008443 -0.003168 0.033403 0.003546
## AgeD_O TFR alpha peak stop ageFB t_ageFM nevermar divorce
## 1 0.041896 2.3470 13.338400 23.90560 2.854100 24.3 25.85 0.316093 0.016276
## 2 0.010872 1.8715 11.279618 24.63356 4.435501 23.6 26.60 0.291652 0.017909
## 3 0.139709 2.0030 12.428397 23.17632 4.598583 23.0 25.65 0.263799 0.018978
## 4 0.038953 2.0680 10.913909 25.19252 2.924860 24.0 26.80 0.316186 0.014391
## 5 -0.034539 1.9475 7.012688 28.85349 2.941833 25.6 28.30 0.360036 0.012347
## 6 0.015115 1.9240 6.950724 28.27968 3.288943 25.7 27.15 0.306618 0.015716
## cohabit abortion t_nmf unintprg famplnpw med_inc perAA perHisp perFem perBA
## 1 8.2 12.0 31.9 53 147 64576 3.7 5.5 47.9 27.2
## 2 4.8 12.0 45.0 55 147 40474 26.2 3.9 51.5 21.7
## 3 5.3 8.7 39.1 56 152 38307 15.4 6.4 50.9 19.1
## 4 7.7 15.2 41.3 51 151 46789 4.1 29.6 50.3 26.3
## 5 8.0 27.6 33.9 56 245 57708 6.2 37.6 50.3 30.1
## 6 8.1 15.7 29.8 48 80 54046 4.0 20.7 49.9 35.9
## perUrb voteO vryrel relcons PC1 PC2 PC3 PC4
## 1 66.02 38.74 56.3 42.76 -4.0405808 2.0274333 -1.6825893 -0.2248166
## 2 59.04 37.89 27.9 18.75 3.1080828 -0.2733236 -0.2268482 0.2992498
## 3 56.16 45.12 35.7 18.08 -0.7630252 -1.6572132 -0.9910559 -0.1125770
## 4 89.81 38.86 52.1 39.93 0.2751785 0.8608766 0.4554930 -0.3648812
## 5 94.95 61.01 34.0 11.45 -2.2730888 1.1231070 0.2798554 0.4934071
## 6 86.15 53.66 32.6 14.78 -1.3007065 1.2986988 1.0030575 -0.5838080
## PC5 Extraversion Agreeableness Conscientiousness Neuroticism
## 1 0.2919919 -3.5516082 -1.8638115 -2.2141392 -1.0510473
## 2 0.1126550 1.2637407 1.8008589 1.6336407 -0.3856869
## 3 -0.3913193 -0.2303799 -0.2705892 -1.1716923 1.4182409
## 4 -0.2359841 0.3826497 -0.1203004 0.1758058 -0.9443718
## 5 -0.4006657 -1.1916867 -0.6321759 -1.3140644 -0.3682058
## 6 -0.2263186 -0.1036377 -1.1703564 -0.4406755 -0.9736510
## Openness cluster newcluster kcluster
## 1 1.1440545 2 1 2
## 2 -1.4936254 1 3 1
## 3 -0.9828509 3 2 3
## 4 0.3598524 1 3 1
## 5 1.7968028 2 1 2
## 6 1.5013215 2 1 2
Comment on your overall findings and what you would do differently were you to start over on Assignments 2 and 3.
I would have started with more variables, not just the personality traits. I could run additional PCA models to determine which variables to remove.
I need to understand better how to choose the number of clusters and how to interpret each cluster. I ran the function with 3 clusters, but the charts seem to show I could have chosen up to 7 clusters. Perhaps if I remove variables with high colinearity, this would reduce the optimal number of clusters.
Cluster 1 seems to show high extraversion, agreeableness, and conscientiousness with low neuroticism and openness.
Cluster 2 seems to show negative values in all personality traits, except openness.
Cluster 3 has negative extraversion, agreeableness, and conscientiousness, with high neuroticism and positive openness.
I used cbind to add the kcluster assignment back to the original dataframe. This allows me to associate each state to a cluster. In the future, I can create tables and charts to show the states in each cluster.