head(jdat)
## # A tibble: 6 × 62
## STATE_ABBR statename state region division rz_ext rz_agr rz_cns rz_neu
## <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AK Alabama 1 3 6 -0.126 -0.105 -0.110 -0.0491
## 2 AL Alaska 2 4 9 0.0430 0.100 0.0806 -0.0152
## 3 AR Arizona 4 4 8 -0.00934 -0.0154 -0.0583 0.0765
## 4 AZ Arkansas 5 3 7 0.0122 -0.00702 0.00843 -0.0436
## 5 CA California 6 4 9 -0.0430 -0.0357 -0.0654 -0.0143
## 6 CO Colorado 8 4 8 -0.00489 -0.0658 -0.0221 -0.0451
## # … with 53 more variables: rz_opn <dbl>, mzextra <dbl>, mzagree <dbl>,
## # mzconsc <dbl>, mzneuro <dbl>, mzopen <dbl>, fzextra <dbl>, fzagree <dbl>,
## # fzconsc <dbl>, fzneuro <dbl>, fzopen <dbl>, E_LT30 <dbl>, A_LT30 <dbl>,
## # C_LT30 <dbl>, N_LT30 <dbl>, O_LT30 <dbl>, E_GT30 <dbl>, A_GT30 <dbl>,
## # C_GT30 <dbl>, N_GT30 <dbl>, O_GT30 <dbl>, GenD_E <dbl>, GenD_A <dbl>,
## # GenD_C <dbl>, GenD_N <dbl>, GenD_O <dbl>, AgeD_E <dbl>, AgeD_A <dbl>,
## # AgeD_C <dbl>, AgeD_N <dbl>, AgeD_O <dbl>, TFR <dbl>, alpha <dbl>, …
# changing from tibble to data frame
jdat <- as.data.frame(jdat)
head(jdat)
## STATE_ABBR statename state region division rz_ext rz_agr rz_cns
## 1 AK Alabama 1 3 6 -0.125762 -0.104588 -0.109966
## 2 AL Alaska 2 4 9 0.043042 0.100483 0.080650
## 3 AR Arizona 4 4 8 -0.009335 -0.015433 -0.058324
## 4 AZ Arkansas 5 3 7 0.012155 -0.007023 0.008430
## 5 CA California 6 4 9 -0.043034 -0.035667 -0.065377
## 6 CO Colorado 8 4 8 -0.004892 -0.065783 -0.022110
## rz_neu rz_opn mzextra mzagree mzconsc mzneuro mzopen fzextra
## 1 -0.049075 0.051630 -0.004242 0.412499 0.301710 -0.326062 0.427654 0.139469
## 2 -0.015238 -0.112345 0.112153 0.504055 0.357393 -0.292434 0.394611 0.250319
## 3 0.076501 -0.080592 0.090463 0.468918 0.313488 -0.242351 0.435278 0.200209
## 4 -0.043650 0.002879 0.100366 0.469782 0.343585 -0.307043 0.452290 0.219097
## 5 -0.014349 0.092209 0.058111 0.463655 0.294992 -0.271535 0.496399 0.187948
## 6 -0.045139 0.073840 0.084083 0.433223 0.310563 -0.315733 0.477640 0.211284
## fzagree fzconsc fzneuro fzopen E_LT30 A_LT30 C_LT30 N_LT30
## 1 0.534365 0.337558 -0.013555 0.419930 -0.140225 -0.133313 -0.142583 -0.041884
## 2 0.635946 0.440848 0.012200 0.300435 0.051194 0.140257 0.130334 -0.030951
## 3 0.574798 0.384941 0.061924 0.311068 -0.009714 -0.012163 -0.048157 0.072481
## 4 0.578078 0.413102 -0.013868 0.365238 -0.003936 0.007324 0.020526 -0.047188
## 5 0.557243 0.376993 -0.004257 0.411690 -0.070760 -0.023765 -0.059610 -0.013238
## 6 0.552301 0.406930 -0.010579 0.407388 -0.007388 -0.066719 -0.012239 -0.044091
## O_LT30 E_GT30 A_GT30 C_GT30 N_GT30 O_GT30 GenD_E
## 1 0.064560 -0.093362 -0.040240 -0.036899 -0.065184 0.022665 -0.143711
## 2 -0.109746 0.017092 -0.026130 -0.077508 0.034782 -0.120618 -0.138166
## 3 -0.036030 -0.008526 -0.022415 -0.080035 0.085087 -0.175739 -0.109746
## 4 0.014119 0.051829 -0.042397 -0.021394 -0.034928 -0.024835 -0.118731
## 5 0.082416 0.027033 -0.065745 -0.079949 -0.017157 0.116955 -0.129837
## 6 0.078307 0.001056 -0.063551 -0.045642 -0.047637 0.063192 -0.127201
## GenD_A GenD_C GenD_N GenD_O AgeD_E AgeD_A AgeD_C
## 1 -0.121866 -0.035848 -0.312507 0.007724 -0.046863 -0.093073 -0.105684
## 2 -0.131891 -0.083455 -0.304634 0.094176 0.034101 0.166388 0.207842
## 3 -0.105880 -0.071453 -0.304275 0.124210 -0.001189 0.010252 0.031878
## 4 -0.108296 -0.069517 -0.293175 0.087052 -0.055765 0.049721 0.041920
## 5 -0.093588 -0.082001 -0.267278 0.084709 -0.097794 0.041980 0.020339
## 6 -0.119078 -0.096367 -0.305154 0.070252 -0.008443 -0.003168 0.033403
## AgeD_N AgeD_O TFR alpha peak stop ageFB t_ageFM nevermar
## 1 0.023300 0.041896 2.3470 13.338400 23.90560 2.854100 24.3 25.85 0.316093
## 2 -0.065733 0.010872 1.8715 11.279618 24.63356 4.435501 23.6 26.60 0.291652
## 3 -0.012606 0.139709 2.0030 12.428397 23.17632 4.598583 23.0 25.65 0.263799
## 4 -0.012260 0.038953 2.0680 10.913909 25.19252 2.924860 24.0 26.80 0.316186
## 5 0.003918 -0.034539 1.9475 7.012688 28.85349 2.941833 25.6 28.30 0.360036
## 6 0.003546 0.015115 1.9240 6.950724 28.27968 3.288943 25.7 27.15 0.306618
## divorce cohabit abortion t_nmf unintprg famplnpw med_inc perAA perHisp
## 1 0.016276 8.2 12.0 31.9 53 147 64576 3.7 5.5
## 2 0.017909 4.8 12.0 45.0 55 147 40474 26.2 3.9
## 3 0.018978 5.3 8.7 39.1 56 152 38307 15.4 6.4
## 4 0.014391 7.7 15.2 41.3 51 151 46789 4.1 29.6
## 5 0.012347 8.0 27.6 33.9 56 245 57708 6.2 37.6
## 6 0.015716 8.1 15.7 29.8 48 80 54046 4.0 20.7
## perFem perBA perUrb voteO vryrel relcons
## 1 47.9 27.2 66.02 38.74 56.3 42.76
## 2 51.5 21.7 59.04 37.89 27.9 18.75
## 3 50.9 19.1 56.16 45.12 35.7 18.08
## 4 50.3 26.3 89.81 38.86 52.1 39.93
## 5 50.3 30.1 94.95 61.01 34.0 11.45
## 6 49.9 35.9 86.15 53.66 32.6 14.78
A Principal Components Analysis was conducted for five personality variables at the state level. The variables are Extraversion (rz_ext), Agreeableness (rz_agr), Conscientiousness (rz_cns), Neuroticism (rz_neu), and Openness (rz_opn).
# Personality variables
# c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")
jdat.pc <-prcomp(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], center = TRUE, scale = TRUE, retx = T)
summary(jdat.pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.7632 1.0031 0.68075 0.53194 0.37220
## Proportion of Variance 0.6218 0.2012 0.09268 0.05659 0.02771
## Cumulative Proportion 0.6218 0.8230 0.91570 0.97229 1.00000
PC1 explains 62% of variance and PC2 explains 20% of variance. Together the cumulative proportion of variance is 82%.
jdat.pc$rotation
## PC1 PC2 PC3 PC4 PC5
## rz_ext 0.4727570 -0.28812904 0.5841284 -0.31910990 -0.50044514
## rz_agr 0.5046479 0.05736142 -0.1897925 0.79135796 -0.28243859
## rz_cns 0.5265494 0.09646826 0.3029963 0.01126307 0.78835652
## rz_neu -0.2324309 -0.88700949 0.1094530 0.31628415 0.21719671
## rz_opn -0.4365237 0.34292763 0.7204069 0.41443808 -0.03320681
biplot(jdat.pc, scale = 0, xlabs=jdat[,1])
The biplot appears to show that Agreeableness and Conscientiousness are highly correlated.
#Screeplot
screeplot(jdat.pc, type = "l", main = "Scree Plot")
abline(h=1)
jdat.pc$sdev^2
## [1] 3.1088923 1.0061839 0.4634242 0.2829631 0.1385365
# Different function to obtain the eigenvalues
jdat.play <- PCA(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], scale.unit=T,graph=F)
eigenvalues <- jdat.play$eig
head(eigenvalues[,1:2])
## eigenvalue percentage of variance
## comp 1 3.1088923 62.177847
## comp 2 1.0061839 20.123677
## comp 3 0.4634242 9.268483
## comp 4 0.2829631 5.659262
## comp 5 0.1385365 2.770731
# Plot the first two components
hist(jdat.pc$x[,1])
hist(jdat.pc$x[,2])
cor(jdat.pc$x[,1],jdat.pc$x[,2])
## [1] -1.745457e-16
# Put scores in separate file so that I can merge them with my original data set
scores <- data.frame(jdat.pc$x)
jdat <- cbind(jdat, scores)
# now imposing USA regions on a biplot
ggplot(jdat, aes(PC1, PC2, col = Species, fill = region)) +
stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
geom_point(shape = 21, col = "black")
Region 1 is the north east. Region 2 is the midwest. Region 3 is the south. Region 4 is the west. All four regions overlap. Region 3 is wholly contained in Region 4. Region 1 diverges from the other regions to a greater extent than 2, 3, and 4.
It appears you would want “fill” to be a variable with only a few categorical values, such as region.
I tried mapping my division as well, but it isn’t clear what division means.
# now imposing USA divisions on a biplot
ggplot(jdat, aes(PC1, PC2, col = Species, fill = division)) +
stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
geom_point(shape = 21, col = "black")
## Too few points to calculate an ellipse
Graphic by division shows less difference than by region.
#correlations of original variables with PC scores
round(cor(jdat[,c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")]), 3)
## rz_ext rz_agr rz_cns rz_neu rz_opn
## rz_ext 1.000 0.622 0.772 -0.098 -0.581
## rz_agr 0.622 1.000 0.777 -0.363 -0.634
## rz_cns 0.772 0.777 1.000 -0.426 -0.582
## rz_neu -0.098 -0.363 -0.426 1.000 0.082
## rz_opn -0.581 -0.634 -0.582 0.082 1.000
# Plot a US map
# Get centroids
centroid_labels <- usmapdata::centroid_labels("states")
# Rename state abbreviation to match centroid_labels
jdat <- rename(jdat, "abbr" = STATE_ABBR)
# Join data to centroids
data_labels <- merge(centroid_labels, jdat, by = "abbr")
# producing map of PC1 values for States
map1dat <- data_labels %>%
select(fips, PC1)
plot_usmap(data = map1dat, values = "PC1")+
labs(title="Junkins Index",
subtitle="PC1",
caption = "Source: Junkins Data") +
theme(panel.background = element_rect(colour = "black"))+
scale_fill_continuous(low = "white", high ="darkred",
name = "Value on Index",label = scales::comma) +
theme(legend.position = "right") +
guides(fill = "none") +
geom_text(data = data_labels, ggplot2::aes(
x = x, y = y,
label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")
# producing map of PC2 values for States
map2dat <- data_labels %>%
select(fips, PC2)
plot_usmap(data = map2dat, values = "PC2")+
labs(title="Junkins Index",
subtitle="PC2",
caption = "Source: Junkins Data") +
theme(panel.background = element_rect(colour = "black"))+
scale_fill_continuous(low = "white", high ="darkred",
name = "Value on Index",label = scales::comma) +
theme(legend.position = "right") +
guides(fill = "none") +
geom_text(data = data_labels, ggplot2::aes(
x = x, y = y,
label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")