Brian Surratt

head(jdat)
## # A tibble: 6 × 62
##   STATE_ABBR statename  state region division   rz_ext   rz_agr   rz_cns  rz_neu
##   <chr>      <chr>      <dbl> <chr>  <chr>       <dbl>    <dbl>    <dbl>   <dbl>
## 1 AK         Alabama        1 3      6        -0.126   -0.105   -0.110   -0.0491
## 2 AL         Alaska         2 4      9         0.0430   0.100    0.0806  -0.0152
## 3 AR         Arizona        4 4      8        -0.00934 -0.0154  -0.0583   0.0765
## 4 AZ         Arkansas       5 3      7         0.0122  -0.00702  0.00843 -0.0436
## 5 CA         California     6 4      9        -0.0430  -0.0357  -0.0654  -0.0143
## 6 CO         Colorado       8 4      8        -0.00489 -0.0658  -0.0221  -0.0451
## # … with 53 more variables: rz_opn <dbl>, mzextra <dbl>, mzagree <dbl>,
## #   mzconsc <dbl>, mzneuro <dbl>, mzopen <dbl>, fzextra <dbl>, fzagree <dbl>,
## #   fzconsc <dbl>, fzneuro <dbl>, fzopen <dbl>, E_LT30 <dbl>, A_LT30 <dbl>,
## #   C_LT30 <dbl>, N_LT30 <dbl>, O_LT30 <dbl>, E_GT30 <dbl>, A_GT30 <dbl>,
## #   C_GT30 <dbl>, N_GT30 <dbl>, O_GT30 <dbl>, GenD_E <dbl>, GenD_A <dbl>,
## #   GenD_C <dbl>, GenD_N <dbl>, GenD_O <dbl>, AgeD_E <dbl>, AgeD_A <dbl>,
## #   AgeD_C <dbl>, AgeD_N <dbl>, AgeD_O <dbl>, TFR <dbl>, alpha <dbl>, …
# changing from tibble to data frame

jdat <- as.data.frame(jdat)

head(jdat)
##   STATE_ABBR  statename state region division    rz_ext    rz_agr    rz_cns
## 1         AK    Alabama     1      3        6 -0.125762 -0.104588 -0.109966
## 2         AL     Alaska     2      4        9  0.043042  0.100483  0.080650
## 3         AR    Arizona     4      4        8 -0.009335 -0.015433 -0.058324
## 4         AZ   Arkansas     5      3        7  0.012155 -0.007023  0.008430
## 5         CA California     6      4        9 -0.043034 -0.035667 -0.065377
## 6         CO   Colorado     8      4        8 -0.004892 -0.065783 -0.022110
##      rz_neu    rz_opn   mzextra  mzagree  mzconsc   mzneuro   mzopen  fzextra
## 1 -0.049075  0.051630 -0.004242 0.412499 0.301710 -0.326062 0.427654 0.139469
## 2 -0.015238 -0.112345  0.112153 0.504055 0.357393 -0.292434 0.394611 0.250319
## 3  0.076501 -0.080592  0.090463 0.468918 0.313488 -0.242351 0.435278 0.200209
## 4 -0.043650  0.002879  0.100366 0.469782 0.343585 -0.307043 0.452290 0.219097
## 5 -0.014349  0.092209  0.058111 0.463655 0.294992 -0.271535 0.496399 0.187948
## 6 -0.045139  0.073840  0.084083 0.433223 0.310563 -0.315733 0.477640 0.211284
##    fzagree  fzconsc   fzneuro   fzopen    E_LT30    A_LT30    C_LT30    N_LT30
## 1 0.534365 0.337558 -0.013555 0.419930 -0.140225 -0.133313 -0.142583 -0.041884
## 2 0.635946 0.440848  0.012200 0.300435  0.051194  0.140257  0.130334 -0.030951
## 3 0.574798 0.384941  0.061924 0.311068 -0.009714 -0.012163 -0.048157  0.072481
## 4 0.578078 0.413102 -0.013868 0.365238 -0.003936  0.007324  0.020526 -0.047188
## 5 0.557243 0.376993 -0.004257 0.411690 -0.070760 -0.023765 -0.059610 -0.013238
## 6 0.552301 0.406930 -0.010579 0.407388 -0.007388 -0.066719 -0.012239 -0.044091
##      O_LT30    E_GT30    A_GT30    C_GT30    N_GT30    O_GT30    GenD_E
## 1  0.064560 -0.093362 -0.040240 -0.036899 -0.065184  0.022665 -0.143711
## 2 -0.109746  0.017092 -0.026130 -0.077508  0.034782 -0.120618 -0.138166
## 3 -0.036030 -0.008526 -0.022415 -0.080035  0.085087 -0.175739 -0.109746
## 4  0.014119  0.051829 -0.042397 -0.021394 -0.034928 -0.024835 -0.118731
## 5  0.082416  0.027033 -0.065745 -0.079949 -0.017157  0.116955 -0.129837
## 6  0.078307  0.001056 -0.063551 -0.045642 -0.047637  0.063192 -0.127201
##      GenD_A    GenD_C    GenD_N   GenD_O    AgeD_E    AgeD_A    AgeD_C
## 1 -0.121866 -0.035848 -0.312507 0.007724 -0.046863 -0.093073 -0.105684
## 2 -0.131891 -0.083455 -0.304634 0.094176  0.034101  0.166388  0.207842
## 3 -0.105880 -0.071453 -0.304275 0.124210 -0.001189  0.010252  0.031878
## 4 -0.108296 -0.069517 -0.293175 0.087052 -0.055765  0.049721  0.041920
## 5 -0.093588 -0.082001 -0.267278 0.084709 -0.097794  0.041980  0.020339
## 6 -0.119078 -0.096367 -0.305154 0.070252 -0.008443 -0.003168  0.033403
##      AgeD_N    AgeD_O    TFR     alpha     peak     stop ageFB t_ageFM nevermar
## 1  0.023300  0.041896 2.3470 13.338400 23.90560 2.854100  24.3   25.85 0.316093
## 2 -0.065733  0.010872 1.8715 11.279618 24.63356 4.435501  23.6   26.60 0.291652
## 3 -0.012606  0.139709 2.0030 12.428397 23.17632 4.598583  23.0   25.65 0.263799
## 4 -0.012260  0.038953 2.0680 10.913909 25.19252 2.924860  24.0   26.80 0.316186
## 5  0.003918 -0.034539 1.9475  7.012688 28.85349 2.941833  25.6   28.30 0.360036
## 6  0.003546  0.015115 1.9240  6.950724 28.27968 3.288943  25.7   27.15 0.306618
##    divorce cohabit abortion t_nmf unintprg famplnpw med_inc perAA perHisp
## 1 0.016276     8.2     12.0  31.9       53      147   64576   3.7     5.5
## 2 0.017909     4.8     12.0  45.0       55      147   40474  26.2     3.9
## 3 0.018978     5.3      8.7  39.1       56      152   38307  15.4     6.4
## 4 0.014391     7.7     15.2  41.3       51      151   46789   4.1    29.6
## 5 0.012347     8.0     27.6  33.9       56      245   57708   6.2    37.6
## 6 0.015716     8.1     15.7  29.8       48       80   54046   4.0    20.7
##   perFem perBA perUrb voteO vryrel relcons
## 1   47.9  27.2  66.02 38.74   56.3   42.76
## 2   51.5  21.7  59.04 37.89   27.9   18.75
## 3   50.9  19.1  56.16 45.12   35.7   18.08
## 4   50.3  26.3  89.81 38.86   52.1   39.93
## 5   50.3  30.1  94.95 61.01   34.0   11.45
## 6   49.9  35.9  86.15 53.66   32.6   14.78

A Principal Components Analysis was conducted for five personality variables at the state level. The variables are Extraversion (rz_ext), Agreeableness (rz_agr), Conscientiousness (rz_cns), Neuroticism (rz_neu), and Openness (rz_opn).

# Personality variables
# c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")

jdat.pc <-prcomp(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], center = TRUE, scale = TRUE, retx = T)

summary(jdat.pc)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5
## Standard deviation     1.7632 1.0031 0.68075 0.53194 0.37220
## Proportion of Variance 0.6218 0.2012 0.09268 0.05659 0.02771
## Cumulative Proportion  0.6218 0.8230 0.91570 0.97229 1.00000

PC1 explains 62% of variance and PC2 explains 20% of variance. Together the cumulative proportion of variance is 82%.

jdat.pc$rotation
##               PC1         PC2        PC3         PC4         PC5
## rz_ext  0.4727570 -0.28812904  0.5841284 -0.31910990 -0.50044514
## rz_agr  0.5046479  0.05736142 -0.1897925  0.79135796 -0.28243859
## rz_cns  0.5265494  0.09646826  0.3029963  0.01126307  0.78835652
## rz_neu -0.2324309 -0.88700949  0.1094530  0.31628415  0.21719671
## rz_opn -0.4365237  0.34292763  0.7204069  0.41443808 -0.03320681
biplot(jdat.pc, scale = 0, xlabs=jdat[,1])

The biplot appears to show that Agreeableness and Conscientiousness are highly correlated.

#Screeplot

screeplot(jdat.pc, type = "l", main = "Scree Plot")

abline(h=1)

jdat.pc$sdev^2
## [1] 3.1088923 1.0061839 0.4634242 0.2829631 0.1385365
# Different function to obtain the eigenvalues

jdat.play <- PCA(jdat[, c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")], scale.unit=T,graph=F)

eigenvalues <- jdat.play$eig

head(eigenvalues[,1:2])
##        eigenvalue percentage of variance
## comp 1  3.1088923              62.177847
## comp 2  1.0061839              20.123677
## comp 3  0.4634242               9.268483
## comp 4  0.2829631               5.659262
## comp 5  0.1385365               2.770731
# Plot the first two components

hist(jdat.pc$x[,1])

hist(jdat.pc$x[,2])

cor(jdat.pc$x[,1],jdat.pc$x[,2])
## [1] -1.745457e-16
# Put scores in separate file so that I can merge them with my original data set

scores <- data.frame(jdat.pc$x)

jdat <- cbind(jdat, scores)
# now imposing USA regions on a biplot

ggplot(jdat, aes(PC1, PC2, col = Species, fill = region)) +
  stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
  geom_point(shape = 21, col = "black")

Region 1 is the north east. Region 2 is the midwest. Region 3 is the south. Region 4 is the west. All four regions overlap. Region 3 is wholly contained in Region 4. Region 1 diverges from the other regions to a greater extent than 2, 3, and 4.

It appears you would want “fill” to be a variable with only a few categorical values, such as region.

I tried mapping my division as well, but it isn’t clear what division means.

# now imposing USA divisions on a biplot

ggplot(jdat, aes(PC1, PC2, col = Species, fill = division)) +
  stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) +
  geom_point(shape = 21, col = "black")
## Too few points to calculate an ellipse

Graphic by division shows less difference than by region.

#correlations of original variables with PC scores

round(cor(jdat[,c("rz_ext", "rz_agr", "rz_cns", "rz_neu", "rz_opn")]), 3)
##        rz_ext rz_agr rz_cns rz_neu rz_opn
## rz_ext  1.000  0.622  0.772 -0.098 -0.581
## rz_agr  0.622  1.000  0.777 -0.363 -0.634
## rz_cns  0.772  0.777  1.000 -0.426 -0.582
## rz_neu -0.098 -0.363 -0.426  1.000  0.082
## rz_opn -0.581 -0.634 -0.582  0.082  1.000
# Plot a US map
# Get centroids
centroid_labels <- usmapdata::centroid_labels("states")
# Rename state abbreviation to match centroid_labels
jdat <- rename(jdat, "abbr" = STATE_ABBR)
# Join data to centroids
data_labels <- merge(centroid_labels, jdat, by = "abbr")
# producing map of PC1 values for States

map1dat <- data_labels %>%
  select(fips, PC1)

plot_usmap(data = map1dat, values = "PC1")+
labs(title="Junkins Index", 
       subtitle="PC1", 
        caption = "Source: Junkins Data") +
    theme(panel.background = element_rect(colour = "black"))+
    scale_fill_continuous(low = "white", high ="darkred", 
                          name = "Value on Index",label = scales::comma) + 
    theme(legend.position = "right") +
    guides(fill = "none") +
  geom_text(data = data_labels, ggplot2::aes(
    x = x, y = y,
    label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")

# producing map of PC2 values for States

map2dat <- data_labels %>%
  select(fips, PC2)

plot_usmap(data = map2dat, values = "PC2")+
labs(title="Junkins Index", 
       subtitle="PC2", 
        caption = "Source: Junkins Data") +
    theme(panel.background = element_rect(colour = "black"))+
    scale_fill_continuous(low = "white", high ="darkred", 
                          name = "Value on Index",label = scales::comma) + 
    theme(legend.position = "right") +
    guides(fill = "none") +
  geom_text(data = data_labels, ggplot2::aes(
    x = x, y = y,
    label = scales::number(PC1, scale = 1, accuracy = .1)), color = "black")