# bringing in LTDB raw data
library(tidyverse)  # read_csv(), dplyr verbs, and the pipe
library(tidyLPA)    # single_imputation(), estimate_profiles(), compare_solutions(), plot_profiles()

setwd("~/Google Drive/dissertation_files/LTDB")
ltdb2019 <- read_csv("LTDB_Std_201519_Sample.csv")
ltdb2012 <- read_csv("LTDB_Std_200812_Sample.csv")
ltdb2000 <- read_csv("LTDB_Std_2000_Sample.csv")
# keep only the three study counties; the 2000 file names the county column
# "county" while the 2012 and 2019 files name it "countya"
study_counties <- c("Bexar County", "Travis County", "Dallas County")
lt00 <- subset(ltdb2000, county %in% study_counties)
lt12 <- subset(ltdb2012, countya %in% study_counties)
lt19 <- subset(ltdb2019, countya %in% study_counties)

# give the 2012 and 2019 files a matching "county" column
lt12 <- lt12 %>%
  mutate(county = ifelse(countya %in% study_counties, countya, NA))
lt19 <- lt19 %>%
  mutate(county = ifelse(countya %in% study_counties, countya, NA))
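# The eigenvalue table and variable loadings printed below appear to come from a
# FactoMineR PCA (the component scores are pulled from pca_orig$ind$coord further
# down). A minimal sketch of the kind of call that produces this output, assuming
# the indicators live in bpsw_comb; the column selection is a placeholder, since
# the full 13-variable set isn't shown in this excerpt.
library(FactoMineR)
pca_vars <- bpsw_comb %>%
  dplyr::select(y00_04, y05_09, y10_14, y15_19, pnhw00, pnhw12)  # ...plus the remaining indicators
pca_orig <- PCA(pca_vars, scale.unit = TRUE, graph = FALSE)  # ncp defaults to 5, matching the five dimensions printed below
pca_orig$eig        # eigenvalues and percentage of variance
pca_orig$var$coord  # variable loadings on the retained dimensions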
##           eigenvalue percentage of variance
## comp 1    7.59245979            58.4035368
## comp 2    2.91457731            22.4198255
## comp 3    0.90673706             6.9749005
## comp 4    0.40774097             3.1364690
## comp 5    0.36044454             2.7726503
## comp 6    0.24653547             1.8964267
## comp 7    0.14608504             1.1237311
## comp 8    0.13208617             1.0160475
## comp 9    0.09117591             0.7013532
## comp 10   0.06950403             0.5346464
## comp 11   0.05167858             0.3975275
## comp 12   0.04915105             0.3780850
## comp 13   0.03182408             0.2448006
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## y00_04 0.4106739 0.7372815 0.16398107 0.369676866 0.32208157
## y05_09 0.4455453 0.8432740 0.05572513 0.008448677 0.02104554
## y10_14 0.5111071 0.8045792 -0.04936393 -0.120280591 -0.11118086
## y15_19 0.4225442 0.8175104 -0.01948292 -0.240857616 -0.17704819
## pnhw00 0.7881359 -0.1923683 -0.42967258 0.029795754 0.12642731
## pnhw12 0.8913689 -0.1842160 -0.22662971 -0.173393419 0.22041301
## That looks promising. Down to just two dimensions accounting for just over 80% of the variance (58.4% + 22.4%). I just realized I forgot to include population.
### I’m going to press forward to see what the LPA looks like, then work back and include ln(pop).
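# A hedged sketch of folding ln(pop) in before re-running the PCA; "pop" is a
# placeholder name for whatever total-population column the combined data carries,
# not a column confirmed by the files read in above.
bpsw_comb <- bpsw_comb %>%
  mutate(lnpop = log(pop))
# ...then add lnpop to the PCA indicator set and rebuild pca_orig before extracting scores.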
# pull the first two component scores out of the PCA and attach them to the combined data
pca_orig_dim <- as.data.frame(pca_orig$ind$coord[, 1:2])
pca_orig_dim <- cbind(bpsw_comb, pca_orig_dim)
pca_orig_dim <- pca_orig_dim %>%
  mutate(
    dim1 = Dim.1,
    dim2 = Dim.2
  )
# comparing models
pca_orig_dim <- as.data.frame(pca_orig_dim)
# fit 1- and 2-class solutions under model 1 (equal variances, zero covariances)
# and model 6 (varying variances, varying covariances), then compare fit
pca_orig_dim %>%
  dplyr::select(dim1, dim2) %>%
  single_imputation() %>%
  estimate_profiles(1:2,
    variances = c("equal", "varying"),
    covariances = c("zero", "varying")
  ) %>%
  compare_solutions(statistics = c("AIC", "BIC"))
## Compare tidyLPA solutions:
##
## Model Classes AIC BIC
## 1 1 10087.757 10107.944
## 1 2 9922.026 9957.352
## 6 1 10089.757 10114.990
## 6 2 8936.281 8991.794
##
## Best model according to AIC is Model 6 with 2 classes.
## Best model according to BIC is Model 6 with 2 classes.
##
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 6 with 2 classes.
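# Model 6 in tidyLPA is the specification with freely estimated (varying) variances
# and covariances, which is what both calls below request.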
# model 6 with 2 classes, estimated with tidyLPA's default backend (mclust)
y1 <- pca_orig_dim %>%
  dplyr::select(dim1, dim2) %>%
  single_imputation() %>%
  scale() %>%
  estimate_profiles(2,
    variances = "varying",
    covariances = "varying"
  ) %>%
  plot_profiles()

# the same model estimated through Mplus (via MplusAutomation) for comparison
y2 <- pca_orig_dim %>%
  dplyr::select(dim1, dim2) %>%
  single_imputation() %>%
  scale() %>%
  estimate_profiles(2,
    package = "MplusAutomation",
    variances = "varying",
    covariances = "varying"
  ) %>%
  plot_profiles()