With this version, I picked up where my last update ended: I reran the LPA with everything I had last time, added tract-level population from the census, and then added the natural log of population. Model selection recommended 4 classes, which doesn't entirely make sense to me, so I ran the analysis with both 3 and 4 classes.
Right now I'm not getting much separation between the classes. Hopefully tomorrow we can discuss ways I might get more distinction between the classes, and what the 4 classes might represent.
Also, I intentionally left the repeated code blocks displayed rather than writing out their differences in prose; I think seeing each variant side by side is easier to follow.
## eigenvalue percentage of variance
## comp 1 4.81506649 30.0941656
## comp 2 3.00051718 18.7532324
## comp 3 2.23288942 13.9555589
## comp 4 1.71631177 10.7269486
## comp 5 0.99122966 6.1951854
## comp 6 0.82148934 5.1343084
## comp 7 0.51880250 3.2425156
## comp 8 0.50620590 3.1637869
## comp 9 0.37114965 2.3196853
## comp 10 0.25769565 1.6105978
## comp 11 0.23594615 1.4746635
## comp 12 0.14859537 0.9287210
## comp 13 0.11836782 0.7397989
## comp 14 0.11274452 0.7046532
## comp 15 0.08217834 0.5136146
## comp 16 0.07081024 0.4425640
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## y00_04 0.5082115 0.6794351 -0.09453642 0.01066100 -0.013505930
## y05_09 0.5530741 0.7721438 -0.09976778 -0.08208166 -0.013288453
## y10_14 0.5973760 0.7307166 -0.07993783 -0.08829118 -0.004722731
## y15_19 0.5243002 0.7488211 -0.07217826 -0.09042962 0.007533299
## pnhw00 0.7191840 -0.3749509 0.01603900 -0.08124349 0.010183651
## pnhw12 0.7777672 -0.4194049 0.03417118 -0.04690623 -0.012993279
## Compare tidyLPA solutions:
##
## Model Classes AIC BIC Warnings
## 1 1 19038.51 19079.48
## 1 2 19049.29 19115.86 Warning
## 1 3 18358.81 18451.00
## 1 4 18335.94 18453.73
## 6 1 19050.51 19122.21
## 6 2 17292.71 17441.23
## 6 3 16775.44 17000.77
## 6 4 16593.65 16895.80
##
## Best model according to AIC is Model 6 with 4 classes.
## Best model according to BIC is Model 6 with 4 classes.
##
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 6 with 4 classes.
# 3-class solution on the four PCA dimensions, varying variances/covariances (Model 6), default mclust back end
y1 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(3,
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# same 3-class specification, re-fit with the MplusAutomation back end
y2 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(3,package = "MplusAutomation",
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# 4-class solution, default mclust back end
y3 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(4,
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# 4-class solution, MplusAutomation back end
y4 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(4,package = "MplusAutomation",
variances = "varying",
covariances = "varying") %>%
plot_profiles()
#summary(dat_orig)
#bpsw[is.na(bpsw)[,4:22],] = 0
# replace NAs in the numeric variables (columns 4-19) with 0, then recombine with the first three columns of dat_orig
dat_spl <- dat_orig[,4:19]
dat_spl[is.na(dat_spl)] <- 0
dat_comb <- cbind(dat_orig[,1:3],dat_spl)
# add natural-log population columns
dat_origln <- dat_comb %>%
mutate(
pop00ln = log(pop00 + 0.0001),
pop12ln = log(pop12 + 0.0001),
pop19ln = log(pop19 + 0.0001)
)
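# The +0.0001 offset above keeps zero-population tracts from producing -Inf.
# Hedged alternative sketch (not used for the results below): base R's log1p(),
# i.e. log(1 + x), handles zero counts without an arbitrary constant;
# dat_origln_l1p is a hypothetical name.
dat_origln_l1p <- dat_comb %>%
  mutate(
    pop00ln = log1p(pop00),
    pop12ln = log1p(pop12),
    pop19ln = log1p(pop19)
  )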
#dat_origln <- dat_origln[!is.infinite(rowSums(dat_origln)),]
#### troubleshooting
#summary(dat_comb)
#summary(dat_origln$pop00ln)
#print(dat_origln)
# PCA on the same variables as before, with the log-population columns added
pca_origln <- PCA(dat_origln[,c("y00_04","y05_09","y10_14","y15_19", "pnhw00", "pnhw12", "pnhw19", "pedu00","pedu12","pedu19","mhhinc00","mhhinc12","mhhinc19", "pop00ln","pop12ln","pop19ln")],
scale.unit = T)
#summary(pca)
#eigenvalues
eigenval_origln <- pca_origln$eig
eigvar_origln <- as.data.frame(eigenval_origln[,1:2])
print(eigvar_origln)
## eigenvalue percentage of variance
## comp 1 4.81246383 30.0778989
## comp 2 2.97418799 18.5886749
## comp 3 2.29323024 14.3326890
## comp 4 1.65770238 10.3606399
## comp 5 0.90452136 5.6532585
## comp 6 0.78705599 4.9191000
## comp 7 0.54836202 3.4272626
## comp 8 0.50120577 3.1325361
## comp 9 0.38283320 2.3927075
## comp 10 0.30926878 1.9329299
## comp 11 0.23157371 1.4473357
## comp 12 0.16540408 1.0337755
## comp 13 0.15311328 0.9569580
## comp 14 0.11462541 0.7164088
## comp 15 0.08396484 0.5247803
## comp 16 0.08048711 0.5030445
#Scree plot
fviz_screeplot(pca_origln,ncp = 10)
# examining first four dimensions
var_origln <- get_pca_var(pca_origln)
#head(var_origln$coord)
corrplot::corrplot(var_origln$coord)
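# The first four components each have an eigenvalue above 1 (comp 5 drops below 1),
# which is what motivates keeping four dimensions below. Sketch of deriving that
# cutoff from the eigenvalues rather than hard-coding it, assuming FactoMineR's
# $eig matrix keeps its "eigenvalue" column name; n_keep is a hypothetical name.
n_keep <- sum(pca_origln$eig[, "eigenvalue"] > 1)
n_keep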
# stripping out the top four dimensions of the log-population PCA and adding them to dat_orig
pca_orig_dim <- as.data.frame(pca_origln$ind$coord[,1:4])
pca_orig_dim <- cbind(dat_orig,pca_orig_dim)
pca_orig_dim <- pca_orig_dim %>%
mutate(
dim1 = Dim.1,
dim2 = Dim.2,
dim3 = Dim.3,
dim4 = Dim.4
)
# compare 1-4 class solutions across the candidate model specifications using AIC and BIC
pca_orig_dim <- as.data.frame(pca_orig_dim)
pca_orig_dim%>%
dplyr::select(dim1,dim2,dim3, dim4) %>%
single_imputation() %>%
estimate_profiles(1:4,
variances = c("equal", "varying"),
covariances = c("zero", "varying")
) %>%
compare_solutions(statistics = c("AIC", "BIC"))
## Warning:
## One or more analyses resulted in warnings! Examine these analyses carefully: model_1_class_2
## Warning:
## One or more analyses resulted in warnings! Examine these analyses carefully: model_1_class_2
## Compare tidyLPA solutions:
##
## Model Classes AIC BIC Warnings
## 1 1 19038.51 19079.48
## 1 2 19049.29 19115.86 Warning
## 1 3 18358.81 18451.00
## 1 4 18335.94 18453.73
## 6 1 19050.51 19122.21
## 6 2 17292.71 17441.23
## 6 3 16775.44 17000.77
## 6 4 16593.65 16895.80
##
## Best model according to AIC is Model 6 with 4 classes.
## Best model according to BIC is Model 6 with 4 classes.
##
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 6 with 4 classes.
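# Since the concern is class separation, it may help to look beyond AIC/BIC.
# Hedged sketch: tidyLPA's get_fit() reports (as I understand it) entropy and the
# minimum/maximum posterior classification probabilities for each solution;
# fit_check is a hypothetical name, not part of the analysis above.
fit_check <- pca_orig_dim %>%
  dplyr::select(dim1,dim2,dim3,dim4) %>%
  single_imputation() %>%
  scale() %>%
  estimate_profiles(3:4,
                    variances = "varying",
                    covariances = "varying") %>%
  get_fit()
fit_check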
# 3-class solution on the four dimensions (Model 6 specification), default mclust back end
y5 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(3,
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# 3-class solution, MplusAutomation back end
y6 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(3,package = "MplusAutomation",
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# 4-class solution, default mclust back end
y7 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(4,
variances = "varying",
covariances = "varying") %>%
plot_profiles()
# 4-class solution, MplusAutomation back end
y8 <- pca_orig_dim %>%
dplyr::select(dim1,dim2,dim3,dim4) %>%
single_imputation() %>%
scale() %>%
estimate_profiles(4,package = "MplusAutomation",
variances = "varying",
covariances = "varying") %>%
plot_profiles()
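# Rough check of how cleanly tracts are assigned under the 4-class solution.
# Hedged sketch: tidyLPA's get_data() should return the posterior class
# probabilities (CPROB columns) next to the modal Class assignment for a single
# fitted model; m4 and cprobs are hypothetical names, not part of the analysis above.
m4 <- pca_orig_dim %>%
  dplyr::select(dim1,dim2,dim3,dim4) %>%
  single_imputation() %>%
  scale() %>%
  estimate_profiles(4,
                    variances = "varying",
                    covariances = "varying")
cprobs <- get_data(m4) %>%
  dplyr::select(dplyr::starts_with("CPROB"))
# share of tracts whose best class probability is below 0.8 --
# a large share would be consistent with the weak separation I'm seeing
mean(apply(cprobs, 1, max) < 0.8)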