Main analysis
We carry out a number of analyses using various subgroups.
The sexes together.
#S factor
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
#missing data
if (F){ #run manually when needed
matrixplot2(s.data)
matrixplot(s.data)
miss.table(s.data)
miss.hist(s.data)
GG_save("figures/missing_histogram.png")
}
#impute data
s.data = s.data[miss_by_case(s.data) <= 2, ] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#residualize
s.data.r = s.data #copy
s.data.r$age2 = s.data$age^2
s.data.r$age3 = s.data$age^3
s.data.r = df_residualize(s.data.r, resid.var = c("age","age2","age3")) #remove effect of age on indicators
s.data.r$age2 = NULL #remove sq and cube
s.data.r$age3 = NULL
#correlations
cors.B = cor(s.data); round(cors.B,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.50 0.18 0.42 -0.21 -0.28
## own.place -0.50 1.00 0.43 -0.44 0.73 0.58
## married 0.18 0.43 1.00 -0.13 0.45 0.57
## conviction 0.42 -0.44 -0.13 1.00 -0.12 -0.26
## income -0.21 0.73 0.45 -0.12 1.00 0.48
## age -0.28 0.58 0.57 -0.26 0.48 1.00
cors2.B = cor(s.data.r); round(cors2.B,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.43 0.43 0.38 -0.09 0
## own.place -0.43 1.00 0.15 -0.37 0.63 0
## married 0.43 0.15 1.00 0.02 0.24 0
## conviction 0.38 -0.37 0.02 1.00 0.01 0
## income -0.09 0.63 0.24 0.01 1.00 0
## age 0.00 0.00 0.00 0.00 0.00 1
#write
cors3.B = combine_upperlower(cors.B, cors2.B)
write_csv(cors3.B %>% as.data.frame, "results/S_both_cors.csv", na = "")
#remove age
s.data$age = NULL
s.data.r$age = NULL
#factor analyze
s.fa.B = fa(s.data);s.fa.B #without removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.50 0.25 0.755 1
## own.place 1.00 1.00 0.005 1
## married 0.43 0.19 0.813 1
## conviction -0.44 0.19 0.809 1
## income 0.73 0.54 0.463 1
##
## MR1
## SS loadings 2.16
## Proportion Var 0.43
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 2 with Chi Square of 3810
## The degrees of freedom for the model are 5 and the objective function was 0.54
##
## The root mean square of the residuals (RMSR) is 0.17
## The df corrected root mean square of the residuals is 0.24
##
## The harmonic number of observations is 1903 with the empirical chi square 1083 with prob < 6.6e-232
## The total number of observations was 1903 with Likelihood Chi Square = 1029 with prob < 3.3e-220
##
## Tucker Lewis Index of factoring reliability = 0.46
## RMSEA index = 0.33 and the 90 % confidence intervals are 0.31 0.34
## BIC = 991
## Fit based upon off diagonal values = 0.83
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
s.fa2.B = fa(s.data.r);s.fa2.B #removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data.r)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.43 0.184 0.816 1
## own.place 1.00 0.995 0.005 1
## married 0.15 0.021 0.979 1
## conviction -0.37 0.134 0.866 1
## income 0.63 0.402 0.598 1
##
## MR1
## SS loadings 1.74
## Proportion Var 0.35
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 1.5 with Chi Square of 2819
## The degrees of freedom for the model are 5 and the objective function was 0.6
##
## The root mean square of the residuals (RMSR) is 0.2
## The df corrected root mean square of the residuals is 0.29
##
## The harmonic number of observations is 1903 with the empirical chi square 1562 with prob < 0
## The total number of observations was 1903 with Likelihood Chi Square = 1147 with prob < 7.5e-246
##
## Tucker Lewis Index of factoring reliability = 0.19
## RMSEA index = 0.35 and the 90 % confidence intervals are 0.33 0.35
## BIC = 1110
## Fit based upon off diagonal values = 0.64
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
#get scores
scores = data.frame(matrix(nrow=nrow(s.data), ncol=0))
scores$s.scores = s.fa.B$scores
scores$s.scores2 = s.fa2.B$scores
colnames(scores) = c("S.both", "S.both.no.age")
rownames(scores) = rownames(s.fa.B$scores)
dimnames(scores$S) = NULL
dimnames(scores$S.no.age) = NULL
wtd.cors(scores)
## S.both S.both.no.age
## S.both 1.00 0.81
## S.both.no.age 0.81 1.00
#insert into main df
names.df = merge_datasets(names.df, scores)
Men only.
#S factor
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
s.data = subset(s.data, names.df$gender=="Male")
#impute data
s.data = s.data[miss_by_case(s.data)<=2,] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#residualize
s.data.r = s.data #copy
s.data.r$age2 = s.data$age^2
s.data.r$age3 = s.data$age^3
s.data.r = df_residualize(s.data.r, resid.var = c("age","age2","age3")) #remove effect of age on indicators
s.data.r$age2 = NULL #remove sq and cube
s.data.r$age3 = NULL
#correlations
cors.M = cor(s.data);round(cors.M,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.53 0.03 0.63 -0.29 -0.21
## own.place -0.53 1.00 0.54 -0.59 0.78 0.63
## married 0.03 0.54 1.00 -0.29 0.46 0.71
## conviction 0.63 -0.59 -0.29 1.00 -0.40 -0.35
## income -0.29 0.78 0.46 -0.40 1.00 0.60
## age -0.21 0.63 0.71 -0.35 0.60 1.00
cors2.M = cor(s.data.r);round(cors2.M,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.52 0.26 0.61 -0.22 0
## own.place -0.52 1.00 0.18 -0.51 0.65 0
## married 0.26 0.18 1.00 -0.06 0.06 0
## conviction 0.61 -0.51 -0.06 1.00 -0.26 0
## income -0.22 0.65 0.06 -0.26 1.00 0
## age 0.00 0.00 0.00 0.00 0.00 1
#write
cors3.M = combine_upperlower(cors.M, cors2.M)
write_csv(cors3.M %>% as.data.frame, "results/S_men_cors.csv", na = "")
#remove age
s.data$age = NULL
s.data.r$age = NULL
#factor analyze
s.fa.M = fa(s.data);s.fa.M #without removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.53 0.28 0.721 1
## own.place 1.00 1.00 0.005 1
## married 0.55 0.30 0.703 1
## conviction -0.60 0.36 0.645 1
## income 0.78 0.61 0.390 1
##
## MR1
## SS loadings 2.54
## Proportion Var 0.51
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 2.6 with Chi Square of 2273
## The degrees of freedom for the model are 5 and the objective function was 0.55
##
## The root mean square of the residuals (RMSR) is 0.15
## The df corrected root mean square of the residuals is 0.21
##
## The harmonic number of observations is 877 with the empirical chi square 390 with prob < 4.3e-82
## The total number of observations was 877 with Likelihood Chi Square = 484 with prob < 2.2e-102
##
## Tucker Lewis Index of factoring reliability = 0.58
## RMSEA index = 0.33 and the 90 % confidence intervals are 0.3 0.34
## BIC = 450
## Fit based upon off diagonal values = 0.91
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
s.fa2.M = fa(s.data.r);s.fa2.M #without removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data.r)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.59 0.347 0.65 1
## own.place 0.91 0.834 0.17 1
## married 0.13 0.016 0.98 1
## conviction -0.59 0.354 0.65 1
## income 0.65 0.418 0.58 1
##
## MR1
## SS loadings 1.97
## Proportion Var 0.39
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 1.8 with Chi Square of 1525
## The degrees of freedom for the model are 5 and the objective function was 0.57
##
## The root mean square of the residuals (RMSR) is 0.15
## The df corrected root mean square of the residuals is 0.21
##
## The harmonic number of observations is 877 with the empirical chi square 405 with prob < 3e-85
## The total number of observations was 877 with Likelihood Chi Square = 501 with prob < 4.6e-106
##
## Tucker Lewis Index of factoring reliability = 0.34
## RMSEA index = 0.34 and the 90 % confidence intervals are 0.31 0.35
## BIC = 467
## Fit based upon off diagonal values = 0.85
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.93
## Multiple R square of scores with factors 0.87
## Minimum correlation of possible factor scores 0.74
#get scores
scores = data.frame(matrix(nrow=nrow(s.data), ncol=0))
scores$s.scores = s.fa.M$scores
scores$s.scores2 = s.fa2.M$scores
colnames(scores) = c("S.men", "S.men.no.age")
rownames(scores) = rownames(s.fa.M$scores)
dimnames(scores$S) = NULL
dimnames(scores$S.no.age) = NULL
wtd.cors(scores)
## S.men S.men.no.age
## S.men 1.00 0.76
## S.men.no.age 0.76 1.00
#insert into main df
names.df = merge_datasets(names.df, scores)
Women only.
#S factor
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
s.data = subset(s.data, names.df$gender=="Female")
#impute data
s.data = s.data[miss_by_case(s.data)<=2,] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#residualize
s.data.r = s.data #copy
s.data.r$age2 = s.data$age^2
s.data.r$age3 = s.data$age^3
s.data.r = df_residualize(s.data.r, resid.var = c("age","age2","age3")) #remove effect of age on indicators
s.data.r$age2 = NULL #remove sq and cube
s.data.r$age3 = NULL
#correlations
cors.F = cor(s.data);round(cors.F,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.47 0.34 0.42 -0.12 -0.33
## own.place -0.47 1.00 0.27 -0.41 0.74 0.56
## married 0.34 0.27 1.00 -0.06 0.39 0.46
## conviction 0.42 -0.41 -0.06 1.00 -0.18 -0.28
## income -0.12 0.74 0.39 -0.18 1.00 0.45
## age -0.33 0.56 0.46 -0.28 0.45 1.00
cors2.F = cor(s.data.r);round(cors2.F,2)
## no.job own.place married conviction income age
## no.job 1.00 -0.37 0.59 0.37 0.03 0
## own.place -0.37 1.00 0.02 -0.32 0.66 0
## married 0.59 0.02 1.00 0.08 0.24 0
## conviction 0.37 -0.32 0.08 1.00 -0.07 0
## income 0.03 0.66 0.24 -0.07 1.00 0
## age 0.00 0.00 0.00 0.00 0.00 1
#write
cors3.F = combine_upperlower(cors.F, cors2.F)
write_csv(cors3.F %>% as.data.frame, "results/S_women_cors.csv", na = "")
#remove age
s.data$age = NULL
s.data.r$age = NULL
#factor analyze
s.fa.F = fa(s.data);s.fa.F #without removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.47 0.221 0.779 1
## own.place 1.00 0.995 0.005 1
## married 0.27 0.072 0.928 1
## conviction -0.41 0.169 0.831 1
## income 0.74 0.544 0.456 1
##
## MR1
## SS loadings 2.0
## Proportion Var 0.4
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 1.9 with Chi Square of 1979
## The degrees of freedom for the model are 5 and the objective function was 0.65
##
## The root mean square of the residuals (RMSR) is 0.19
## The df corrected root mean square of the residuals is 0.28
##
## The harmonic number of observations is 1026 with the empirical chi square 777 with prob < 1.3e-165
## The total number of observations was 1026 with Likelihood Chi Square = 660 with prob < 1.8e-140
##
## Tucker Lewis Index of factoring reliability = 0.33
## RMSEA index = 0.36 and the 90 % confidence intervals are 0.33 0.37
## BIC = 626
## Fit based upon off diagonal values = 0.75
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
s.fa2.F = fa(s.data.r);s.fa2.F #without removing effect of age
## Factor Analysis using method = minres
## Call: fa(r = s.data.r)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## no.job -0.37 0.13360 0.866 1
## own.place 1.00 0.99501 0.005 1
## married 0.02 0.00024 1.000 1
## conviction -0.32 0.10281 0.897 1
## income 0.66 0.43225 0.568 1
##
## MR1
## SS loadings 1.66
## Proportion Var 0.33
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 1.6 with Chi Square of 1649
## The degrees of freedom for the model are 5 and the objective function was 0.8
##
## The root mean square of the residuals (RMSR) is 0.24
## The df corrected root mean square of the residuals is 0.34
##
## The harmonic number of observations is 1026 with the empirical chi square 1159 with prob < 2.1e-248
## The total number of observations was 1026 with Likelihood Chi Square = 816 with prob < 4.5e-174
##
## Tucker Lewis Index of factoring reliability = 0.01
## RMSEA index = 0.4 and the 90 % confidence intervals are 0.37 0.41
## BIC = 781
## Fit based upon off diagonal values = 0.53
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
#get scores
scores = data.frame(matrix(nrow=nrow(s.data), ncol=0))
scores$s.scores = s.fa.F$scores
scores$s.scores2 = s.fa2.F$scores
colnames(scores) = c("S.women", "S.women.no.age")
rownames(scores) = rownames(s.fa.F$scores)
dimnames(scores$S) = NULL
dimnames(scores$S.no.age) = NULL
wtd.cors(scores)
## S.women S.women.no.age
## S.women 1.00 0.83
## S.women.no.age 0.83 1.00
#insert into main df
names.df = merge_datasets(names.df, scores)
Both together in age bins.
## S FACTOR WITHIN AGE BINS, BOTH GENDERS
#fetch data
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
#impute data
s.data = s.data[miss_by_case(s.data)<=2,] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#the databins
s.data.bins = list()
starts = seq(20, 50, 5)
for (idx in seq_along(starts)) {
element.name = str_c("age.", starts[idx], ".", (starts[idx]+5))
print(element.name)
s.data.bins[[element.name]] = s.data[s.data$age >= starts[idx] & s.data$age <= (starts[idx]+5), -6]
}
## [1] "age.20.25"
## [1] "age.25.30"
## [1] "age.30.35"
## [1] "age.35.40"
## [1] "age.40.45"
## [1] "age.45.50"
## [1] "age.50.55"
#fa objects
fa.bins = llply(.data = s.data.bins, .fun = function(x) {
print(cor(x, use="pair"))
print(table(miss_by_case(x)))
fa(x)
})
## no.job own.place married conviction income
## no.job 1.00 -0.51 0.4575 0.244 -0.3066
## own.place -0.51 1.00 -0.1504 -0.208 0.6476
## married 0.46 -0.15 1.0000 -0.067 -0.0049
## conviction 0.24 -0.21 -0.0670 1.000 -0.0926
## income -0.31 0.65 -0.0049 -0.093 1.0000
##
## 0
## 201
## no.job own.place married conviction income
## no.job 1.00 -0.362 0.52955 0.349 -0.23665
## own.place -0.36 1.000 -0.06677 -0.341 0.77357
## married 0.53 -0.067 1.00000 0.028 0.00023
## conviction 0.35 -0.341 0.02815 1.000 -0.11085
## income -0.24 0.774 0.00023 -0.111 1.00000
##
## 0
## 254
## no.job own.place married conviction income
## no.job 1.00 -0.377 0.578 0.268 -0.377
## own.place -0.38 1.000 -0.078 -0.326 0.755
## married 0.58 -0.078 1.000 -0.063 -0.222
## conviction 0.27 -0.326 -0.063 1.000 -0.095
## income -0.38 0.755 -0.222 -0.095 1.000
##
## 0
## 271
## no.job own.place married conviction income
## no.job 1.00 -0.49 0.570 0.327 -0.42
## own.place -0.49 1.00 -0.189 -0.449 0.75
## married 0.57 -0.19 1.000 0.059 -0.25
## conviction 0.33 -0.45 0.059 1.000 -0.17
## income -0.42 0.75 -0.254 -0.166 1.00
##
## 0
## 253
## no.job own.place married conviction income
## no.job 1.00 -0.60 0.486 0.281 -0.430
## own.place -0.60 1.00 -0.185 -0.397 0.746
## married 0.49 -0.19 1.000 0.042 -0.090
## conviction 0.28 -0.40 0.042 1.000 -0.092
## income -0.43 0.75 -0.090 -0.092 1.000
##
## 0
## 194
## no.job own.place married conviction income
## no.job 1.00 -0.40 0.161 0.367 -0.270
## own.place -0.40 1.00 0.211 -0.256 0.671
## married 0.16 0.21 1.000 -0.042 0.203
## conviction 0.37 -0.26 -0.042 1.000 0.071
## income -0.27 0.67 0.203 0.071 1.000
##
## 0
## 161
## no.job own.place married conviction income
## no.job 1.000 -0.321 -0.081 0.238 -0.096
## own.place -0.321 1.000 0.718 -0.014 0.653
## married -0.081 0.718 1.000 0.134 0.525
## conviction 0.238 -0.014 0.134 1.000 0.240
## income -0.096 0.653 0.525 0.240 1.000
##
## 0
## 139
#plot
fa_plot_loadings(fa.bins, fa_labels = names(fa.bins))
GG_save("figures/S_agebin_both.png")
#factor sizes
fa.sizes.B = ldply(.data = s.data.bins, .fun = function(x) {
mean(fa(x)$communality)
})
# males only --------------------------------------------------------------
#fetch data
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
s.data = s.data[names.df$gender == "Male",] #males only
#impute data
s.data = s.data[miss_by_case(s.data) <= 2,] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#the databins
s.data.bins = list()
starts = seq(20, 50, 5)
for (idx in seq_along(starts)) {
element.name = str_c("age.", starts[idx], ".", (starts[idx]+5))
print(element.name)
s.data.bins[[element.name]] = s.data[s.data$age >= starts[idx] & s.data$age <= (starts[idx]+5), -6]
}
## [1] "age.20.25"
## [1] "age.25.30"
## [1] "age.30.35"
## [1] "age.35.40"
## [1] "age.40.45"
## [1] "age.45.50"
## [1] "age.50.55"
#fa objects
fa.bins = llply(.data = s.data.bins, .fun = function(x) {
print(cor(x, use="pair"))
print(table(miss_by_case(x)))
fa(x)
})
## no.job own.place married conviction income
## no.job 1.00 -0.517 0.404 0.39 -0.396
## own.place -0.52 1.000 0.057 -0.35 0.701
## married 0.40 0.057 1.000 -0.08 0.068
## conviction 0.39 -0.346 -0.080 1.00 -0.554
## income -0.40 0.701 0.068 -0.55 1.000
##
## 0
## 76
## no.job own.place married conviction income
## no.job 1.00 -0.38 0.512 0.578 -0.28
## own.place -0.38 1.00 0.107 -0.348 0.86
## married 0.51 0.11 1.000 0.097 0.12
## conviction 0.58 -0.35 0.097 1.000 -0.33
## income -0.28 0.86 0.121 -0.334 1.00
##
## 0
## 120
## no.job own.place married conviction income
## no.job 1.00 -0.41 0.501 0.59 -0.507
## own.place -0.41 1.00 0.108 -0.37 0.883
## married 0.50 0.11 1.000 0.13 -0.032
## conviction 0.59 -0.37 0.129 1.00 -0.452
## income -0.51 0.88 -0.032 -0.45 1.000
##
## 0
## 126
## no.job own.place married conviction income
## no.job 1.00 -0.469 0.503 0.446 -0.49
## own.place -0.47 1.000 -0.099 -0.471 0.82
## married 0.50 -0.099 1.000 0.077 -0.24
## conviction 0.45 -0.471 0.077 1.000 -0.39
## income -0.49 0.818 -0.239 -0.389 1.00
##
## 0
## 131
## no.job own.place married conviction income
## no.job 1.00 -0.604 0.236 0.550 -0.53
## own.place -0.60 1.000 -0.072 -0.573 0.87
## married 0.24 -0.072 1.000 0.022 -0.11
## conviction 0.55 -0.573 0.022 1.000 -0.46
## income -0.53 0.873 -0.115 -0.461 1.00
##
## 0
## 93
## no.job own.place married conviction income
## no.job 1.00 -0.37 -0.12 0.68 -0.29
## own.place -0.37 1.00 0.28 -0.47 0.78
## married -0.12 0.28 1.00 -0.30 0.11
## conviction 0.68 -0.47 -0.30 1.00 -0.39
## income -0.29 0.78 0.11 -0.39 1.00
##
## 0
## 73
## no.job own.place married conviction income
## no.job 1.00 -0.44 -0.29 0.43 -0.17
## own.place -0.44 1.00 0.69 -0.16 0.65
## married -0.29 0.69 1.00 -0.13 0.32
## conviction 0.43 -0.16 -0.13 1.00 -0.07
## income -0.17 0.65 0.32 -0.07 1.00
##
## 0
## 61
#plot
fa_plot_loadings(fa.bins, fa_labels = names(fa.bins))
GG_save("figures/S_agebin_males.png")
#factor sizes
fa.sizes.M = ldply(.data = s.data.bins, .fun = function(x) {
mean(fa(x)$communality)
})
# females only ------------------------------------------------------------
#fetch data
s.data = subset(names.df, select=c("no.job","own.place","married",
"conviction", "income",
"age"))
s.data = s.data[names.df$gender == "Female",] #males only
#impute data
s.data = s.data[miss_by_case(s.data) <= 2,] #subset to cases with 0 or 1 NA
s.data = miss_impute(s.data, noise = F) #impute data
#the databins
s.data.bins = list()
starts = seq(20, 50, 5)
for (idx in seq_along(starts)) {
element.name = str_c("age.", starts[idx], ".", (starts[idx]+5))
print(element.name)
s.data.bins[[element.name]] = s.data[s.data$age >= starts[idx] & s.data$age <= (starts[idx]+5), -6]
}
## [1] "age.20.25"
## [1] "age.25.30"
## [1] "age.30.35"
## [1] "age.35.40"
## [1] "age.40.45"
## [1] "age.45.50"
## [1] "age.50.55"
#factor analyses for plotting
fa.bins = llply(.data = s.data.bins, .fun = function(x) {
print(cor(x, use="pair"))
print(table(miss_by_case(x)))
fa(x)
})
## no.job own.place married conviction income
## no.job 1.00 -0.50 0.508 0.372 -0.224
## own.place -0.50 1.00 -0.306 -0.350 0.661
## married 0.51 -0.31 1.000 -0.073 -0.051
## conviction 0.37 -0.35 -0.073 1.000 -0.358
## income -0.22 0.66 -0.051 -0.358 1.000
##
## 0
## 125
## no.job own.place married conviction income
## no.job 1.00 -0.35 0.555 0.328 -0.16
## own.place -0.35 1.00 -0.357 -0.391 0.77
## married 0.55 -0.36 1.000 0.092 -0.17
## conviction 0.33 -0.39 0.092 1.000 -0.29
## income -0.16 0.77 -0.171 -0.285 1.00
##
## 0
## 134
## no.job own.place married conviction income
## no.job 1.00 -0.38 0.6497 0.2177 -0.23
## own.place -0.38 1.00 -0.3843 -0.2650 0.76
## married 0.65 -0.38 1.0000 0.0067 -0.37
## conviction 0.22 -0.27 0.0067 1.0000 -0.18
## income -0.23 0.76 -0.3696 -0.1773 1.00
##
## 0
## 145
## no.job own.place married conviction income
## no.job 1.00 -0.51 0.67 0.28 -0.38
## own.place -0.51 1.00 -0.34 -0.39 0.83
## married 0.67 -0.34 1.00 0.14 -0.30
## conviction 0.28 -0.39 0.14 1.00 -0.29
## income -0.38 0.83 -0.30 -0.29 1.00
##
## 0
## 122
## no.job own.place married conviction income
## no.job 1.00 -0.64 0.690 0.186 -0.39
## own.place -0.64 1.00 -0.327 -0.191 0.70
## married 0.69 -0.33 1.000 0.037 -0.10
## conviction 0.19 -0.19 0.037 1.000 -0.10
## income -0.39 0.70 -0.100 -0.104 1.00
##
## 0
## 101
## no.job own.place married conviction income
## no.job 1.00 -0.43 0.397 0.336 -0.31
## own.place -0.43 1.00 0.128 -0.214 0.67
## married 0.40 0.13 1.000 0.047 0.19
## conviction 0.34 -0.21 0.047 1.000 -0.23
## income -0.31 0.67 0.192 -0.230 1.00
##
## 0
## 88
## no.job own.place married conviction income
## no.job 1.0000 -0.20 0.132 0.245 -0.0082
## own.place -0.2023 1.00 0.729 -0.137 0.7070
## married 0.1316 0.73 1.000 0.046 0.6301
## conviction 0.2447 -0.14 0.046 1.000 -0.2142
## income -0.0082 0.71 0.630 -0.214 1.0000
##
## 0
## 78
#plot
fa_plot_loadings(fa.bins, fa_labels = names(fa.bins), reverse = c(1, 1, 1, 1, -1, 1, 1))
GG_save("figures/S_agebin_females.png")
#factor sizes
fa.sizes.F = ldply(.data = s.data.bins, .fun = function(x) {
mean(fa(x)$communality)
})
# plot factor size and age bin --------------------------------------------
fa.sizes = rbind(fa.sizes.B,
fa.sizes.M,
fa.sizes.F)
fa.sizes$group = c(rep("Both", nrow(fa.sizes.B)),
rep("Male", nrow(fa.sizes.M)),
rep("Female", nrow(fa.sizes.F)))
#plot
ggplot(data = fa.sizes, aes(x = .id, y = V1, color = group, group = group)) +
geom_line(stat="identity", position = "dodge" ) +
xlab("Age bin") + ylab("Factor % of variance")
## Warning: Width not defined. Set with `position_dodge(width = ?)`
GG_save("figures/S_ageBins_factor_sizes.png")
## Warning: Width not defined. Set with `position_dodge(width = ?)`
#loadings plot
fa.list = list(s.fa.B, s.fa.M, s.fa.F, s.fa2.B, s.fa2.M, s.fa2.F)
fa.names = c("S.Both", "S.Male", "S.Female","S.BothNA", "S.MaleNA", "S.FemaleNA")
fa_plot_loadings(fa.list, fa.names)
GG_save("figures/loadings.png")