Data Preperation Step I (segmentation)

Author

Julius Fenn

Notes

Remark: “dat” at first is the data set of N=150 americans who have the political affilication “Democrat”; in opposition to “dat_Republican”

## define global variables: 
runMplus = FALSE

prepare data

load raw data

# sets the directory of location of this script as the current directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))

########################################
# load packages
########################################
require(pacman)
p_load('tidyverse', 'jsonlite', 'magrittr', 'xlsx',
       'stargazer', 'psych', 'jtools', 'DT', 'ggstatsplot', 
       'lavaan', 
       'regsem', 'MplusAutomation')

########################################
# load data
########################################
##### JATOS file
setwd("data")
# dir()
suppressMessages(read_file('jatos_results_data_20231127130832.txt') %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
  # ... filter empty rows ...
  discard(function(x) x == '') %>%
  # ... parse JSON into a data.frame
  map_dfr(fromJSON, flatten=TRUE)) -> dat


## Republicans
suppressMessages(read_file('jatos_results_data_20231127130931_Republicans.txt') %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
  # ... filter empty rows ...
  discard(function(x) x == '') %>%
  # ... parse JSON into a data.frame
  map_dfr(fromJSON, flatten=TRUE)) -> dat_Republican

  ## Germany
suppressMessages(read_file('jatos_results_data_20231127131018_Germany.txt') %>%
  # ... split it into lines ...
  str_split('\n') %>% first() %>%
  # ... filter empty rows ...
  discard(function(x) x == '') %>%
  # ... parse JSON into a data.frame
  map_dfr(fromJSON, flatten=TRUE)) -> dat_Germany


##### prolific file
#> socio-demographic data
prolific <- read.csv(file = "prolific_export_6560bd631f35feab8a3d13c8.csv", header = TRUE)
prolific$U.s..political.affiliation <- NULL
prolific_Republican <- read.csv(file = "prolific_export_65620efdd4432eb81c6a1206_Republicans.csv", header = TRUE)
prolific_Republican$U.s..political.affiliation <- NULL
prolific_Germany <- read.csv(file = "prolific_export_656235b32d4f1e21cfc7e892_Germany.csv", header = TRUE)

prolific <- rbind(prolific, prolific_Republican, prolific_Germany)

setwd("..")

########################################
# load functions
########################################
setwd("../../functions")
for(i in 1:length(dir())){
  # print(dir()[i])
  source(dir()[i], encoding = "utf-8")
}
rm(i)

set up data.frame

########################################
# create counter variable for both data sets
########################################
## for Democrats
dat$ID <- NA
dat$politicalParty <- NA
dat$country <- NA


tmp_IDcounter <- 0
for(i in 1:nrow(dat)){
  if(!is.na(dat$sender[i]) && dat$sender[i] == "Greetings"){
    # tmp <- dat$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
    dat$politicalParty[i] <- "Democrat" ## add political affiliation
    dat$country[i] <- "USA" ## add country
  }
  dat$ID[i] <- tmp_IDcounter
}


## for Republicans
dat_Republican$ID <- NA
dat_Republican$politicalParty <- NA
dat_Republican$country <- NA


# continue tmp_IDcounter
for(i in 1:nrow(dat_Republican)){
  if(!is.na(dat_Republican$sender[i]) && dat_Republican$sender[i] == "Greetings"){
    # tmp <- dat$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
    dat_Republican$politicalParty[i] <- "Republican" ## add political affiliation
    dat_Republican$country[i] <- "USA" ## add country
  }
  dat_Republican$ID[i] <- tmp_IDcounter
}


## Germany
dat_Germany$ID <- NA
dat_Germany$politicalParty <- NA
dat_Germany$country <- NA

# continue tmp_IDcounter
for(i in 1:nrow(dat_Germany)){
  if(!is.na(dat_Germany$sender[i]) && dat_Germany$sender[i] == "Greetings"){
    # tmp <- dat$prolific_pid[i]
    tmp_IDcounter = tmp_IDcounter + 1
    # dat_Germany$politicalParty[i] <- NA ## no political affiliation
      dat_Germany$country[i] <- "Germany" ## add country
  }
  dat_Germany$ID[i] <- tmp_IDcounter
}
rm(tmp_IDcounter)



dat <- dat[, str_subset(string = colnames(dat), pattern = "^\\.\\.", negate = TRUE)]
dat_Republican <- dat_Republican[, str_subset(string = colnames(dat_Republican), pattern = "^\\.\\.", negate = TRUE)]
dat_Germany <- dat_Germany[, str_subset(string = colnames(dat_Germany), pattern = "^\\.\\.", negate = TRUE)]

##########
# merge two data sets
##########
dim(dat) # all senders, long data format from lab.js
[1] 3288   96
dat <- rbind(dat, dat_Republican, dat_Germany)
dim(dat)
[1] 13317    96
########################################
# keep only complete data sets
########################################
sum(table(dat$ID) != max(table(dat$ID))) # on ID level (participants)
[1] 32
sum(table(dat$ID) == max(table(dat$ID)))
[1] 598
dat <- dat[dat$ID %in% names(table(dat$ID))[table(dat$ID) == max(table(dat$ID))],]

########################################
# json (from JATOS) to 2D data.frame
########################################
tmp_notNumeric <- str_subset(string = colnames(dat), pattern = "^meta|^R")
tmp_notNumeric <- str_subset(string = tmp_notNumeric, pattern = "labjs|location", negate = TRUE)

tmp_numeric <- str_subset(string = colnames(dat), pattern = "^affImgAffect|^CRKQ|^CCSQ|^CMQ|^GCB")


vec_ques <- c("PROLIFIC_PID",
                "dummy_informedconsent",
                "commCheck",
                tmp_notNumeric,
                tmp_numeric,
                "feedback_critic",
                "politicalParty",
                "country")

vec_notNumeric = c("PROLIFIC_PID",
                   tmp_notNumeric,
                   "feedback_critic",
                   "politicalParty",
                   "country")

questionnaire <- questionnairetype(dataset = dat, 
                                        listvars = vec_ques, 
                                        notNumeric = vec_notNumeric)

get reaction times for single components

Plot time taken (in minutes) by participants for single components of study:

dat_duration <- data.frame(duration = NA, sender = NA, ID = NA, PROLIFIC_PID = NA)

for(i in 1:length(unique(dat$ID))){

  tmp_PID <- dat$PROLIFIC_PID[dat$ID ==  unique(dat$ID)[i] & !is.na(dat$PROLIFIC_PID)]
  tmp <- data.frame(duration = dat$duration[dat$ID == unique(dat$ID)[i]] / 1000,
                    sender = dat$sender[dat$ID == unique(dat$ID)[i]])

  tmp <- tmp[str_detect(string = tmp$sender, pattern = "Sequence", negate = TRUE),]
  tmp <- tmp[!is.na(tmp$sender),]
  # tmp <- tmp[!is.na(tmp$duration),]

  sub_tmp <- tmp[13:46,]
  tmp[13:46,] <- sub_tmp[order(sub_tmp$sender),]

  if(all(is.na(dat_duration))){
    dat_duration <- data.frame(duration = tmp$duration,
                              sender = tmp$sender,
                              ID = rep(i, times=nrow(tmp)),
                              PROLIFIC_PID = rep(tmp_PID, times=nrow(tmp)))


  }else{
    dat_duration <- rbind(dat_duration,  data.frame(duration = tmp$duration,
                                                    sender = tmp$sender,
                                                    ID = rep(i, times=nrow(tmp)),
                                                    PROLIFIC_PID = rep(tmp_PID, times=nrow(tmp))))
  }
}

## remove empty sender 
dat_duration <- dat_duration[!is.na(dat_duration$sender), ]

## save as .xlsx
write.xlsx2(x = dat_duration, file = "outputs/para_duration_singleComponents.xlsx")

#### plot
dat_duration$ID <- factor(dat_duration$ID)
p <- dat_duration %>%
  ggplot(aes(x=sender, y=duration, color=PROLIFIC_PID)) +
  geom_point() +
  geom_jitter(width=0.15)+
  theme(axis.text.x = element_text(angle = 90)) + theme(legend.position="none")
p
Warning: Removed 612 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 612 rows containing missing values or values outside the scale range
(`geom_point()`).

## save ggplot as PDF
ggsave(filename = "outputs/durations_components.pdf", p)
Warning: Removed 612 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 612 rows containing missing values or values outside the scale range
(`geom_point()`).
# Calculate the mean duration in seconds for each sender and sort by mean duration
tmp <- dat_duration %>%
  group_by(sender) %>%
  summarise(N = n(), mean_duration = mean(duration, na.rm = TRUE)) %>%
  arrange(desc(mean_duration))
DT::datatable(tmp, options = list(pageLength = 5)) 

Correlation plot of single components of study:

# Assuming your data is named dat_duration, you can pivot it wider
dat_duration_wide <- dat_duration %>%
  pivot_wider(
    names_from = sender,  # Use 'sender' to create new columns
    values_from = duration,  # Fill these columns with 'duration' values
    names_prefix = "duration_"  # Optionally, add a prefix to the new columns
)


dat_duration_wide <- dat_duration_wide[, str_detect(string = colnames(dat_duration_wide), pattern = "^duration")]

psych::cor.plot(r = cor(dat_duration_wide, use = "pairwise.complete.obs"), 
                upper = FALSE, 
                xlas = 2, 
                main = "duration (min.) of components")

add prolific data to questionnaire

Add duration in minutes

questionnaire$total_min <- NA
for(p in unique(questionnaire$PROLIFIC_PID)){
  tmp <- dat_duration$duration[dat_duration$PROLIFIC_PID == p]
  questionnaire$total_min[questionnaire$PROLIFIC_PID == p] <-   sum(tmp, na.rm = TRUE) / 60
}

Add Prolific data to data set:

prolific <- prolific[prolific$Participant.id %in% questionnaire$PROLIFIC_PID,]
prolific <- prolific %>%
  arrange(sapply(Participant.id, function(y) which(y == questionnaire$PROLIFIC_PID)))


if(nrow(prolific) == nrow(questionnaire)){
  print("prolific data sucessfully added")
  
  questionnaire$socio_age <- prolific$Age
  questionnaire$socio_sex <- prolific$Sex
  questionnaire$socio_ethnicity <- prolific$Ethnicity.simplified
  questionnaire$socio_student <- prolific$Student.status
  questionnaire$socio_employment <- prolific$Employment.status
  questionnaire$total_min_prolific <- prolific$Time.taken / 60
  ## all time outs to NA
  questionnaire$total_min_prolific[questionnaire$total_min_prolific > 3000] <- NA

  questionnaire[questionnaire == "DATA_EXPIRED"] <- NA
  questionnaire[questionnaire == ""] <- NA
  
  questionnaire$socio_age <- as.numeric(questionnaire$socio_age)
}
[1] "prolific data sucessfully added"
## plot reaction times to see if a person timed out
plot(questionnaire$total_min, questionnaire$total_min_prolific)

## save raw questionnaire
write.xlsx2(x = questionnaire, file = "outputs/questionnaire_raw.xlsx")

add variables

add mean variables for survey scales

Important: computation of overall mean variables is critical if there are subdimensions or correlated residuals (local item dependency) within a survey scale, see next section. However, for a first impression of any sig. difference it is still meaningful.

########################################
# number of items for each scale
########################################
sum(str_detect(string = colnames(questionnaire), pattern = "^GCB"))
[1] 15
sum(str_detect(string = colnames(questionnaire), pattern = "^CMQ"))
[1] 5
sum(str_detect(string = colnames(questionnaire), pattern = "^CRKQ"))
[1] 16
sum(str_detect(string = colnames(questionnaire), pattern = "^CCSQ"))
[1] 12
########################################
# reverse code all items
########################################
#> see negative correlation between single items
psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^GCB")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "GCB scale")

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CMQ")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "CMQ scale")

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CRKQ")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "CRKQ scale")

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CCSQ")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "CCSQ scale")

#########
# reverse code > CRKQ
#########
summary(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CRKQ.*r$")])
  CRKQ-05fopr     CRKQ-03fopr     CRKQ-04fopr   
 Min.   :1.000   Min.   :1.000   Min.   :1.000  
 1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000  
 Median :4.000   Median :3.000   Median :4.000  
 Mean   :3.826   Mean   :3.166   Mean   :3.818  
 3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000  
 Max.   :6.000   Max.   :6.000   Max.   :6.000  
tmp_vars <- str_subset(string = colnames(questionnaire), pattern = "^CRKQ.*r$")

for(v in tmp_vars){
  questionnaire[[v]] <- 7 - questionnaire[[v]]
}

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CRKQ")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "CRKQ scale")

#########
# reverse code > CCSQ
#########
summary(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CCSQ.*r$")])
   CCSQ-01isr      CCSQ-02asr      CCSQ-03isr      CCSQ-02tsr   
 Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
 1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000   1st Qu.:5.000  
 Median :6.000   Median :6.000   Median :6.000   Median :6.000  
 Mean   :5.751   Mean   :5.483   Mean   :5.441   Mean   :5.323  
 3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000  
 Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
tmp_vars <- str_subset(string = colnames(questionnaire), pattern = "^CCSQ.*r$")

for(v in tmp_vars){
  questionnaire[[v]] <- 7 - questionnaire[[v]]
}

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^CCSQ")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "CCSQ scale")

########################################
# compute mean variables OVERALL (!)
########################################
### overall mean variables
questionnaire$mean_GCB  <- questionnaire %>%
  select(matches("^GCB")) %>%
  rowMeans(na.rm = TRUE)

questionnaire$mean_CMQ  <- questionnaire %>%
  select(matches("^CMQ")) %>%
  rowMeans(na.rm = TRUE)

questionnaire$mean_CRKQ  <- questionnaire %>%
  select(matches("^CRKQ")) %>%
  select(ends_with(c("s", "fop", "fopr"))) %>%
  rowMeans(na.rm = TRUE)

questionnaire$mean_CCSQ  <- questionnaire %>%
  select(matches("^CCSQ")) %>%
  rowMeans(na.rm = TRUE)

Correlation plot of computed mean variables:

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^mean")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "mean scales")

add factor scores for survey scales

exploratory factor analysis

overall

parallelAnalysis_overall <- dimensionalityTest(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire)

Parallel analysis suggests that the number of factors =  4  and the number of components =  3 
Overall 
Number of components:  3 
EFA_overall <- explorativeFactorAnalysis(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire, nfac = 3, showCronbach = TRUE)
Lade nötigen Namensraum: GPArotation
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
The estimated weights for the factor scores are probably incorrect.  Try a
different factor score estimation method.
Warning in psych::alpha(tmp_cor): Some items were negatively correlated with the first principal component and probably 
should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option
Some items ( CRKQ-03ccc CRKQ-02ccc CRKQ-04ccc CRKQ-01ccc ) were negatively correlated with the first principal component and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' optionCronbachs Alpha: 0.95 
EFA_overall[[1]]
Factor Analysis using method =  minres
Call: fa(r = tmp_dat, nfactors = nfac, rotate = "promax", cor = "cor")
Standardized loadings (pattern matrix) based upon correlation matrix
              MR1   MR2   MR3   h2   u2 com
CMQ-02      -0.16  0.70  0.26 0.51 0.49 1.4
CMQ-03       0.03  0.74 -0.01 0.57 0.43 1.0
CMQ-01      -0.21  0.78  0.26 0.60 0.40 1.4
CMQ-04       0.07  0.79 -0.01 0.68 0.32 1.0
CMQ-05      -0.05  0.82  0.06 0.66 0.34 1.0
CRKQ-05s     0.80 -0.12  0.16 0.69 0.31 1.1
CRKQ-04s     0.64  0.07  0.05 0.49 0.51 1.0
CRKQ-03s     0.74  0.15 -0.07 0.64 0.36 1.1
CRKQ-03ccc  -0.83  0.11 -0.18 0.76 0.24 1.1
CRKQ-05fopr  0.34 -0.16  0.62 0.60 0.40 1.7
CRKQ-02fop  -0.04  0.09  0.65 0.44 0.56 1.0
CRKQ-02ccc  -0.87  0.11 -0.11 0.76 0.24 1.1
CRKQ-02s     0.82  0.14 -0.07 0.75 0.25 1.1
CRKQ-03fopr  0.19  0.01  0.52 0.39 0.61 1.3
CRKQ-01s     0.88  0.02  0.04 0.81 0.19 1.0
CRKQ-07s     0.77  0.05 -0.09 0.58 0.42 1.0
CRKQ-04fopr  0.27 -0.13  0.71 0.67 0.33 1.4
CRKQ-06s     0.63  0.16  0.08 0.56 0.44 1.2
CRKQ-04ccc  -0.90  0.12 -0.05 0.76 0.24 1.0
CRKQ-01ccc  -0.85  0.13 -0.08 0.69 0.31 1.1
CRKQ-01fop   0.03  0.26  0.60 0.50 0.50 1.4
CCSQ-03ts    0.56  0.17 -0.09 0.39 0.61 1.2
CCSQ-01isr   0.95 -0.12  0.05 0.85 0.15 1.0
CCSQ-02asr   0.86 -0.05  0.02 0.71 0.29 1.0
CCSQ-01as    0.85  0.10 -0.11 0.74 0.26 1.1
CCSQ-03rs    0.85  0.00 -0.01 0.71 0.29 1.0
CCSQ-03as    0.69  0.15 -0.13 0.53 0.47 1.2
CCSQ-02rs    0.78  0.01  0.00 0.61 0.39 1.0
CCSQ-03isr   0.85 -0.15  0.14 0.73 0.27 1.1
CCSQ-02is    0.85  0.07  0.01 0.79 0.21 1.0
CCSQ-01rs    0.67  0.00  0.07 0.48 0.52 1.0
CCSQ-01ts    0.59  0.28  0.02 0.58 0.42 1.4
CCSQ-02tsr   0.83 -0.05  0.12 0.75 0.25 1.0
GCB-03gm    -0.02  0.82  0.08 0.69 0.31 1.0
GCB-03mg     0.09  0.82 -0.05 0.74 0.26 1.0
GCB-03pw     0.04  0.77 -0.05 0.62 0.38 1.0
GCB-01gm    -0.09  0.79  0.15 0.62 0.38 1.1
GCB-02mg     0.05  0.81 -0.04 0.69 0.31 1.0
GCB-01mg     0.07  0.82 -0.01 0.72 0.28 1.0
GCB-03et     0.08  0.72 -0.17 0.55 0.45 1.1
GCB-03ci    -0.16  0.78  0.25 0.61 0.39 1.3
GCB-02ci    -0.18  0.70  0.19 0.46 0.54 1.3
GCB-01pw     0.20  0.71 -0.11 0.65 0.35 1.2
GCB-01ci     0.21  0.69 -0.01 0.65 0.35 1.2
GCB-02gm    -0.04  0.78  0.10 0.61 0.39 1.0
GCB-02pw     0.17  0.69 -0.20 0.57 0.43 1.3
GCB-01et     0.05  0.72 -0.25 0.55 0.45 1.3
GCB-02et     0.00  0.69 -0.16 0.46 0.54 1.1

                        MR1   MR2  MR3
SS loadings           15.24 12.06 2.88
Proportion Var         0.32  0.25 0.06
Cumulative Var         0.32  0.57 0.63
Proportion Explained   0.50  0.40 0.10
Cumulative Proportion  0.50  0.90 1.00

 With factor correlations of 
     MR1  MR2  MR3
MR1 1.00 0.45 0.38
MR2 0.45 1.00 0.17
MR3 0.38 0.17 1.00

Mean item complexity =  1.1
Test of the hypothesis that 3 factors are sufficient.

df null model =  1128  with the objective function =  47.17 with Chi Square =  27365.06
df of  the model are 987  and the objective function was  6.38 

The root mean square of the residuals (RMSR) is  0.03 
The df corrected root mean square of the residuals is  0.03 

The harmonic n.obs is  598 with the empirical chi square  1256.02  with prob <  1.1e-08 
The total n.obs was  598  with Likelihood Chi Square =  3691.59  with prob <  1.8e-307 

Tucker Lewis Index of factoring reliability =  0.882
RMSEA index =  0.068  and the 90 % confidence intervals are  0.065 0.07
BIC =  -2618.88
Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   MR1  MR2  MR3
Correlation of (regression) scores with factors   0.99 0.98 0.93
Multiple R square of scores with factors          0.98 0.97 0.86
Minimum correlation of possible factor scores     0.96 0.94 0.72
EFA_overall[[1]]$loadings

Loadings:
            MR1    MR2    MR3   
CMQ-02      -0.159  0.697  0.259
CMQ-03              0.743       
CMQ-01      -0.212  0.779  0.256
CMQ-04              0.792       
CMQ-05              0.821       
CRKQ-05s     0.800 -0.116  0.158
CRKQ-04s     0.645              
CRKQ-03s     0.743  0.153       
CRKQ-03ccc  -0.829  0.113 -0.181
CRKQ-05fopr  0.342 -0.163  0.618
CRKQ-02fop                 0.655
CRKQ-02ccc  -0.868  0.113 -0.109
CRKQ-02s     0.816  0.143       
CRKQ-03fopr  0.193         0.519
CRKQ-01s     0.880              
CRKQ-07s     0.770              
CRKQ-04fopr  0.267 -0.130  0.710
CRKQ-06s     0.630  0.164       
CRKQ-04ccc  -0.901  0.124       
CRKQ-01ccc  -0.845  0.130       
CRKQ-01fop          0.263  0.601
CCSQ-03ts    0.556  0.170       
CCSQ-01isr   0.946 -0.115       
CCSQ-02asr   0.858              
CCSQ-01as    0.848  0.101 -0.112
CCSQ-03rs    0.847              
CCSQ-03as    0.686  0.152 -0.125
CCSQ-02rs    0.778              
CCSQ-03isr   0.850 -0.154  0.136
CCSQ-02is    0.850              
CCSQ-01rs    0.668              
CCSQ-01ts    0.587  0.277       
CCSQ-02tsr   0.834         0.117
GCB-03gm            0.825       
GCB-03mg            0.824       
GCB-03pw            0.774       
GCB-01gm            0.793  0.147
GCB-02mg            0.811       
GCB-01mg            0.816       
GCB-03et            0.717 -0.167
GCB-03ci    -0.158  0.776  0.245
GCB-02ci    -0.180  0.698  0.193
GCB-01pw     0.205  0.714 -0.111
GCB-01ci     0.213  0.690       
GCB-02gm            0.778  0.101
GCB-02pw     0.166  0.691 -0.203
GCB-01et            0.724 -0.254
GCB-02et            0.688 -0.155

                  MR1    MR2   MR3
SS loadings    14.905 11.984 2.577
Proportion Var  0.311  0.250 0.054
Cumulative Var  0.311  0.560 0.614

seperated by countries

by Germany and USA

########################################
# build subsets
########################################
questionnaire_Germany <- questionnaire %>%
  filter(country == "Germany")

questionnaire_USA <- questionnaire %>%
  filter(country == "USA")


########################################
# Germany
########################################
parallelAnalysis_overall_Germany <- dimensionalityTest(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire_Germany)

Parallel analysis suggests that the number of factors =  3  and the number of components =  3 
Overall 
Number of components:  3 
EFA_overall_Germany <- explorativeFactorAnalysis(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire, nfac = 3, showCronbach = TRUE)
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
The estimated weights for the factor scores are probably incorrect.  Try a
different factor score estimation method.
Warning in psych::alpha(tmp_cor): Some items were negatively correlated with the first principal component and probably 
should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option
Some items ( CRKQ-03ccc CRKQ-02ccc CRKQ-04ccc CRKQ-01ccc ) were negatively correlated with the first principal component and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' optionCronbachs Alpha: 0.95 
EFA_overall_Germany[[1]]
Factor Analysis using method =  minres
Call: fa(r = tmp_dat, nfactors = nfac, rotate = "promax", cor = "cor")
Standardized loadings (pattern matrix) based upon correlation matrix
              MR1   MR2   MR3   h2   u2 com
CMQ-02      -0.16  0.70  0.26 0.51 0.49 1.4
CMQ-03       0.03  0.74 -0.01 0.57 0.43 1.0
CMQ-01      -0.21  0.78  0.26 0.60 0.40 1.4
CMQ-04       0.07  0.79 -0.01 0.68 0.32 1.0
CMQ-05      -0.05  0.82  0.06 0.66 0.34 1.0
CRKQ-05s     0.80 -0.12  0.16 0.69 0.31 1.1
CRKQ-04s     0.64  0.07  0.05 0.49 0.51 1.0
CRKQ-03s     0.74  0.15 -0.07 0.64 0.36 1.1
CRKQ-03ccc  -0.83  0.11 -0.18 0.76 0.24 1.1
CRKQ-05fopr  0.34 -0.16  0.62 0.60 0.40 1.7
CRKQ-02fop  -0.04  0.09  0.65 0.44 0.56 1.0
CRKQ-02ccc  -0.87  0.11 -0.11 0.76 0.24 1.1
CRKQ-02s     0.82  0.14 -0.07 0.75 0.25 1.1
CRKQ-03fopr  0.19  0.01  0.52 0.39 0.61 1.3
CRKQ-01s     0.88  0.02  0.04 0.81 0.19 1.0
CRKQ-07s     0.77  0.05 -0.09 0.58 0.42 1.0
CRKQ-04fopr  0.27 -0.13  0.71 0.67 0.33 1.4
CRKQ-06s     0.63  0.16  0.08 0.56 0.44 1.2
CRKQ-04ccc  -0.90  0.12 -0.05 0.76 0.24 1.0
CRKQ-01ccc  -0.85  0.13 -0.08 0.69 0.31 1.1
CRKQ-01fop   0.03  0.26  0.60 0.50 0.50 1.4
CCSQ-03ts    0.56  0.17 -0.09 0.39 0.61 1.2
CCSQ-01isr   0.95 -0.12  0.05 0.85 0.15 1.0
CCSQ-02asr   0.86 -0.05  0.02 0.71 0.29 1.0
CCSQ-01as    0.85  0.10 -0.11 0.74 0.26 1.1
CCSQ-03rs    0.85  0.00 -0.01 0.71 0.29 1.0
CCSQ-03as    0.69  0.15 -0.13 0.53 0.47 1.2
CCSQ-02rs    0.78  0.01  0.00 0.61 0.39 1.0
CCSQ-03isr   0.85 -0.15  0.14 0.73 0.27 1.1
CCSQ-02is    0.85  0.07  0.01 0.79 0.21 1.0
CCSQ-01rs    0.67  0.00  0.07 0.48 0.52 1.0
CCSQ-01ts    0.59  0.28  0.02 0.58 0.42 1.4
CCSQ-02tsr   0.83 -0.05  0.12 0.75 0.25 1.0
GCB-03gm    -0.02  0.82  0.08 0.69 0.31 1.0
GCB-03mg     0.09  0.82 -0.05 0.74 0.26 1.0
GCB-03pw     0.04  0.77 -0.05 0.62 0.38 1.0
GCB-01gm    -0.09  0.79  0.15 0.62 0.38 1.1
GCB-02mg     0.05  0.81 -0.04 0.69 0.31 1.0
GCB-01mg     0.07  0.82 -0.01 0.72 0.28 1.0
GCB-03et     0.08  0.72 -0.17 0.55 0.45 1.1
GCB-03ci    -0.16  0.78  0.25 0.61 0.39 1.3
GCB-02ci    -0.18  0.70  0.19 0.46 0.54 1.3
GCB-01pw     0.20  0.71 -0.11 0.65 0.35 1.2
GCB-01ci     0.21  0.69 -0.01 0.65 0.35 1.2
GCB-02gm    -0.04  0.78  0.10 0.61 0.39 1.0
GCB-02pw     0.17  0.69 -0.20 0.57 0.43 1.3
GCB-01et     0.05  0.72 -0.25 0.55 0.45 1.3
GCB-02et     0.00  0.69 -0.16 0.46 0.54 1.1

                        MR1   MR2  MR3
SS loadings           15.24 12.06 2.88
Proportion Var         0.32  0.25 0.06
Cumulative Var         0.32  0.57 0.63
Proportion Explained   0.50  0.40 0.10
Cumulative Proportion  0.50  0.90 1.00

 With factor correlations of 
     MR1  MR2  MR3
MR1 1.00 0.45 0.38
MR2 0.45 1.00 0.17
MR3 0.38 0.17 1.00

Mean item complexity =  1.1
Test of the hypothesis that 3 factors are sufficient.

df null model =  1128  with the objective function =  47.17 with Chi Square =  27365.06
df of  the model are 987  and the objective function was  6.38 

The root mean square of the residuals (RMSR) is  0.03 
The df corrected root mean square of the residuals is  0.03 

The harmonic n.obs is  598 with the empirical chi square  1256.02  with prob <  1.1e-08 
The total n.obs was  598  with Likelihood Chi Square =  3691.59  with prob <  1.8e-307 

Tucker Lewis Index of factoring reliability =  0.882
RMSEA index =  0.068  and the 90 % confidence intervals are  0.065 0.07
BIC =  -2618.88
Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   MR1  MR2  MR3
Correlation of (regression) scores with factors   0.99 0.98 0.93
Multiple R square of scores with factors          0.98 0.97 0.86
Minimum correlation of possible factor scores     0.96 0.94 0.72
EFA_overall_Germany[[1]]$loadings

Loadings:
            MR1    MR2    MR3   
CMQ-02      -0.159  0.697  0.259
CMQ-03              0.743       
CMQ-01      -0.212  0.779  0.256
CMQ-04              0.792       
CMQ-05              0.821       
CRKQ-05s     0.800 -0.116  0.158
CRKQ-04s     0.645              
CRKQ-03s     0.743  0.153       
CRKQ-03ccc  -0.829  0.113 -0.181
CRKQ-05fopr  0.342 -0.163  0.618
CRKQ-02fop                 0.655
CRKQ-02ccc  -0.868  0.113 -0.109
CRKQ-02s     0.816  0.143       
CRKQ-03fopr  0.193         0.519
CRKQ-01s     0.880              
CRKQ-07s     0.770              
CRKQ-04fopr  0.267 -0.130  0.710
CRKQ-06s     0.630  0.164       
CRKQ-04ccc  -0.901  0.124       
CRKQ-01ccc  -0.845  0.130       
CRKQ-01fop          0.263  0.601
CCSQ-03ts    0.556  0.170       
CCSQ-01isr   0.946 -0.115       
CCSQ-02asr   0.858              
CCSQ-01as    0.848  0.101 -0.112
CCSQ-03rs    0.847              
CCSQ-03as    0.686  0.152 -0.125
CCSQ-02rs    0.778              
CCSQ-03isr   0.850 -0.154  0.136
CCSQ-02is    0.850              
CCSQ-01rs    0.668              
CCSQ-01ts    0.587  0.277       
CCSQ-02tsr   0.834         0.117
GCB-03gm            0.825       
GCB-03mg            0.824       
GCB-03pw            0.774       
GCB-01gm            0.793  0.147
GCB-02mg            0.811       
GCB-01mg            0.816       
GCB-03et            0.717 -0.167
GCB-03ci    -0.158  0.776  0.245
GCB-02ci    -0.180  0.698  0.193
GCB-01pw     0.205  0.714 -0.111
GCB-01ci     0.213  0.690       
GCB-02gm            0.778  0.101
GCB-02pw     0.166  0.691 -0.203
GCB-01et            0.724 -0.254
GCB-02et            0.688 -0.155

                  MR1    MR2   MR3
SS loadings    14.905 11.984 2.577
Proportion Var  0.311  0.250 0.054
Cumulative Var  0.311  0.560 0.614
########################################
# USA
########################################
parallelAnalysis_overall_USA <- dimensionalityTest(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire_USA)

Parallel analysis suggests that the number of factors =  3  and the number of components =  3 
Overall 
Number of components:  3 
EFA_overall_USA <- explorativeFactorAnalysis(label = "Overall", regEx = "^GCB|^CMQ|^CRKQ|^CCSQ", dataset = questionnaire, nfac = 3, showCronbach = TRUE)
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, : The estimated weights for the factor scores are probably incorrect.  Try a different factor score estimation method.
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, : Some items were negatively correlated with the first principal component and probably 
should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option
Some items ( CRKQ-03ccc CRKQ-02ccc CRKQ-04ccc CRKQ-01ccc ) were negatively correlated with the first principal component and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' optionCronbachs Alpha: 0.95 
EFA_overall_USA[[1]]
Factor Analysis using method =  minres
Call: fa(r = tmp_dat, nfactors = nfac, rotate = "promax", cor = "cor")
Standardized loadings (pattern matrix) based upon correlation matrix
              MR1   MR2   MR3   h2   u2 com
CMQ-02      -0.16  0.70  0.26 0.51 0.49 1.4
CMQ-03       0.03  0.74 -0.01 0.57 0.43 1.0
CMQ-01      -0.21  0.78  0.26 0.60 0.40 1.4
CMQ-04       0.07  0.79 -0.01 0.68 0.32 1.0
CMQ-05      -0.05  0.82  0.06 0.66 0.34 1.0
CRKQ-05s     0.80 -0.12  0.16 0.69 0.31 1.1
CRKQ-04s     0.64  0.07  0.05 0.49 0.51 1.0
CRKQ-03s     0.74  0.15 -0.07 0.64 0.36 1.1
CRKQ-03ccc  -0.83  0.11 -0.18 0.76 0.24 1.1
CRKQ-05fopr  0.34 -0.16  0.62 0.60 0.40 1.7
CRKQ-02fop  -0.04  0.09  0.65 0.44 0.56 1.0
CRKQ-02ccc  -0.87  0.11 -0.11 0.76 0.24 1.1
CRKQ-02s     0.82  0.14 -0.07 0.75 0.25 1.1
CRKQ-03fopr  0.19  0.01  0.52 0.39 0.61 1.3
CRKQ-01s     0.88  0.02  0.04 0.81 0.19 1.0
CRKQ-07s     0.77  0.05 -0.09 0.58 0.42 1.0
CRKQ-04fopr  0.27 -0.13  0.71 0.67 0.33 1.4
CRKQ-06s     0.63  0.16  0.08 0.56 0.44 1.2
CRKQ-04ccc  -0.90  0.12 -0.05 0.76 0.24 1.0
CRKQ-01ccc  -0.85  0.13 -0.08 0.69 0.31 1.1
CRKQ-01fop   0.03  0.26  0.60 0.50 0.50 1.4
CCSQ-03ts    0.56  0.17 -0.09 0.39 0.61 1.2
CCSQ-01isr   0.95 -0.12  0.05 0.85 0.15 1.0
CCSQ-02asr   0.86 -0.05  0.02 0.71 0.29 1.0
CCSQ-01as    0.85  0.10 -0.11 0.74 0.26 1.1
CCSQ-03rs    0.85  0.00 -0.01 0.71 0.29 1.0
CCSQ-03as    0.69  0.15 -0.13 0.53 0.47 1.2
CCSQ-02rs    0.78  0.01  0.00 0.61 0.39 1.0
CCSQ-03isr   0.85 -0.15  0.14 0.73 0.27 1.1
CCSQ-02is    0.85  0.07  0.01 0.79 0.21 1.0
CCSQ-01rs    0.67  0.00  0.07 0.48 0.52 1.0
CCSQ-01ts    0.59  0.28  0.02 0.58 0.42 1.4
CCSQ-02tsr   0.83 -0.05  0.12 0.75 0.25 1.0
GCB-03gm    -0.02  0.82  0.08 0.69 0.31 1.0
GCB-03mg     0.09  0.82 -0.05 0.74 0.26 1.0
GCB-03pw     0.04  0.77 -0.05 0.62 0.38 1.0
GCB-01gm    -0.09  0.79  0.15 0.62 0.38 1.1
GCB-02mg     0.05  0.81 -0.04 0.69 0.31 1.0
GCB-01mg     0.07  0.82 -0.01 0.72 0.28 1.0
GCB-03et     0.08  0.72 -0.17 0.55 0.45 1.1
GCB-03ci    -0.16  0.78  0.25 0.61 0.39 1.3
GCB-02ci    -0.18  0.70  0.19 0.46 0.54 1.3
GCB-01pw     0.20  0.71 -0.11 0.65 0.35 1.2
GCB-01ci     0.21  0.69 -0.01 0.65 0.35 1.2
GCB-02gm    -0.04  0.78  0.10 0.61 0.39 1.0
GCB-02pw     0.17  0.69 -0.20 0.57 0.43 1.3
GCB-01et     0.05  0.72 -0.25 0.55 0.45 1.3
GCB-02et     0.00  0.69 -0.16 0.46 0.54 1.1

                        MR1   MR2  MR3
SS loadings           15.24 12.06 2.88
Proportion Var         0.32  0.25 0.06
Cumulative Var         0.32  0.57 0.63
Proportion Explained   0.50  0.40 0.10
Cumulative Proportion  0.50  0.90 1.00

 With factor correlations of 
     MR1  MR2  MR3
MR1 1.00 0.45 0.38
MR2 0.45 1.00 0.17
MR3 0.38 0.17 1.00

Mean item complexity =  1.1
Test of the hypothesis that 3 factors are sufficient.

df null model =  1128  with the objective function =  47.17 with Chi Square =  27365.06
df of  the model are 987  and the objective function was  6.38 

The root mean square of the residuals (RMSR) is  0.03 
The df corrected root mean square of the residuals is  0.03 

The harmonic n.obs is  598 with the empirical chi square  1256.02  with prob <  1.1e-08 
The total n.obs was  598  with Likelihood Chi Square =  3691.59  with prob <  1.8e-307 

Tucker Lewis Index of factoring reliability =  0.882
RMSEA index =  0.068  and the 90 % confidence intervals are  0.065 0.07
BIC =  -2618.88
Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   MR1  MR2  MR3
Correlation of (regression) scores with factors   0.99 0.98 0.93
Multiple R square of scores with factors          0.98 0.97 0.86
Minimum correlation of possible factor scores     0.96 0.94 0.72
EFA_overall_USA[[1]]$loadings

Loadings:
            MR1    MR2    MR3   
CMQ-02      -0.159  0.697  0.259
CMQ-03              0.743       
CMQ-01      -0.212  0.779  0.256
CMQ-04              0.792       
CMQ-05              0.821       
CRKQ-05s     0.800 -0.116  0.158
CRKQ-04s     0.645              
CRKQ-03s     0.743  0.153       
CRKQ-03ccc  -0.829  0.113 -0.181
CRKQ-05fopr  0.342 -0.163  0.618
CRKQ-02fop                 0.655
CRKQ-02ccc  -0.868  0.113 -0.109
CRKQ-02s     0.816  0.143       
CRKQ-03fopr  0.193         0.519
CRKQ-01s     0.880              
CRKQ-07s     0.770              
CRKQ-04fopr  0.267 -0.130  0.710
CRKQ-06s     0.630  0.164       
CRKQ-04ccc  -0.901  0.124       
CRKQ-01ccc  -0.845  0.130       
CRKQ-01fop          0.263  0.601
CCSQ-03ts    0.556  0.170       
CCSQ-01isr   0.946 -0.115       
CCSQ-02asr   0.858              
CCSQ-01as    0.848  0.101 -0.112
CCSQ-03rs    0.847              
CCSQ-03as    0.686  0.152 -0.125
CCSQ-02rs    0.778              
CCSQ-03isr   0.850 -0.154  0.136
CCSQ-02is    0.850              
CCSQ-01rs    0.668              
CCSQ-01ts    0.587  0.277       
CCSQ-02tsr   0.834         0.117
GCB-03gm            0.825       
GCB-03mg            0.824       
GCB-03pw            0.774       
GCB-01gm            0.793  0.147
GCB-02mg            0.811       
GCB-01mg            0.816       
GCB-03et            0.717 -0.167
GCB-03ci    -0.158  0.776  0.245
GCB-02ci    -0.180  0.698  0.193
GCB-01pw     0.205  0.714 -0.111
GCB-01ci     0.213  0.690       
GCB-02gm            0.778  0.101
GCB-02pw     0.166  0.691 -0.203
GCB-01et            0.724 -0.254
GCB-02et            0.688 -0.155

                  MR1    MR2   MR3
SS loadings    14.905 11.984 2.577
Proportion Var  0.311  0.250 0.054
Cumulative Var  0.311  0.560 0.614

confirmatory factor analysis

The self-written function “CFAstats” has the following functionalities: * showPlots = get correlation plot and run self-written function “getDescriptives” * computeEFA = compute parallel analysis and EFA (apply two self-written functions, see section EFA) * computeCFA = apply lavaan package using “MLR” estimator to compute CFA (no residual correlations are specified) * computeCFAMplus = apply package using “MLR” estimator to compute CFA (no residual correlations are specified), possible here to compute CI for McDonald’s Omega

CMQ

CFA_CMQ <- CFAstats(dataset = questionnaire,
                           regularExp = "^CMQ",
                           labelLatent = "CMQ",
                           showPlots = TRUE, computeEFA = FALSE, computeCFA = TRUE, computeCFAMplus = FALSE)
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein



descriptive statistics: 
       Mean   SD Median CoeffofVariation Minimum Maximun Lower Quantile
CMQ-02 7.42 2.58      8             0.35       1      11              1
CMQ-03 5.36 3.02      5             0.56       1      11              1
CMQ-01 7.02 2.95      7             0.42       1      11              1
CMQ-04 4.85 2.82      5             0.58       1      11              1
CMQ-05 5.61 3.06      6             0.54       1      11              1
       Upper Quantile Skewness Kurtosis(-3) KS-Test
CMQ-02             11    -0.54        -0.48       0
CMQ-03             11     0.19        -1.08       0
CMQ-01             11    -0.46        -0.85       0
CMQ-04             11     0.38        -0.90       0
CMQ-05             11     0.08        -1.12       0


variables under investigation:  CMQ02 CMQ03 CMQ01 CMQ04 CMQ05 

Cronbachs Alpha: 0.9 
CFA summary and fit statistics: 
lavaan 0.6.17 ended normally after 26 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        10

  Number of observations                           598

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                                98.237      72.708
  Degrees of freedom                                 5           5
  P-value (Chi-square)                           0.000       0.000
  Scaling correction factor                                  1.351
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                              1873.954    1363.737
  Degrees of freedom                                10          10
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.374

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.950       0.950
  Tucker-Lewis Index (TLI)                       0.900       0.900
                                                                  
  Robust Comparative Fit Index (CFI)                         0.951
  Robust Tucker-Lewis Index (TLI)                            0.902

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -6514.859   -6514.859
  Scaling correction factor                                  0.977
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)      -6465.741   -6465.741
  Scaling correction factor                                  1.102
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                               13049.718   13049.718
  Bayesian (BIC)                             13093.654   13093.654
  Sample-size adjusted Bayesian (SABIC)      13061.907   13061.907

Root Mean Square Error of Approximation:

  RMSEA                                          0.177       0.150
  90 Percent confidence interval - lower         0.147       0.125
  90 Percent confidence interval - upper         0.208       0.178
  P-value H_0: RMSEA <= 0.050                    0.000       0.000
  P-value H_0: RMSEA >= 0.080                    1.000       1.000
                                                                  
  Robust RMSEA                                               0.175
  90 Percent confidence interval - lower                     0.141
  90 Percent confidence interval - upper                     0.212
  P-value H_0: Robust RMSEA <= 0.050                         0.000
  P-value H_0: Robust RMSEA >= 0.080                         1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.039       0.039

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  CMQ =~                                                                
    CMQ02             1.000                               1.842    0.715
    CMQ03             1.224    0.067   18.239    0.000    2.255    0.748
    CMQ01             1.302    0.053   24.551    0.000    2.399    0.814
    CMQ04             1.298    0.076   17.037    0.000    2.392    0.847
    CMQ05             1.464    0.076   19.182    0.000    2.697    0.882

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .CMQ02             3.241    0.219   14.811    0.000    3.241    0.489
   .CMQ03             4.000    0.314   12.733    0.000    4.000    0.440
   .CMQ01             2.925    0.220   13.315    0.000    2.925    0.337
   .CMQ04             2.245    0.234    9.602    0.000    2.245    0.282
   .CMQ05             2.067    0.220    9.405    0.000    2.067    0.221
    CMQ               3.393    0.340    9.988    0.000    1.000    1.000



CFA first 6 Modification Indices: 
     lhs op   rhs     mi    epc sepc.lv sepc.all sepc.nox
13 CMQ02 ~~ CMQ01 75.156  1.355   1.355    0.440    0.440
21 CMQ04 ~~ CMQ05 43.773  1.160   1.160    0.539    0.539
19 CMQ01 ~~ CMQ04 27.157 -0.843  -0.843   -0.329   -0.329
14 CMQ02 ~~ CMQ04 21.093 -0.668  -0.668   -0.248   -0.248
15 CMQ02 ~~ CMQ05 19.822 -0.689  -0.689   -0.266   -0.266
17 CMQ03 ~~ CMQ04  6.247  0.418   0.418    0.140    0.140

correlated residuals:

Remark: in the following correlated residuals are computed within the same code chunk

questionnaire_changedNames <- questionnaire

########################################
# test for correlated residuals between single items
########################################
regEx <- "^CMQ"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx); tmp_vars
[1] "CMQ02" "CMQ03" "CMQ01" "CMQ04" "CMQ05"
out_SyntaxCorrelatedResiduals <- getCorrelatedResidualsSyntax(vec_variables = tmp_vars,
                                                             labelLatentLabel = "CMQ", verbose = FALSE)

fit <- sem(out_SyntaxCorrelatedResiduals, questionnaire_changedNames, fixed.x=FALSE)
Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
    Could not compute standard errors! The information matrix could
    not be inverted. This may be a symptom that the model is not
    identified.
semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)

fit_regsem <- cv_regsem(model = fit, n.lambda = 40, jump=0.02,
                        pars_pen = 
                          paste0("v", 1:sum(parameterEstimates(fit)$label != "")),
                        type = "enet", verbose = FALSE)
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in pchisq(chisq, df): NaNs wurden erzeugt
Warning in sqrt(ncp/df): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = lambda): NaNs wurden erzeugt
Warning in pchisq(chisq, df = df, ncp = (N * df * 0.05^2)): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
Warning in qchisq(p = (1 - alpha), df = df): NaNs wurden erzeugt
summary(fit_regsem)
CV regsem Object
 Number of parameters regularized: 10
 Lambda ranging from 0 to 0.2
 Lowest Fit Lambda: 0.02
 Metric: BIC
 Number Converged: 11
plot(fit_regsem)

# head(fit_regsem$fits,10)
fit_regsem$final_pars
  CMQ -> CMQ03   CMQ -> CMQ01   CMQ -> CMQ04   CMQ -> CMQ05 CMQ02 ~~ CMQ03 
         1.223          1.291          1.347          1.479          0.172 
CMQ02 ~~ CMQ01 CMQ02 ~~ CMQ04 CMQ02 ~~ CMQ05 CMQ03 ~~ CMQ01 CMQ03 ~~ CMQ04 
         0.851         -0.443         -0.289         -0.001          0.035 
CMQ03 ~~ CMQ05 CMQ01 ~~ CMQ04 CMQ01 ~~ CMQ05 CMQ04 ~~ CMQ05 CMQ02 ~~ CMQ02 
        -0.139         -0.407          0.000          0.004          3.184 
CMQ03 ~~ CMQ03 CMQ01 ~~ CMQ01 CMQ04 ~~ CMQ04 CMQ05 ~~ CMQ05     CMQ ~~ CMQ 
         4.019          3.011          1.823          1.962          3.375 
########################################
# fit adjusted model
########################################
model_lavaan(vars = tmp_vars, labelLatentVar = "CMQ", verbose = FALSE)
[1] "CMQ =~ CMQ02 + CMQ03 + CMQ01 + CMQ04 + CMQ05"
mod_lavaan <- "
CMQ =~ CMQ02 + CMQ03 + CMQ01 + CMQ04 + CMQ05

#correlated residuls:
CMQ02 ~~ CMQ01
"

### MLR estimator
fit <- cfa(mod_lavaan, data = questionnaire_changedNames, estimator = "MLR")
# summary(fit, standardized = TRUE)
# semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)
round(fitmeasures(fit,
                  fit.measures =c("aic", "bic", "logl", "pvalue",
                                  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
                                  "srmr", "cfi", "tli")), digits = 3)
           aic            bic           logl         pvalue          rmsea 
     12979.123      13027.452      -6478.561          0.000          0.095 
rmsea.ci.lower rmsea.ci.upper           srmr            cfi            tli 
         0.062          0.132          0.021          0.988          0.971 
# head(modificationindices(fit, sort=T)) ## also possible to check out modification indices

### get factor scores
tmp_fc <- lavPredict(fit, method = "Bartlett")
questionnaire$fc_CMQ <- tmp_fc

GCB

no one factor solution -> theoretically driven split according to single sub-dimensions

table(str_remove(string = str_subset(string = colnames(questionnaire), pattern = "^GCB"), pattern = "^GCB-[:digit:]*"))

ci et gm mg pw 
 3  3  3  3  3 
CFA_GCB <- CFAstats(dataset = questionnaire,
                           regularExp = "^GCB",
                           labelLatent = "GCB",
                           showPlots = TRUE, computeEFA = FALSE, computeCFA = TRUE, computeCFAMplus = FALSE)
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein



descriptive statistics: 
         Mean   SD Median CoeffofVariation Minimum Maximun Lower Quantile
GCB-03gm 2.63 1.31      2             0.50       1       5              1
GCB-03mg 2.40 1.30      2             0.54       1       5              1
GCB-03pw 2.41 1.27      2             0.53       1       5              1
GCB-01gm 2.50 1.33      2             0.53       1       5              1
GCB-02mg 2.39 1.31      2             0.55       1       5              1
GCB-01mg 2.39 1.29      2             0.54       1       5              1
GCB-03et 2.04 1.17      2             0.57       1       5              1
GCB-03ci 3.27 1.30      4             0.40       1       5              1
GCB-02ci 3.00 1.22      3             0.41       1       5              1
GCB-01pw 2.31 1.26      2             0.55       1       5              1
GCB-01ci 2.43 1.26      2             0.52       1       5              1
GCB-02gm 2.36 1.30      2             0.55       1       5              1
GCB-02pw 1.96 1.19      1             0.61       1       5              1
GCB-01et 1.91 1.18      1             0.61       1       5              1
GCB-02et 2.35 1.29      2             0.55       1       5              1
         Upper Quantile Skewness Kurtosis(-3) KS-Test
GCB-03gm              5     0.24        -1.21       0
GCB-03mg              5     0.40        -1.21       0
GCB-03pw              5     0.41        -1.12       0
GCB-01gm              5     0.34        -1.19       0
GCB-02mg              5     0.47        -1.07       0
GCB-01mg              5     0.47        -1.04       0
GCB-03et              5     0.85        -0.38       0
GCB-03ci              5    -0.40        -1.04       0
GCB-02ci              5    -0.15        -1.04       0
GCB-01pw              5     0.63        -0.80       0
GCB-01ci              5     0.48        -0.92       0
GCB-02gm              5     0.52        -0.99       0
GCB-02pw              5     1.02        -0.08       0
GCB-01et              5     1.14         0.19       0
GCB-02et              5     0.57        -0.86       0


variables under investigation:  GCB03gm GCB03mg GCB03pw GCB01gm GCB02mg GCB01mg GCB03et GCB03ci GCB02ci GCB01pw GCB01ci GCB02gm GCB02pw GCB01et GCB02et 

Cronbachs Alpha: 0.96 
CFA summary and fit statistics: 
lavaan 0.6.17 ended normally after 23 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        30

  Number of observations                           598

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                               929.647     675.568
  Degrees of freedom                                90          90
  P-value (Chi-square)                           0.000       0.000
  Scaling correction factor                                  1.376
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                              7415.626    5297.279
  Degrees of freedom                               105         105
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.400

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.885       0.887
  Tucker-Lewis Index (TLI)                       0.866       0.868
                                                                  
  Robust Comparative Fit Index (CFI)                         0.889
  Robust Tucker-Lewis Index (TLI)                            0.871

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -11582.788  -11582.788
  Scaling correction factor                                  1.059
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)     -11117.964  -11117.964
  Scaling correction factor                                  1.297
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                               23225.576   23225.576
  Bayesian (BIC)                             23357.383   23357.383
  Sample-size adjusted Bayesian (SABIC)      23262.142   23262.142

Root Mean Square Error of Approximation:

  RMSEA                                          0.125       0.104
  90 Percent confidence interval - lower         0.118       0.098
  90 Percent confidence interval - upper         0.132       0.111
  P-value H_0: RMSEA <= 0.050                    0.000       0.000
  P-value H_0: RMSEA >= 0.080                    1.000       1.000
                                                                  
  Robust RMSEA                                               0.122
  90 Percent confidence interval - lower                     0.114
  90 Percent confidence interval - upper                     0.131
  P-value H_0: Robust RMSEA <= 0.050                         0.000
  P-value H_0: Robust RMSEA >= 0.080                         1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.053       0.053

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  GCB =~                                                                
    GCB03gm           1.000                               1.081    0.824
    GCB03mg           1.047    0.035   29.812    0.000    1.132    0.870
    GCB03pw           0.923    0.033   27.672    0.000    0.998    0.783
    GCB01gm           0.964    0.031   30.966    0.000    1.042    0.782
    GCB02mg           1.023    0.037   27.467    0.000    1.106    0.845
    GCB01mg           1.024    0.034   29.895    0.000    1.107    0.857
    GCB03et           0.797    0.039   20.658    0.000    0.861    0.735
    GCB03ci           0.862    0.035   24.899    0.000    0.932    0.719
    GCB02ci           0.706    0.037   19.155    0.000    0.763    0.626
    GCB01pw           0.947    0.036   26.618    0.000    1.024    0.811
    GCB01ci           0.913    0.036   25.416    0.000    0.987    0.786
    GCB02gm           0.946    0.031   30.203    0.000    1.023    0.787
    GCB02pw           0.815    0.039   21.135    0.000    0.881    0.743
    GCB01et           0.767    0.040   18.983    0.000    0.829    0.706
    GCB02et           0.784    0.042   18.719    0.000    0.848    0.659

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .GCB03gm           0.550    0.040   13.811    0.000    0.550    0.320
   .GCB03mg           0.411    0.036   11.556    0.000    0.411    0.243
   .GCB03pw           0.627    0.049   12.852    0.000    0.627    0.387
   .GCB01gm           0.690    0.050   13.928    0.000    0.690    0.389
   .GCB02mg           0.489    0.037   13.250    0.000    0.489    0.285
   .GCB01mg           0.444    0.037   12.132    0.000    0.444    0.266
   .GCB03et           0.631    0.051   12.417    0.000    0.631    0.460
   .GCB03ci           0.811    0.045   17.913    0.000    0.811    0.483
   .GCB02ci           0.903    0.047   19.247    0.000    0.903    0.608
   .GCB01pw           0.546    0.039   14.047    0.000    0.546    0.343
   .GCB01ci           0.603    0.045   13.292    0.000    0.603    0.383
   .GCB02gm           0.643    0.049   12.994    0.000    0.643    0.381
   .GCB02pw           0.629    0.046   13.685    0.000    0.629    0.448
   .GCB01et           0.691    0.053   12.987    0.000    0.691    0.501
   .GCB02et           0.935    0.060   15.638    0.000    0.935    0.565
    GCB               1.168    0.073   16.035    0.000    1.000    1.000



CFA first 6 Modification Indices: 
        lhs op     rhs      mi   epc sepc.lv sepc.all sepc.nox
136 GCB01et ~~ GCB02et 161.631 0.434   0.434    0.540    0.540
108 GCB03et ~~ GCB02et 108.108 0.341   0.341    0.444    0.444
78  GCB01gm ~~ GCB02gm 100.465 0.294   0.294    0.442    0.442
107 GCB03et ~~ GCB01et  70.658 0.238   0.238    0.361    0.361
109 GCB03ci ~~ GCB02ci  68.534 0.301   0.301    0.351    0.351
49  GCB03mg ~~ GCB01mg  67.317 0.167   0.167    0.392    0.392
# head(modificationindices(CFA_GCB[[3]], sort=T)) ## also possible to check out modification indices

########################################
# CFA first order
########################################
### get syntax
regEx <- "^GCB.*ci$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "GCBci", verbose = FALSE)
[1] "GCBci =~ GCB03ci + GCB02ci + GCB01ci"
regEx <- "^GCB.*et$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "GCBet", verbose = FALSE)
[1] "GCBet =~ GCB03et + GCB01et + GCB02et"
regEx <- "^GCB.*gm$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "GCBgm", verbose = FALSE)
[1] "GCBgm =~ GCB03gm + GCB01gm + GCB02gm"
regEx <- "^GCB.*mg$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "GCBmg", verbose = FALSE)
[1] "GCBmg =~ GCB03mg + GCB02mg + GCB01mg"
regEx <- "^GCB.*pw$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CMQpw", verbose = FALSE)
[1] "CMQpw =~ GCB03pw + GCB01pw + GCB02pw"
### fit model
mod_lavaan <- "
GCBci =~ GCB03ci + GCB02ci + GCB01ci
GCBet =~ GCB03et + GCB01et + GCB02et
GCBgm =~ GCB03gm + GCB01gm + GCB02gm
GCBmg =~ GCB03mg + GCB02mg + GCB01mg
GCBpw =~ GCB03pw + GCB01pw + GCB02pw

# no correlated residuls:
"

### MLR estimator
fit <- cfa(mod_lavaan, data = questionnaire_changedNames, estimator = "MLR")
# summary(fit, standardized = TRUE)
semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)

round(fitmeasures(fit,
                  fit.measures =c("aic", "bic", "logl", "pvalue",
                                  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
                                  "srmr", "cfi", "tli")), digits = 3)
           aic            bic           logl         pvalue          rmsea 
     22635.014      22810.758     -11277.507          0.000          0.071 
rmsea.ci.lower rmsea.ci.upper           srmr            cfi            tli 
         0.063          0.079          0.032          0.967          0.957 
### get factor scores
tmp_fc <- lavPredict(fit, method = "Bartlett")
questionnaire$fc_GCBci <- tmp_fc[,1]
questionnaire$fc_GCBet <- tmp_fc[,2]
questionnaire$fc_GCBgm <- tmp_fc[,3]
questionnaire$fc_GCBmg <- tmp_fc[,4]
questionnaire$fc_GCBpw <- tmp_fc[,5]

CRKQ

no one factor solution -> theoretically driven split according to single sub-dimensions

table(str_remove(string = str_subset(string = colnames(questionnaire), pattern = "^CRKQ"), pattern = "^CRKQ-[:digit:]*"))

 ccc  fop fopr    s 
   4    2    3    7 
CFA_CRKQ <- CFAstats(dataset = questionnaire,
                           regularExp = "^CRKQ",
                           labelLatent = "CRKQ",
                           showPlots = TRUE, computeEFA = FALSE, computeCFA = TRUE, computeCFAMplus = FALSE)
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein



descriptive statistics: 
            Mean   SD Median CoeffofVariation Minimum Maximun Lower Quantile
CRKQ-05s    2.56 1.53      2             0.60       1       6              1
CRKQ-04s    3.44 1.56      3             0.45       1       6              1
CRKQ-03s    2.46 1.54      2             0.63       1       6              1
CRKQ-03ccc  4.52 1.49      5             0.33       1       6              1
CRKQ-05fopr 3.17 1.31      3             0.41       1       6              1
CRKQ-02fop  3.39 1.42      3             0.42       1       6              1
CRKQ-02ccc  5.04 1.26      5             0.25       1       6              1
CRKQ-02s    2.14 1.56      1             0.73       1       6              1
CRKQ-03fopr 3.83 1.31      4             0.34       1       6              1
CRKQ-01s    2.53 1.63      2             0.65       1       6              1
CRKQ-07s    2.57 1.54      2             0.60       1       6              1
CRKQ-04fopr 3.18 1.35      3             0.43       1       6              1
CRKQ-06s    2.53 1.49      2             0.59       1       6              1
CRKQ-04ccc  4.98 1.33      5             0.27       1       6              1
CRKQ-01ccc  4.88 1.27      5             0.26       1       6              1
CRKQ-01fop  3.25 1.45      3             0.45       1       6              1
            Upper Quantile Skewness Kurtosis(-3) KS-Test
CRKQ-05s                 6     0.83        -0.34       0
CRKQ-04s                 6     0.12        -1.01       0
CRKQ-03s                 6     0.84        -0.41       0
CRKQ-03ccc               6    -0.93        -0.09       0
CRKQ-05fopr              6     0.42        -0.25       0
CRKQ-02fop               6     0.03        -0.78       0
CRKQ-02ccc               6    -1.54         1.98       0
CRKQ-02s                 6     1.22         0.24       0
CRKQ-03fopr              6     0.05        -0.72       0
CRKQ-01s                 6     0.87        -0.47       0
CRKQ-07s                 6     0.74        -0.58       0
CRKQ-04fopr              6     0.46        -0.36       0
CRKQ-06s                 6     0.79        -0.37       0
CRKQ-04ccc               6    -1.39         1.20       0
CRKQ-01ccc               6    -1.28         1.21       0
CRKQ-01fop               6     0.17        -0.86       0


variables under investigation:  CRKQ05s CRKQ04s CRKQ03s CRKQ03ccc CRKQ05fopr CRKQ02fop CRKQ02ccc CRKQ02s CRKQ03fopr CRKQ01s CRKQ07s CRKQ04fopr CRKQ06s CRKQ04ccc CRKQ01ccc CRKQ01fop 
Warning in psych::alpha(tmp_correlation): Some items were negatively correlated with the first principal component and probably 
should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option

Some items ( CRKQ-03ccc CRKQ-02ccc CRKQ-04ccc CRKQ-01ccc ) were negatively correlated with the first principal component and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' optionCronbachs Alpha: 0.57 
CFA summary and fit statistics: 
lavaan 0.6.17 ended normally after 34 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        32

  Number of observations                           598

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                              1451.982    1106.836
  Degrees of freedom                               104         104
  P-value (Chi-square)                           0.000       0.000
  Scaling correction factor                                  1.312
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                              7705.448    5489.489
  Degrees of freedom                               120         120
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.404

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.822       0.813
  Tucker-Lewis Index (TLI)                       0.795       0.785
                                                                  
  Robust Comparative Fit Index (CFI)                         0.825
  Robust Tucker-Lewis Index (TLI)                            0.799

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -13905.142  -13905.142
  Scaling correction factor                                  1.475
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)     -13179.150  -13179.150
  Scaling correction factor                                  1.350
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                               27874.283   27874.283
  Bayesian (BIC)                             28014.878   28014.878
  Sample-size adjusted Bayesian (SABIC)      27913.287   27913.287

Root Mean Square Error of Approximation:

  RMSEA                                          0.147       0.127
  90 Percent confidence interval - lower         0.141       0.121
  90 Percent confidence interval - upper         0.154       0.133
  P-value H_0: RMSEA <= 0.050                    0.000       0.000
  P-value H_0: RMSEA >= 0.080                    1.000       1.000
                                                                  
  Robust RMSEA                                               0.145
  90 Percent confidence interval - lower                     0.138
  90 Percent confidence interval - upper                     0.153
  P-value H_0: Robust RMSEA <= 0.050                         0.000
  P-value H_0: Robust RMSEA >= 0.080                         1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.096       0.096

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  CRKQ =~                                                               
    CRKQ05s           1.000                               1.276    0.835
    CRKQ04s           0.860    0.038   22.582    0.000    1.098    0.707
    CRKQ03s           0.915    0.038   23.951    0.000    1.167    0.756
    CRKQ03ccc        -1.020    0.032  -32.212    0.000   -1.301   -0.875
    CRKQ05fopr        0.581    0.041   14.098    0.000    0.742    0.565
    CRKQ02fop         0.349    0.051    6.836    0.000    0.445    0.313
    CRKQ02ccc        -0.862    0.040  -21.455    0.000   -1.100   -0.871
    CRKQ02s           1.010    0.039   25.746    0.000    1.288    0.825
    CRKQ03fopr        0.454    0.038   11.899    0.000    0.579    0.444
    CRKQ01s           1.143    0.034   33.304    0.000    1.459    0.893
    CRKQ07s           0.891    0.046   19.577    0.000    1.137    0.738
    CRKQ04fopr        0.586    0.042   14.083    0.000    0.748    0.553
    CRKQ06s           0.850    0.046   18.418    0.000    1.084    0.730
    CRKQ04ccc        -0.905    0.037  -24.150    0.000   -1.155   -0.871
    CRKQ01ccc        -0.827    0.039  -21.066    0.000   -1.055   -0.831
    CRKQ01fop         0.482    0.051    9.513    0.000    0.614    0.423

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .CRKQ05s           0.705    0.068   10.343    0.000    0.705    0.302
   .CRKQ04s           1.209    0.083   14.534    0.000    1.209    0.501
   .CRKQ03s           1.020    0.087   11.730    0.000    1.020    0.428
   .CRKQ03ccc         0.517    0.066    7.817    0.000    0.517    0.234
   .CRKQ05fopr        1.172    0.073   16.066    0.000    1.172    0.681
   .CRKQ02fop         1.823    0.094   19.440    0.000    1.823    0.902
   .CRKQ02ccc         0.386    0.037   10.575    0.000    0.386    0.242
   .CRKQ02s           0.776    0.075   10.377    0.000    0.776    0.319
   .CRKQ03fopr        1.368    0.073   18.781    0.000    1.368    0.803
   .CRKQ01s           0.539    0.046   11.612    0.000    0.539    0.202
   .CRKQ07s           1.080    0.100   10.833    0.000    1.080    0.455
   .CRKQ04fopr        1.268    0.080   15.826    0.000    1.268    0.694
   .CRKQ06s           1.029    0.104    9.913    0.000    1.029    0.467
   .CRKQ04ccc         0.426    0.051    8.310    0.000    0.426    0.242
   .CRKQ01ccc         0.499    0.054    9.216    0.000    0.499    0.309
   .CRKQ01fop         1.732    0.094   18.487    0.000    1.732    0.821
    CRKQ              1.628    0.132   12.346    0.000    1.000    1.000



CFA first 6 Modification Indices: 
           lhs op        rhs      mi   epc sepc.lv sepc.all sepc.nox
94  CRKQ05fopr ~~ CRKQ04fopr 200.987 0.720   0.720    0.590    0.590
108  CRKQ02fop ~~  CRKQ01fop 184.143 0.992   0.992    0.558    0.558
104  CRKQ02fop ~~ CRKQ04fopr 116.859 0.679   0.679    0.447    0.447
147 CRKQ04fopr ~~  CRKQ01fop  94.674 0.597   0.597    0.403    0.403
128 CRKQ03fopr ~~ CRKQ04fopr  93.263 0.527   0.527    0.400    0.400
88  CRKQ05fopr ~~  CRKQ02fop  81.129 0.544   0.544    0.372    0.372
# head(modificationindices(CFA_GCB[[3]], sort=T)) ## also possible to check out modification indices




########################################
# CFA first order
########################################
### get syntax
regEx <- "^CRKQ.*ccc$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CRKQccc", verbose = FALSE)
[1] "CRKQccc =~ CRKQ03ccc + CRKQ02ccc + CRKQ04ccc + CRKQ01ccc"
regEx <- "^CRKQ.*fop$|^CRKQ.*fopr$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CRKQfop", verbose = FALSE)
[1] "CRKQfop =~ CRKQ05fopr + CRKQ02fop + CRKQ03fopr + CRKQ04fopr + CRKQ01fop"
regEx <- "^CRKQ.*s$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CRKQs", verbose = FALSE)
[1] "CRKQs =~ CRKQ05s + CRKQ04s + CRKQ03s + CRKQ02s + CRKQ01s + CRKQ07s + CRKQ06s"
### fit model
mod_lavaan <- "
CRKQccc =~ CRKQ03ccc + CRKQ02ccc + CRKQ04ccc + CRKQ01ccc
CRKQfop =~ CRKQ05fopr + CRKQ02fop + CRKQ03fopr + CRKQ04fopr + CRKQ01fop
CRKQs =~ CRKQ05s + CRKQ04s + CRKQ03s + CRKQ02s + CRKQ01s + CRKQ07s + CRKQ06s

# correlated residuls:
CRKQ02fop ~~ CRKQ01fop
"

### MLR estimator
fit <- cfa(mod_lavaan, data = questionnaire_changedNames, estimator = "MLR")
# summary(fit, standardized = TRUE)
semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)

round(fitmeasures(fit,
                  fit.measures =c("aic", "bic", "logl", "pvalue",
                                  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
                                  "srmr", "cfi", "tli")), digits = 3)
           aic            bic           logl         pvalue          rmsea 
     26894.349      27052.518     -13411.174          0.000          0.078 
rmsea.ci.lower rmsea.ci.upper           srmr            cfi            tli 
         0.071          0.085          0.044          0.952          0.942 
# head(modificationindices(fit, sort=T)) ## also possible to check out modification indices

### get factor scores
tmp_fc <- lavPredict(fit, method = "Bartlett")
questionnaire$fc_CRKQccc <- tmp_fc[,1]
questionnaire$fc_CRKQfop <- tmp_fc[,2]
questionnaire$fc_CRKQs <- tmp_fc[,3]

CCSQ

no one factor solution -> theoretically driven split according to single sub-dimensions (only slight misfit)

a first order factor model indicates that factors are strongly correlated - extracting only factor scores for an overall factor (see below)

table(str_remove(string = str_subset(string = colnames(questionnaire), pattern = "^CCSQ"), pattern = "^CCSQ-[:digit:]*"))

 as asr  is isr  rs  ts tsr 
  2   1   1   2   3   2   1 
CFA_CRKQ <- CFAstats(dataset = questionnaire,
                           regularExp = "^CCSQ",
                           labelLatent = "CCSQ",
                           showPlots = TRUE, computeEFA = FALSE, computeCFA = TRUE, computeCFAMplus = FALSE)
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein
Warning in ks.test.default(x = x, y = "pnorm", mean(x), sd(x)): für den
Komogorov-Smirnov-Test sollten keine Bindungen vorhanden sein



descriptive statistics: 
           Mean   SD Median CoeffofVariation Minimum Maximun Lower Quantile
CCSQ-03ts  2.25 1.73      1             0.77       1       7              1
CCSQ-01isr 1.25 1.69      1             1.35       0       6              0
CCSQ-02asr 1.52 1.64      1             1.08       0       6              0
CCSQ-01as  2.74 1.83      2             0.67       1       7              1
CCSQ-03rs  2.47 1.76      2             0.71       1       7              1
CCSQ-03as  2.48 1.82      2             0.73       1       7              1
CCSQ-02rs  2.02 1.48      1             0.73       1       7              1
CCSQ-03isr 1.56 1.72      1             1.10       0       6              0
CCSQ-02is  2.65 1.88      2             0.71       1       7              1
CCSQ-01rs  2.44 1.55      2             0.64       1       7              1
CCSQ-01ts  3.20 1.93      3             0.60       1       7              1
CCSQ-02tsr 1.68 1.68      1             1.00       0       6              0
           Upper Quantile Skewness Kurtosis(-3) KS-Test
CCSQ-03ts               7     1.37         0.74       0
CCSQ-01isr              6     1.55         1.50       0
CCSQ-02asr              6     1.24         0.85       0
CCSQ-01as               7     0.91        -0.31       0
CCSQ-03rs               7     1.21         0.42       0
CCSQ-03as               7     1.16         0.15       0
CCSQ-02rs               7     1.79         2.73       0
CCSQ-03isr              6     1.23         0.65       0
CCSQ-02is               7     1.04        -0.13       0
CCSQ-01rs               7     1.24         0.92       0
CCSQ-01ts               7     0.53        -0.96       0
CCSQ-02tsr              6     1.08         0.34       0


variables under investigation:  CCSQ03ts CCSQ01isr CCSQ02asr CCSQ01as CCSQ03rs CCSQ03as CCSQ02rs CCSQ03isr CCSQ02is CCSQ01rs CCSQ01ts CCSQ02tsr 

Cronbachs Alpha: 0.95 
CFA summary and fit statistics: 
lavaan 0.6.17 ended normally after 25 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        24

  Number of observations                           598

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                               620.278     402.726
  Degrees of freedom                                54          54
  P-value (Chi-square)                           0.000       0.000
  Scaling correction factor                                  1.540
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                              6544.991    3645.157
  Degrees of freedom                                66          66
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.796

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.913       0.903
  Tucker-Lewis Index (TLI)                       0.893       0.881
                                                                  
  Robust Comparative Fit Index (CFI)                         0.916
  Robust Tucker-Lewis Index (TLI)                            0.898

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -11110.597  -11110.597
  Scaling correction factor                                  2.116
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)     -10800.457  -10800.457
  Scaling correction factor                                  1.717
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                               22269.193   22269.193
  Bayesian (BIC)                             22374.639   22374.639
  Sample-size adjusted Bayesian (SABIC)      22298.446   22298.446

Root Mean Square Error of Approximation:

  RMSEA                                          0.132       0.104
  90 Percent confidence interval - lower         0.123       0.096
  90 Percent confidence interval - upper         0.142       0.112
  P-value H_0: RMSEA <= 0.050                    0.000       0.000
  P-value H_0: RMSEA >= 0.080                    1.000       1.000
                                                                  
  Robust RMSEA                                               0.129
  90 Percent confidence interval - lower                     0.117
  90 Percent confidence interval - upper                     0.141
  P-value H_0: Robust RMSEA <= 0.050                         0.000
  P-value H_0: Robust RMSEA >= 0.080                         1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.047       0.047

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  CCSQ =~                                                               
    CCSQ03ts          1.000                               1.012    0.586
    CCSQ01isr         1.517    0.138   10.955    0.000    1.535    0.911
    CCSQ02asr         1.398    0.131   10.636    0.000    1.415    0.862
    CCSQ01as          1.543    0.127   12.117    0.000    1.561    0.856
    CCSQ03rs          1.495    0.130   11.538    0.000    1.513    0.860
    CCSQ03as          1.274    0.089   14.239    0.000    1.289    0.707
    CCSQ02rs          1.138    0.116    9.800    0.000    1.152    0.780
    CCSQ03isr         1.406    0.136   10.376    0.000    1.423    0.827
    CCSQ02is          1.622    0.128   12.684    0.000    1.641    0.875
    CCSQ01rs          1.075    0.108    9.912    0.000    1.088    0.702
    CCSQ01ts          1.372    0.114   12.056    0.000    1.389    0.720
    CCSQ02tsr         1.435    0.128   11.242    0.000    1.452    0.864

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .CCSQ03ts          1.961    0.215    9.112    0.000    1.961    0.657
   .CCSQ01isr         0.483    0.062    7.751    0.000    0.483    0.170
   .CCSQ02asr         0.692    0.070    9.874    0.000    0.692    0.257
   .CCSQ01as          0.892    0.102    8.720    0.000    0.892    0.268
   .CCSQ03rs          0.803    0.092    8.762    0.000    0.803    0.260
   .CCSQ03as          1.661    0.195    8.497    0.000    1.661    0.500
   .CCSQ02rs          0.854    0.097    8.838    0.000    0.854    0.391
   .CCSQ03isr         0.934    0.118    7.901    0.000    0.934    0.316
   .CCSQ02is          0.823    0.093    8.842    0.000    0.823    0.234
   .CCSQ01rs          1.217    0.126    9.639    0.000    1.217    0.507
   .CCSQ01ts          1.790    0.139   12.918    0.000    1.790    0.481
   .CCSQ02tsr         0.718    0.078    9.159    0.000    0.718    0.254
    CCSQ              1.024    0.168    6.102    0.000    1.000    1.000



CFA first 6 Modification Indices: 
         lhs op       rhs      mi    epc sepc.lv sepc.all sepc.nox
42 CCSQ01isr ~~ CCSQ03isr 115.873  0.352   0.352    0.524    0.524
30  CCSQ03ts ~~  CCSQ03as  99.463  0.758   0.758    0.420    0.420
79  CCSQ02rs ~~  CCSQ01rs  54.973  0.326   0.326    0.319    0.319
46 CCSQ01isr ~~ CCSQ02tsr  52.356  0.214   0.214    0.362    0.362
52 CCSQ02asr ~~  CCSQ02is  42.228 -0.233  -0.233   -0.309   -0.309
85 CCSQ03isr ~~ CCSQ02tsr  30.919  0.210   0.210    0.256    0.256
head(modificationindices(CFA_CRKQ[[3]], sort=T)) ## also possible to check out modification indices
         lhs op       rhs      mi    epc sepc.lv sepc.all sepc.nox
42 CCSQ01isr ~~ CCSQ03isr 115.873  0.352   0.352    0.524    0.524
30  CCSQ03ts ~~  CCSQ03as  99.463  0.758   0.758    0.420    0.420
79  CCSQ02rs ~~  CCSQ01rs  54.973  0.326   0.326    0.319    0.319
46 CCSQ01isr ~~ CCSQ02tsr  52.356  0.214   0.214    0.362    0.362
52 CCSQ02asr ~~  CCSQ02is  42.228 -0.233  -0.233   -0.309   -0.309
85 CCSQ03isr ~~ CCSQ02tsr  30.919  0.210   0.210    0.256    0.256
########################################
# CFA first order
########################################
### get syntax
regEx <- "^CCSQ.*as$|^CCSQ.*asr$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CCSQas", verbose = FALSE)
[1] "CCSQas =~ CCSQ02asr + CCSQ01as + CCSQ03as"
regEx <- "^CCSQ.*is$|^CCSQ.*isr$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CCSQis", verbose = FALSE)
[1] "CCSQis =~ CCSQ01isr + CCSQ03isr + CCSQ02is"
regEx <- "^CCSQ.*rs"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CCSQrs", verbose = FALSE)
[1] "CCSQrs =~ CCSQ03rs + CCSQ02rs + CCSQ01rs"
regEx <- "^CCSQ.*ts$|^CCSQ.*tsr$"
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
names(questionnaire_changedNames)[names(questionnaire_changedNames) %in% tmp_vars] <- str_remove_all(string = tmp_vars, pattern = "-")
tmp_vars <- str_subset(string = colnames(questionnaire_changedNames), pattern = regEx)
model_lavaan(vars = tmp_vars, labelLatentVar = "CCSQts", verbose = FALSE)
[1] "CCSQts =~ CCSQ03ts + CCSQ01ts + CCSQ02tsr"
### fit model
mod_lavaan <- "
CCSQas =~ CCSQ02asr + CCSQ01as + CCSQ03as
CCSQis =~ CCSQ01isr + CCSQ03isr + CCSQ02is
CCSQrs =~ CCSQ03rs + CCSQ02rs + CCSQ01rs
CCSQts =~ CCSQ03ts + CCSQ01ts + CCSQ02tsr


# no correlated residuls:
"

### MLR estimator
fit <- cfa(mod_lavaan, data = questionnaire_changedNames, estimator = "MLR")
Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
                is not positive definite;
                use lavInspect(fit, "cov.lv") to investigate.
# summary(fit, standardized = TRUE)
semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)

round(fitmeasures(fit,
                  fit.measures =c("aic", "bic", "logl", "pvalue",
                                  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
                                  "srmr", "cfi", "tli")), digits = 3)
           aic            bic           logl         pvalue          rmsea 
     22162.337      22294.145     -11051.169          0.000          0.126 
rmsea.ci.lower rmsea.ci.upper           srmr            cfi            tli 
         0.116          0.136          0.044          0.930          0.904 
# head(modificationindices(fit, sort=T)) ## also possible to check out modification indices

########################################
# CFA one factor solution
########################################
### fit model
mod_lavaan <- "
CCSQ =~ CCSQ02asr + CCSQ01as + CCSQ03as + CCSQ01isr + CCSQ03isr + CCSQ02is + CCSQ03rs + CCSQ02rs + CCSQ01rs + CCSQ03ts + CCSQ01ts + CCSQ02tsr


#correlated residuls:
CCSQ01isr ~~ CCSQ03isr
CCSQ03as ~~  CCSQ03ts
CCSQ02rs ~~  CCSQ01rs
"

### MLR estimator
fit <- cfa(mod_lavaan, data = questionnaire_changedNames, estimator = "MLR")
semPlot::semPaths(object = fit, what = "std", edge.label.cex = 0.5)

round(fitmeasures(fit,
                  fit.measures =c("aic", "bic", "logl", "pvalue",
                                  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
                                  "srmr", "cfi", "tli")), digits = 3)
           aic            bic           logl         pvalue          rmsea 
     22010.634      22129.261     -10978.317          0.000          0.100 
rmsea.ci.lower rmsea.ci.upper           srmr            cfi            tli 
         0.090          0.110          0.033          0.953          0.939 
# head(modificationindices(fit, sort=T)) ## also possible to check out modification indices


### get factor scores
tmp_fc <- lavPredict(fit, method = "Bartlett")
questionnaire$fc_CCSQ <- tmp_fc[,1]

compare factor scores and mean scores

normally they are highly correlated if items are relatively similar in their loading coefficients

plot(questionnaire$fc_CMQ, questionnaire$mean_CMQ)

cor(questionnaire$fc_CMQ, questionnaire$mean_CMQ)
         [,1]
CMQ 0.9822706

add group based on LCA

Remark: LCA = latent class analysis

Compute an LCA over computed factor scores of single scales, except “fc_CRKQccc” and “fc_CRKQfop”

psych::cor.plot(r = cor(questionnaire[, str_detect(string = colnames(questionnaire),
                                                   pattern = "^fc")], 
                                                   use = "pairwise.complete.obs"),
                                                   upper = FALSE, xlas = 2, main = "factor scores of scales")

run LCA

if(runMplus){
  LCArunsDef = 6 # define !
  setwd("outputs/LCA")

  tmp_vars <- str_subset(string = colnames(questionnaire), pattern = "fc_")
  tmp_vars <- tmp_vars[!tmp_vars %in% c("fc_CRKQccc", "fc_CRKQfop")]
  
  LCA_dat <- questionnaire[, c("PROLIFIC_PID", tmp_vars)]
  LCA_dat <- as.data.frame(as.matrix(LCA_dat))
  
  LCA_dat[,2:9] <- lapply(LCA_dat[,2:9],as.numeric)
  # prepareMplusData(df = LCA_dat, filename = "test.dat")


  l = 1
  list_LCA <- list()
  for(i in 2:LCArunsDef){
    print(i)

    numClasses <- i

    LCA_conspiracy  <- mplusObject(

      TITLE = paste0("Latent Class Analysis", " c=", numClasses),

      VARIABLE =paste0("
  usevariables = fc_CMQ fc_GCBci fc_GCBet fc_GCBgm fc_GCBmg fc_GCBpw fc_CRKQs fc_CCSQ;

  classes      = c(", numClasses, ")"),

  ANALYSIS =
    "
    Type=mixture; ! LCA analysis
    STARTS= 500 100;
    !LRTstarts=0 0 300 20;
  ",

  PLOT =
    "
    type = plot3;
    series is fc_CMQ (1) fc_GCBci (2) fc_GCBet (3) fc_GCBgm (4) 
              fc_GCBmg (5) fc_GCBpw (6) fc_CRKQs (7) fc_CCSQ (8);
  ",

  SAVEDATA = paste0("file = lca_", numClasses, ".txt ;
    save = cprob;
    format = free;
  "),

  OUTPUT = "tech11 tech14;", rdata = LCA_dat)

    list_LCA[[l]] <- mplusModeler(LCA_conspiracy,
                                         modelout = paste0("lca_", numClasses, ".inp"),
                                         run = 1L)

    l = l + 1
  }

  setwd("..")
  # comment out to avoid overwriting LCA outputs
  # saveRDS(list_LCA, file="list_LCA.RData")

  saveRDS(list_LCA, file="outputs/list_LCA.RData")

}else{
  list_LCA <- readRDS("outputs/list_LCA.RData" )
}

fit statistics of LCA

getLCAfitstatistics(listMplusOutput = list_LCA)

  Classes        LL      AIC      BIC    SABIC     CAIC BLRTp VLMRLRTp Entropy
1       2 -6580.711 13211.42 13321.26 13241.89 13346.26     0   0.0000   0.943
2       3 -6265.263 12598.53 12747.91 12639.97 12781.91     0   0.0109   0.900
3       4 -6069.591 12225.18 12414.11 12277.59 12457.11     0   0.2444   0.898
4       5 -5890.506 11885.01 12113.48 11948.39 12165.48     0   0.0988   0.912
5       6 -5754.694 11631.39 11899.40 11705.74 11960.40     0   0.0078   0.927

add to questionnaire

questionnaire$classes_conspiracy <- read.table(file = "outputs/lca_3.txt")$V12


### some descriptives
table(questionnaire$classes_conspiracy, questionnaire$country)
   
    Germany USA
  1     141  83
  2      98  91
  3      60 125
table(questionnaire$classes_conspiracy, questionnaire$politicalParty)
   
    Democrat Republican
  1       57         26
  2       50         41
  3       41         84
ggbetweenstats(
  data = questionnaire,
  x = classes_conspiracy,
  y = mean_CMQ
)
Warning in min(x): kein nicht-fehlendes Argument für min; gebe Inf zurück
Warning in max(x): kein nicht-fehlendes Argument für max; gebe -Inf zurück

randomly draw participants for step II

draw randomly 35 Persons for each country with high low conspiracy beliefs, assuming a drop-out of about 16.6%

table(questionnaire$classes_conspiracy, questionnaire$country)
   
    Germany USA
  1     141  83
  2      98  91
  3      60 125
set.seed(12345) # seed to make analysis replicable
## low and Germany
tmp_ids <- questionnaire$PROLIFIC_PID[questionnaire$country == "Germany" & questionnaire$classes_conspiracy == 1]
tmp_dat <- data.frame(country = "Germany", classes_conspiracy = 1, ID = sample(x = tmp_ids, size = 35, replace = FALSE))
## high and Germany
tmp_ids <- questionnaire$PROLIFIC_PID[questionnaire$country == "Germany" & questionnaire$classes_conspiracy == 3]
tmp_dat_out <- rbind(tmp_dat,
                     data.frame(country = "Germany", classes_conspiracy = 3, ID = sample(x = tmp_ids, size = 35, replace = FALSE)))

## low and USA
tmp_ids <- questionnaire$PROLIFIC_PID[questionnaire$country == "USA" & questionnaire$classes_conspiracy == 1]
tmp_dat_out <- rbind(tmp_dat_out,
                     data.frame(country = "USA", classes_conspiracy = 1, ID = sample(x = tmp_ids, size = 35, replace = FALSE)))
## high and USA
tmp_ids <- questionnaire$PROLIFIC_PID[questionnaire$country == "USA" & questionnaire$classes_conspiracy == 3]
tmp_dat_out <- rbind(tmp_dat_out,
                     data.frame(country = "USA", classes_conspiracy = 3, ID = sample(x = tmp_ids, size = 35, replace = FALSE)))


table(tmp_dat_out$classes_conspiracy, tmp_dat_out$country)
   
    Germany USA
  1      35  35
  3      35  35
length(unique(tmp_dat_out$ID)) == nrow(tmp_dat_out)
[1] TRUE
## save final IDs for step II
write.xlsx2(x = tmp_dat_out, file = "outputs/IDs_t2.xlsx")
### add participants 

usedIDs <- read.xlsx2(file = "outputs/IDs_t2_coloured.xlsx", sheetIndex = 1)


table(questionnaire$classes_conspiracy, questionnaire$country)
   
    Germany USA
  1     141  83
  2      98  91
  3      60 125
additionalParticipants <- 20

set.seed(55555)
## low and Germany
tmp_dat <- questionnaire[questionnaire$country == "Germany" & questionnaire$classes_conspiracy == 1, ]
tmp_ids <- tmp_dat$PROLIFIC_PID[! tmp_dat$PROLIFIC_PID %in% usedIDs$ID]
tmp_dat_out <- data.frame(country = "Germany", classes_conspiracy = 1, ID = sample(x = tmp_ids, size = additionalParticipants, replace = FALSE))
## high and Germany
tmp_dat <- questionnaire[questionnaire$country == "Germany" & questionnaire$classes_conspiracy == 3, ] # max 25
tmp_ids <- tmp_dat$PROLIFIC_PID[! tmp_dat$PROLIFIC_PID %in% usedIDs$ID]
tmp_dat_IDs <- rbind(tmp_dat_out,
                     data.frame(country = "Germany", classes_conspiracy = 3, 
                                ID = sample(x = tmp_ids, size = additionalParticipants, replace = FALSE)))

## low and USA
tmp_dat <- questionnaire[questionnaire$country == "USA" & questionnaire$classes_conspiracy == 1, ]
tmp_ids <- tmp_dat$PROLIFIC_PID[! tmp_dat$PROLIFIC_PID %in% usedIDs$ID]
tmp_dat_IDs <- rbind(tmp_dat_IDs,
                     data.frame(country = "USA", classes_conspiracy = 1, ID = sample(x = tmp_ids, size = additionalParticipants, replace = FALSE)))




## high and USA
tmp_dat <- questionnaire[questionnaire$country == "USA" & questionnaire$classes_conspiracy == 3, ]
tmp_ids <- tmp_dat$PROLIFIC_PID[! tmp_dat$PROLIFIC_PID %in% usedIDs$ID]
tmp_dat_IDs <- rbind(tmp_dat_IDs,
                     data.frame(country = "USA", classes_conspiracy = 3, ID = sample(x = tmp_ids, size = additionalParticipants, replace = FALSE)))


table(tmp_dat_IDs$classes_conspiracy, tmp_dat_IDs$country)
   
    Germany USA
  1      20  20
  3      20  20
length(unique(tmp_dat_IDs$ID)) == nrow(tmp_dat_IDs)
[1] TRUE
## save final IDs for step II
write.xlsx2(x = tmp_dat_IDs, file = "outputs/IDs_t2_additional.xlsx")

save final data

## save final questionnaire
dim(questionnaire)
[1] 598 100
write.xlsx2(x = questionnaire, file = "outputs/questionnaire_final.xlsx")
write.csv2(x = questionnaire, file = "outputs/questionnaire_final.csv")
saveRDS(object = questionnaire, file = "outputs/questionnaire_final.rds")