ess <- foreign::read.spss("Baza ESS1.sav")
re-encoding from UTF-8
ess <-  as.data.frame(ess)
data <- dplyr::mutate_if(ess[,517:537],is.factor, as.character)




# covert string represenation of variables to numeric values
data[data == "Very much like me"] <- 1
data[data == "Like me"] <- 2
data[data == "Somewhat like me"] <- 3
data[data == "A little like me"] <- 4
data[data == "Not like me"] <- 5
data[data == "Not like me at all"] <- 6
data[data == "Don't know"] <- NA
data[data == "Refusal"] <- NA
data[data == "No answer"] <- NA

#handling mising data
VIM::aggr(data)

# extract basic statistic for dataset
summary_data <- describe(data)
summary_data <- as.data.frame(summary_data)

# add rownames from dataset to names of variables in tibble format
var_names <- rownames(summary_data)
summary_data$var_names <- var_names

# select only relevant data from summary
summary_data <- summary_data %>% dplyr::select(var_names,sd, min,max,skew,kurtosis,se)

# reorganize data to make var_names as first column in dataset
summary_data <- summary_data %>% dplyr::select(var_names, everything())
#round floating numeric data in dataset to 2 digits
dplyr::mutate_if(summary_data, is.numeric, round,digits = 2)
# get output in tabular form
summary_data %>%gt::gt()
var_names sd min max skew kurtosis se
Q1* 1.3771286 1 6 0.5790435 -0.5862819 0.03275166
Q2* 1.2263964 1 6 -0.9549656 0.3144621 0.02905207
Q3* 1.0818659 1 6 1.3889382 1.9383678 0.02565710
Q4* 1.5148398 1 6 0.2040218 -1.1666722 0.03596581
Q5* 1.1547939 1 6 1.2873721 1.2723298 0.02736355
Q6* 1.5692259 1 6 0.1345306 -1.2065370 0.03725707
Q7* 1.5429304 1 6 0.2224145 -1.1582473 0.03674685
Q8* 1.0294342 1 6 1.1761362 1.4268837 0.02439308
Q9* 1.2996622 1 6 0.6751729 -0.3219481 0.03079630
Q10* 1.3113493 1 6 0.5377279 -0.5507595 0.03110819
Q11* 1.1155229 1 6 1.0674035 0.7711320 0.02641818
Q12* 0.9646045 1 6 1.2560505 1.8694577 0.02283128
Q13* 1.3625622 1 6 0.5218108 -0.6268963 0.03230490
Q14* 1.1848085 1 6 1.1725628 0.8610250 0.02811426
Q15* 1.4325961 1 6 -0.8191307 -0.3494632 0.03391770
Q16* 1.0845117 1 6 1.0367127 0.8147598 0.02568376
Q17* 1.3988355 1 6 0.5686463 -0.6813000 0.03312768
Q18* 0.8679235 1 6 1.2903332 2.1089633 0.02053718
Q19* 0.9558014 1 6 1.2544859 1.6364556 0.02261025
Q20* 1.3444597 1 6 1.0235842 0.3273875 0.03183101
Q21* 1.5009709 1 6 0.3251656 -0.9690770 0.03558642
#exclude NAs from dataset
ess.data <- na.omit(data)
#convert dataset coulmns that were coverted to character during impot back o numeric form
ess.data <- mutate_if(ess.data,is.character, as.numeric)


ess.model <-'
SE =~  Q2 + Q17 + Q4 + Q13 
TD =~  Q3 + Q8 + Q19 + Q12 + Q18
OP =~  Q1  + Q11 + Q6  + Q15 +  Q10  + Q21 
CON =~ Q5 + Q14 + Q7 + Q16 + Q9 + Q20'


#fit the cfa function to model
ess.fit <- lavaan::cfa(ess.model, data = ess.data)

#get modification indices with higest suggested impact on model (in this case more than 50 lower value of chi-square test)
modification_indices <- modificationindices(ess.fit) %>%
  as_data_frame()%>%
  arrange(-mi)%>%
  filter(mi > 50 & op == "=~")%>%
  dplyr::select(lhs, op, rhs, mi,sepc.all)

modification_indices %>%gt::gt()
lhs op rhs mi sepc.all
TD =~ Q11 249.13276 0.4612053
CON =~ Q15 223.61241 -0.4254639
CON =~ Q2 216.31693 -0.4722874
TD =~ Q15 215.62325 -0.4239874
CON =~ Q11 200.19414 0.4071356
TD =~ Q2 192.51620 -0.4109670
CON =~ Q17 96.79052 0.3075721
OP =~ Q17 90.58969 -0.5418538
OP =~ Q13 88.53532 0.6390393
SE =~ Q10 56.10605 0.4032900
#plot path diagram
lavaanPlot::lavaanPlot(mode = ess.fit,coefs = T,covs = T,stand = T,stars = c("latent","covs"))