I obtained the dataset by searching for misinformation-related data on the Open Science Framework (OSF). Link to the article. The study was carried out in an Australian sample. The authors ran regression analyses (multiple and logistic) to identify predictors of vaccine hesitancy and predictors of misinformation-sharing behavior. For the current project, I wanted to examine the relationships among the following variables: Education (Edu); Age; Gender; Conspiracy Mentality (CM); Doubt in Science (DS); Vulnerability to Misinformation (VM); and Vaccine Hesitancy (VH). Here, CM (five items) and DS (six items) are latent variables. Note that the authors of the source study didn’t have VM as a separate construct. They asked five questions pertaining to misinformation sharing, and I tried to see whether responses to those five questions load onto the construct (VM) of my making.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
# Read the raw survey spreadsheet and preview its first rows.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
raw_data <- read_excel(
  path = "C:/Users/sanja/OneDrive/Desktop/Green Card/Homeworks/vaccine misinformation raw data .xlsx"
)
head(x = raw_data)
## # A tibble: 6 x 45
## `Start Date` `End Date` Progress `Duration (in secon~ Finished
## <dttm> <dttm> <dbl> <dbl> <chr>
## 1 NA NA NA NA <NA>
## 2 2020-12-14 04:19:01 2020-12-14 04:43:53 100 1491 True
## 3 2020-12-17 13:23:47 2020-12-17 13:32:06 100 499 True
## 4 2020-12-17 14:47:12 2020-12-17 14:55:42 100 510 True
## 5 2020-12-10 18:05:29 2020-12-10 18:15:19 100 589 True
## 6 2020-12-10 17:35:04 2020-12-10 17:45:32 100 627 True
## # ... with 40 more variables: Recorded Date <dttm>, Response ID <chr>,
## # Age <chr>, Gender <chr>, HighestEduLevel <chr>, Industry <chr>,
## # Other industry Please specify - Text <chr>,
## # FrequencyReadingCoronaCheck <chr>, HearaboutCoronaCheck <chr>,
## # HearaboutCoronaCheckNewsletter <chr>, SubscribeCoronaCheck <chr>,
## # Last3MonthsSharedCoronaCheck <chr>, ItemCoronaCheckChangedMind <chr>,
## # Please elaborate... <chr>, ...
# Inspect the age bands as they arrive from the spreadsheet.
table(raw_data$Age)
##
## 18-29 30-39 40-49 50-59 60-69 70-79 80+
## 17 41 98 257 573 368 42
class(raw_data$Age)
## [1] "character"
# Work on a copy so that raw_data itself is never modified.
df <- raw_data
# Replace each age band by its ordinal rank (1 = "18-29", ..., 7 = "80+").
# Missing ages stay NA.
age_bands <- c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")
df$Age <- as.double(match(df$Age, age_bands))
class(df$Age) # now numeric
## [1] "numeric"
# Education is likewise turned into an ordinal score.
table(df$HighestEduLevel)
##
## Bachelor's Degree Did not complete school Diploma
## 419 60 270
## Post-graduate Degree Year 12 or equivalent
## 404 242
# Qualifications listed from lowest (rank 1) to highest (rank 5). A Diploma
# ranks below a Bachelor's Degree (AQF level 5 vs. level 7), so it takes
# rank 3 and the Bachelor's Degree rank 4. Missing answers stay NA.
# NOTE(review): Edu values and Edu-related estimates printed further down in
# this document were generated before this ordering was corrected and need to
# be regenerated.
edu_levels <- c(
  "Did not complete school",
  "Year 12 or equivalent",
  "Diploma",
  "Bachelor's Degree",
  "Post-graduate Degree"
)
df$edu <- as.double(match(df$HighestEduLevel, edu_levels))
class(df$edu)
## [1] "numeric"
# Gender is kept as the original character column (no recoding is applied).
# NOTE(review): model2 later estimates a variance for Gender (0.289 in its
# printed summary), which exceeds 0.25, the maximum possible for a two-level
# 0/1 indicator -- presumably more than two categories end up treated as
# equally spaced numbers. Confirm the coding before interpreting Gender paths.
class(df$Gender)
## [1] "character"
What follows are potential items to load on ‘vulnerability to misinformation’.
# msf = misinformation sharing frequency (a copy of the raw 0-10 item).
df <- df %>%
  mutate(msf = FrequencypostssocialmediafeedLaterDebunkedCoronaCheck)
table(df$msf)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 202 78 84 76 66 252 123 149 143 43 47
class(df$msf)
## [1] "numeric"
table(df$LevelConcernFakeNews)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 2 6 8 15 10 57 48 96 196 287 571
class(df$LevelConcernFakeNews)
## [1] "numeric"
# mc  = misinformation caution (level of concern about fake news, 0-10).
# Rmc = the same item reversed (10 - mc), so a larger value means less caution.
df <- df %>%
  mutate(
    mc = LevelConcernFakeNews,
    Rmc = 10 - mc
  )
table(df$mc)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 2 6 8 15 10 57 48 96 196 287 571
table(df$SharedInfoLaterDiscoveredFake)
##
## No Yes
## 942 440
class(df$SharedInfoLaterDiscoveredFake)
## [1] "character"
# SM = shared information that was later discovered to be fake.
df$SM <- df$SharedInfoLaterDiscoveredFake
table(df$SM)
##
## No Yes
## 942 440
# Score "Yes" as 10 and "No" as 0. The scores are kept as text for the
# table below and converted to numbers afterwards; missing answers stay NA.
sm_scores <- c(No = "0", Yes = "10")
df$SM <- unname(sm_scores[df$SM])
table(df$SM)
##
## 0 10
## 942 440
df$SM <- as.numeric(df$SM)
table(df$CapabilityDifferentiateFakeNewsOtherNews)
##
## A little bit capable Moderately capable Not at all capable
## 106 766 12
## Unsure Very capable
## 13 495
# csm = self-perceived capacity to spot misinformation.
df$csm <- df$CapabilityDifferentiateFakeNewsOtherNews
table(df$csm)
##
## A little bit capable Moderately capable Not at all capable
## 106 766 12
## Unsure Very capable
## 13 495
# Map each answer to a 1-4 score; "Unsure" shares the lowest score with
# "Not at all capable". Missing answers stay NA.
capability_scores <- c(
  "Not at all capable" = 1,
  "Unsure" = 1,
  "A little bit capable" = 2,
  "Moderately capable" = 3,
  "Very capable" = 4
)
df$csm <- as.numeric(unname(capability_scores[df$csm]))
# Rcsm = reversed score (5 - csm), so a larger value means lower capacity.
df$Rcsm <- 5 - df$csm
table(df$csm)
##
## 1 2 3 4
## 25 106 766 495
# SPM = shared possible misinformation (news the respondent was not sure
# was true).
df$SPM <- df$SharedNewsNotSureTrue
table(df$SPM)
##
## No Yes
## 1052 339
# Score "Yes" as 10 and "No" as 0; missing answers stay NA.
spm_scores <- c(No = 0, Yes = 10)
df$SPM <- as.numeric(unname(spm_scores[df$SPM]))
table(df$SPM)
##
## 0 10
## 1052 339
Vaccine hesitancy construct prep:
# VH = vaccine hesitancy, built from the 0-10 "likely to have the vaccine" item.
df[["VH"]] <- df[["LikelyHaveVaccine"]]
class(df[["VH"]])
## [1] "numeric"
table(df[["VH"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 10 13 9 11 12 69 42 81 165 297 603
# Reverse the scale (10 - likelihood) so that a larger value means more
# hesitancy.
df[["VH"]] <- 10 - df[["VH"]]
table(df[["VH"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 603 297 165 81 42 69 12 11 9 13 10
Note: Very few vaccine hesitant people in the sample
The tedious code that follows pertains to the two main (and actual) latent variables.
# Recode the seven Likert labels to the digit strings "1" (Disagree Very
# Strongly) through "7" (Agree Very Strongly).
#
# responses: character vector of raw answers.
# Returns a character vector of the same length. Any value that is not one of
# the seven labels (NA, or an answer already stored as a digit) is returned
# unchanged. Conversion to numeric happens later, once the working dataset
# has been assembled.
recode_likert <- function(responses) {
  likert_labels <- c(
    "Disagree Very Strongly",
    "Disagree Strongly",
    "Disagree Somewhat",
    "Neither Agree nor Disagree",
    "Agree Somewhat",
    "Agree Strongly",
    "Agree Very Strongly"
  )
  position <- match(responses, likert_labels)
  is_label <- !is.na(position)
  responses[is_label] <- as.character(position[is_label])
  responses
}

# Conspiracy mentality items (CM1-CM5)
table(df$ConspiracyMentalityItem1)
##
## 1 2
## 1 4
## 3 4
## 9 2
## 5 6
## 19 11
## 7 Agree Somewhat
## 5 460
## Agree Strongly Agree Very Strongly
## 347 165
## Disagree Somewhat Disagree Strongly
## 136 101
## Disagree Very Strongly Neither Agree nor Disagree
## 13 119
# Item 1 already holds some answers as digits "1"-"7" (see the table above);
# those pass through recode_likert() untouched.
df$CM1 <- recode_likert(df$ConspiracyMentalityItem1)
table(df$ConspiracyMentalityItem2)
##
## Agree Somewhat Agree Strongly
## 513 436
## Agree Very Strongly Disagree Somewhat
## 241 80
## Disagree Strongly Disagree Very Strongly
## 16 3
## Neither Agree nor Disagree
## 98
df$CM2 <- recode_likert(df$ConspiracyMentalityItem2)
table(df$ConspiracyMentalityItem3)
##
## Agree Somewhat Agree Strongly
## 230 46
## Agree Very Strongly Disagree Somewhat
## 16 340
## Disagree Strongly Disagree Very Strongly
## 356 114
## Neither Agree nor Disagree
## 279
df$CM3 <- recode_likert(df$ConspiracyMentalityItem3)
table(df$ConspiracyMentalityItem4)
##
## Agree Somewhat Agree Strongly
## 128 23
## Agree Very Strongly Disagree Somewhat
## 4 219
## Disagree Strongly Disagree Very Strongly
## 401 262
## Neither Agree nor Disagree
## 350
df$CM4 <- recode_likert(df$ConspiracyMentalityItem4)
table(df$ConspiracyMentalityItem5)
##
## Agree Somewhat Agree Strongly
## 267 68
## Agree Very Strongly Disagree Somewhat
## 29 224
## Disagree Strongly Disagree Very Strongly
## 314 210
## Neither Agree nor Disagree
## 269
df$CM5 <- recode_likert(df$ConspiracyMentalityItem5)
# Belief-in-science items, stored as DS1-DS6 (DS = Doubt in Science)
table(df$BeliefinScienceItem1)
##
## Agree Somewhat Agree Strongly
## 52 25
## Agree Very Strongly Disagree Somewhat
## 14 223
## Disagree Strongly Disagree Very Strongly
## 584 352
## Neither Agree nor Disagree
## 88
df$DS1 <- recode_likert(df$BeliefinScienceItem1)
df$DS2 <- recode_likert(df$BeliefinScienceItem2)
df$DS3 <- recode_likert(df$BeliefinScienceItem3)
df$DS4 <- recode_likert(df$BeliefinScienceItem4)
df$DS5 <- recode_likert(df$BeliefinScienceItem5)
df$DS6 <- recode_likert(df$BeliefinScienceItem6)
wd = working dataset. CM = Conspiracy Mentality (items scored 1 through 7); a higher score means higher CM. DS = Doubt in Science (items scored 1 through 7); a higher score means higher DS.
VM = Vulnerability to Misinformation. VM1_msf = misinformation sharing frequency (0-10); higher is more vulnerable. VM2_Rmc = misinformation caution, reversed (0-10); higher means more vulnerable. VM3_sm = shared info later found fake (Yes = 10, No = 0); higher means more vulnerable. VM4_Rcsm = self-perceived capacity to spot misinformation, reversed (1-4); higher means more vulnerable. VM5_spm = shared possible misinformation (Yes = 10, No = 0); higher means more vulnerable.
# Assemble the working dataset (wd) with only the analysis variables.
# VM2_Rmc and VM4_Rcsm take the reversed scores (Rmc = 10 - mc and
# Rcsm = 5 - csm), so that a higher value on every VM item points towards
# greater vulnerability, as the column names state.
# NOTE(review): the str() output and model summaries printed further down in
# this document were generated with the unreversed mc/csm columns and need to
# be regenerated.
wd <- df %>% transmute(Age, Edu = edu, Gender,
                       CM1, CM2, CM3, CM4, CM5,
                       DS1, DS2, DS3, DS4, DS5, DS6,
                       VM1_msf = msf, VM2_Rmc = Rmc, VM3_sm = SM,
                       VM4_Rcsm = Rcsm, VM5_spm = SPM,
                       VH)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) View(wd)
# Drop the first spreadsheet row, which head(raw_data) shows as all NA.
wd <- wd[-1, ]
if (interactive()) View(wd)
str(wd)
## tibble [1,397 x 20] (S3: tbl_df/tbl/data.frame)
## $ Age : num [1:1397] 4 5 6 5 5 1 4 5 4 6 ...
## $ Edu : num [1:1397] 4 2 4 5 5 2 2 3 3 3 ...
## $ Gender : chr [1:1397] "Male" "Female" "Female" "Female" ...
## $ CM1 : chr [1:1397] "5" "3" "6" "5" ...
## $ CM2 : chr [1:1397] "6" "3" "6" "4" ...
## $ CM3 : chr [1:1397] "2" "2" "4" "2" ...
## $ CM4 : chr [1:1397] "4" "2" "5" "4" ...
## $ CM5 : chr [1:1397] "4" "2" "6" "3" ...
## $ DS1 : chr [1:1397] "1" "2" "2" "1" ...
## $ DS2 : chr [1:1397] "1" "2" "1" "3" ...
## $ DS3 : chr [1:1397] "2" "2" "2" "2" ...
## $ DS4 : chr [1:1397] "1" "2" "2" "2" ...
## $ DS5 : chr [1:1397] "2" "2" "1" "1" ...
## $ DS6 : chr [1:1397] "2" "2" "1" "1" ...
## $ VM1_msf : num [1:1397] 5 7 8 6 3 NA 3 8 8 5 ...
## $ VM2_Rmc : num [1:1397] 9 10 10 10 7 8 7 9 10 8 ...
## $ VM3_sm : num [1:1397] 0 0 10 0 10 0 0 0 10 0 ...
## $ VM4_Rcsm: num [1:1397] 3 4 4 3 3 3 4 3 4 3 ...
## $ VM5_spm : num [1:1397] 0 0 0 0 10 10 0 0 0 0 ...
## $ VH : num [1:1397] 0 2 0 3 1 0 0 0 2 1 ...
# Convert the CM and DS items (still character after recoding) to numeric.
# The columns are selected by name (CM1 through DS6) instead of by position.
wd <- wd %>%
  mutate(across(CM1:DS6, as.numeric))
str(wd)
## tibble [1,397 x 20] (S3: tbl_df/tbl/data.frame)
## $ Age : num [1:1397] 4 5 6 5 5 1 4 5 4 6 ...
## $ Edu : num [1:1397] 4 2 4 5 5 2 2 3 3 3 ...
## $ Gender : chr [1:1397] "Male" "Female" "Female" "Female" ...
## $ CM1 : num [1:1397] 5 3 6 5 5 5 2 3 7 6 ...
## $ CM2 : num [1:1397] 6 3 6 4 5 6 3 5 6 6 ...
## $ CM3 : num [1:1397] 2 2 4 2 5 3 1 3 5 4 ...
## $ CM4 : num [1:1397] 4 2 5 4 3 2 2 3 2 4 ...
## $ CM5 : num [1:1397] 4 2 6 3 2 3 2 3 2 5 ...
## $ DS1 : num [1:1397] 1 2 2 1 2 1 2 1 4 2 ...
## $ DS2 : num [1:1397] 1 2 1 3 3 1 3 1 4 2 ...
## $ DS3 : num [1:1397] 2 2 2 2 3 1 2 1 1 1 ...
## $ DS4 : num [1:1397] 1 2 2 2 3 1 2 1 2 2 ...
## $ DS5 : num [1:1397] 2 2 1 1 2 1 1 1 1 2 ...
## $ DS6 : num [1:1397] 2 2 1 1 2 1 1 1 1 2 ...
## $ VM1_msf : num [1:1397] 5 7 8 6 3 NA 3 8 8 5 ...
## $ VM2_Rmc : num [1:1397] 9 10 10 10 7 8 7 9 10 8 ...
## $ VM3_sm : num [1:1397] 0 0 10 0 10 0 0 0 10 0 ...
## $ VM4_Rcsm: num [1:1397] 3 4 4 3 3 3 4 3 4 3 ...
## $ VM5_spm : num [1:1397] 0 0 0 0 10 10 0 0 0 0 ...
## $ VH : num [1:1397] 0 2 0 3 1 0 0 0 2 1 ...
First, I hypothesize a model in which CM, DS, and VM are latent constructs. CM is the exogenous variable. VH is the DV of concern, and it is a measured (observed) variable. So, I first test the measurement model for CM, DS, and VM. Then I test a model with the following hypotheses: (i) CM leads to DS; (ii) CM leads to VM; (iii) CM leads to VH; (iv) DS leads to VH; (v) VM leads to VH; (vi) DS leads to VM.
library(lavaan)
## This is lavaan 0.6-9
## lavaan is FREE software! Please report any bugs.
# Measurement-only (CFA) model: three latent constructs, each defined by its
# own observed items.
model_measure <- '
#The measurement models
CM =~ CM1 + CM2 + CM3 + CM4 + CM5
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6
VM =~ VM1_msf + VM2_Rmc + VM3_sm + VM4_Rcsm + VM5_spm
'
# Fit the CFA and print fit indices, standardized loadings and R-square.
# Rows with missing values on any model variable are dropped by default
# (the summary reports 1084 of 1397 observations used).
fit_m <- cfa(model = model_measure, data = wd)
summary(fit_m, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-9 ended normally after 93 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 35
##
## Used Total
## Number of observations 1084 1397
##
## Model Test User Model:
##
## Test statistic 676.753
## Degrees of freedom 101
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6048.464
## Degrees of freedom 120
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.903
## Tucker-Lewis Index (TLI) 0.885
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -29111.110
## Loglikelihood unrestricted model (H1) -28772.733
##
## Akaike (AIC) 58292.220
## Bayesian (BIC) 58466.814
## Sample-size adjusted Bayesian (BIC) 58355.647
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.073
## 90 Percent confidence interval - lower 0.067
## 90 Percent confidence interval - upper 0.078
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.058
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CM =~
## CM1 1.000 0.763 0.529
## CM2 0.720 0.058 12.473 0.000 0.550 0.485
## CM3 1.149 0.077 14.996 0.000 0.877 0.643
## CM4 1.465 0.087 16.932 0.000 1.119 0.836
## CM5 1.657 0.099 16.735 0.000 1.265 0.803
## DS =~
## DS1 1.000 0.745 0.628
## DS2 1.165 0.064 18.176 0.000 0.868 0.640
## DS3 1.100 0.060 18.334 0.000 0.819 0.647
## DS4 1.262 0.055 23.072 0.000 0.941 0.885
## DS5 1.177 0.051 23.004 0.000 0.877 0.880
## DS6 1.101 0.052 21.194 0.000 0.821 0.781
## VM =~
## VM1_msf 1.000 0.370 0.125
## VM2_Rmc 0.234 0.206 1.135 0.257 0.087 0.048
## VM3_sm 8.346 2.820 2.959 0.003 3.087 0.663
## VM4_Rcsm -0.395 0.144 -2.737 0.006 -0.146 -0.216
## VM5_spm 6.838 2.260 3.026 0.002 2.529 0.592
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CM ~~
## DS 0.257 0.027 9.570 0.000 0.451 0.451
## VM 0.035 0.017 2.097 0.036 0.123 0.123
## DS ~~
## VM 0.034 0.016 2.157 0.031 0.125 0.125
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CM1 1.497 0.069 21.685 0.000 1.497 0.720
## .CM2 0.984 0.045 22.027 0.000 0.984 0.765
## .CM3 1.088 0.054 20.334 0.000 1.088 0.586
## .CM4 0.537 0.041 13.165 0.000 0.537 0.300
## .CM5 0.878 0.058 15.157 0.000 0.878 0.354
## .DS1 0.851 0.039 21.870 0.000 0.851 0.605
## .DS2 1.083 0.050 21.776 0.000 1.083 0.590
## .DS3 0.931 0.043 21.719 0.000 0.931 0.581
## .DS4 0.245 0.016 15.117 0.000 0.245 0.217
## .DS5 0.223 0.014 15.457 0.000 0.223 0.225
## .DS6 0.430 0.022 19.832 0.000 0.430 0.390
## .VM1_msf 8.616 0.374 23.016 0.000 8.616 0.984
## .VM2_Rmc 3.208 0.138 23.242 0.000 3.208 0.998
## .VM3_sm 12.135 1.863 6.512 0.000 12.135 0.560
## .VM4_Rcsm 0.435 0.019 22.446 0.000 0.435 0.953
## .VM5_spm 11.883 1.307 9.092 0.000 11.883 0.650
## CM 0.583 0.067 8.709 0.000 1.000 1.000
## DS 0.555 0.050 11.126 0.000 1.000 1.000
## VM 0.137 0.087 1.574 0.115 1.000 1.000
##
## R-Square:
## Estimate
## CM1 0.280
## CM2 0.235
## CM3 0.414
## CM4 0.700
## CM5 0.646
## DS1 0.395
## DS2 0.410
## DS3 0.419
## DS4 0.783
## DS5 0.775
## DS6 0.610
## VM1_msf 0.016
## VM2_Rmc 0.002
## VM3_sm 0.440
## VM4_Rcsm 0.047
## VM5_spm 0.350
While all the items for CM and DS loaded well onto their respective constructs, only two out of five items for VM loaded. Thus, I got rid of the items that didn’t load.
# Structural model 1: CM is exogenous; DS and VM are endogenous latent
# constructs; VH is the observed outcome. VM keeps only VM3_sm and VM5_spm.
# Labelled paths and the products defined from them:
#   ind1 = a*b    CM -> DS -> VH
#   ind2 = d*e    CM -> VM -> VH
#   ind3 = a*f    CM -> DS -> VM
#   ind4 = a*f*e  CM -> DS -> VM -> VH
model1 <- '
#The measurement models
CM =~ CM1 + CM2 + CM3 + CM4 + CM5
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6
VM =~ VM3_sm + VM5_spm
#Regressions
DS ~ a*CM
VH ~ b*DS + c*CM + e*VM
VM ~ d*CM + f*DS
ind1 := a*b
ind2 := d*e
ind3 := a*f
ind4 := a*f*e
#exogenous covariances
CM ~~ CM
'
# Fit the model and print fit indices, standardized estimates and R-square.
fit1 <- sem(model = model1, data = wd)
summary(fit1, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-9 ended normally after 81 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 33
##
## Used Total
## Number of observations 1194 1397
##
## Model Test User Model:
##
## Test statistic 605.418
## Degrees of freedom 72
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6924.940
## Degrees of freedom 91
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.922
## Tucker-Lewis Index (TLI) 0.901
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -28005.748
## Loglikelihood unrestricted model (H1) -27703.039
##
## Akaike (AIC) 56077.495
## Bayesian (BIC) 56245.303
## Sample-size adjusted Bayesian (BIC) 56140.482
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.079
## 90 Percent confidence interval - lower 0.073
## 90 Percent confidence interval - upper 0.085
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.045
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CM =~
## CM1 1.000 0.761 0.525
## CM2 0.700 0.054 12.880 0.000 0.532 0.475
## CM3 1.151 0.073 15.725 0.000 0.875 0.646
## CM4 1.489 0.084 17.739 0.000 1.133 0.842
## CM5 1.681 0.096 17.497 0.000 1.278 0.804
## DS =~
## DS1 1.000 0.797 0.649
## DS2 1.133 0.056 20.119 0.000 0.903 0.658
## DS3 1.086 0.053 20.611 0.000 0.866 0.677
## DS4 1.241 0.049 25.492 0.000 0.989 0.888
## DS5 1.144 0.045 25.367 0.000 0.912 0.881
## DS6 1.060 0.045 23.400 0.000 0.845 0.791
## VM =~
## VM3_sm 1.000 1.809 0.383
## VM5_spm 2.357 1.475 1.598 0.110 4.264 0.981
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## DS ~
## CM (a) 0.522 0.044 11.810 0.000 0.499 0.499
## VH ~
## DS (b) 0.865 0.090 9.644 0.000 0.689 0.344
## CM (c) 0.244 0.092 2.651 0.008 0.186 0.093
## VM (e) -0.060 0.031 -1.959 0.050 -0.109 -0.054
## VM ~
## CM (d) 0.072 0.101 0.713 0.476 0.030 0.030
## DS (f) 0.187 0.143 1.305 0.192 0.082 0.082
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CM 0.579 0.064 9.080 0.000 1.000 1.000
## .CM1 1.519 0.066 22.845 0.000 1.519 0.724
## .CM2 0.972 0.042 23.224 0.000 0.972 0.774
## .CM3 1.067 0.050 21.386 0.000 1.067 0.582
## .CM4 0.525 0.038 13.681 0.000 0.525 0.291
## .CM5 0.896 0.055 16.142 0.000 0.896 0.354
## .DS1 0.873 0.038 22.888 0.000 0.873 0.579
## .DS2 1.067 0.047 22.809 0.000 1.067 0.567
## .DS3 0.885 0.039 22.630 0.000 0.885 0.542
## .DS4 0.263 0.016 16.226 0.000 0.263 0.212
## .DS5 0.239 0.014 16.739 0.000 0.239 0.223
## .DS6 0.427 0.020 20.817 0.000 0.427 0.374
## .VM3_sm 18.978 2.178 8.713 0.000 18.978 0.853
## .VM5_spm 0.718 11.306 0.063 0.949 0.718 0.038
## .VH 3.373 0.140 24.051 0.000 3.373 0.842
## .DS 0.477 0.040 11.900 0.000 0.751 0.751
## .VM 3.238 2.049 1.581 0.114 0.990 0.990
##
## R-Square:
## Estimate
## CM1 0.276
## CM2 0.226
## CM3 0.418
## CM4 0.709
## CM5 0.646
## DS1 0.421
## DS2 0.433
## DS3 0.458
## DS4 0.788
## DS5 0.777
## DS6 0.626
## VM3_sm 0.147
## VM5_spm 0.962
## VH 0.158
## DS 0.249
## VM 0.010
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ind1 0.452 0.056 8.060 0.000 0.344 0.172
## ind2 -0.004 0.006 -0.671 0.502 -0.003 -0.002
## ind3 0.098 0.075 1.300 0.194 0.041 0.041
## ind4 -0.006 0.005 -1.080 0.280 -0.004 -0.002
# Visualization: path diagram of the fitted structural model (fit1).
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.1.3
semPaths(object = fit1, edge.color = "green")
##Alternative Model
Next, I test a second model in which only CM and DS are the latent constructs. Here I see how age, education, and gender relate to the prior model. These three are the exogenous variables. Model hypotheses: (i) Age, Edu, and Gender each predict both CM and DS; (ii) Age, Edu, and Gender also each predict both MS and VH; (iii) CM and DS each predict both MS and VH. I am ignoring indirect effects.
# MS = Misinformation Sharing: the mean of the two 0/10 sharing items
# (NA when either item is missing).
wd$MS <- with(wd, (VM3_sm + VM5_spm) / 2)
# Structural model 2: only CM and DS are latent. Age, Edu and Gender are
# exogenous predictors of CM, DS, MS and VH; CM and DS also predict MS and VH.
# NOTE(review): Gender is a character column in wd, yet the summary reports a
# variance of 0.289 for it, above the 0.25 maximum of a two-level 0/1
# indicator -- presumably it is being treated as a numeric code with more
# than two categories. Confirm the coding before interpreting Gender paths.
model2 <- '
#The measurement models
CM =~ CM1 + CM2 + CM3 + CM4 + CM5
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6
#regressions
MS ~ Age + Edu + Gender + CM + DS
VH ~ Age + Edu + Gender + CM + DS
CM ~ Age + Edu + Gender
DS ~ Age + Edu + Gender
#exogenous variances and covariances
Age ~~ Age
Edu ~~ Edu
Gender ~~ Gender
Age ~~ Edu
Age ~~ Gender
Gender ~~ Edu
'
# Fit the model and print fit indices, standardized estimates and R-square.
fit2 <- sem(model = model2, data = wd)
summary(fit2, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-9 ended normally after 61 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 47
##
## Used Total
## Number of observations 1186 1397
##
## Model Test User Model:
##
## Test statistic 894.572
## Degrees of freedom 89
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6902.813
## Degrees of freedom 120
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.881
## Tucker-Lewis Index (TLI) 0.840
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -28967.328
## Loglikelihood unrestricted model (H1) NA
##
## Akaike (AIC) 58028.655
## Bayesian (BIC) 58267.337
## Sample-size adjusted Bayesian (BIC) 58118.048
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.087
## 90 Percent confidence interval - lower 0.082
## 90 Percent confidence interval - upper 0.093
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.122
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CM =~
## CM1 1.000 0.765 0.527
## CM2 0.708 0.055 12.983 0.000 0.541 0.483
## CM3 1.142 0.073 15.643 0.000 0.873 0.644
## CM4 1.498 0.085 17.681 0.000 1.146 0.852
## CM5 1.657 0.095 17.362 0.000 1.267 0.794
## DS =~
## DS1 1.000 0.791 0.643
## DS2 1.129 0.057 19.691 0.000 0.893 0.650
## DS3 1.087 0.054 20.231 0.000 0.859 0.671
## DS4 1.260 0.050 25.196 0.000 0.996 0.893
## DS5 1.163 0.046 25.069 0.000 0.919 0.886
## DS6 1.069 0.046 23.061 0.000 0.845 0.790
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MS ~
## Age -0.088 0.098 -0.893 0.372 -0.088 -0.027
## Edu 0.192 0.094 2.040 0.041 0.192 0.061
## Gender 0.303 0.204 1.484 0.138 0.303 0.043
## CM 0.290 0.162 1.788 0.074 0.221 0.059
## DS 0.260 0.146 1.780 0.075 0.206 0.055
## VH ~
## Age -0.107 0.049 -2.197 0.028 -0.107 -0.062
## Edu 0.038 0.046 0.820 0.412 0.038 0.023
## Gender -0.172 0.101 -1.706 0.088 -0.172 -0.047
## CM 0.298 0.081 3.683 0.000 0.228 0.116
## DS 0.891 0.079 11.346 0.000 0.704 0.357
## CM ~
## Age 0.130 0.021 6.064 0.000 0.169 0.194
## Edu -0.125 0.020 -6.127 0.000 -0.164 -0.196
## Gender -0.050 0.044 -1.152 0.249 -0.066 -0.035
## DS ~
## Age 0.091 0.021 4.334 0.000 0.115 0.131
## Edu -0.067 0.020 -3.369 0.001 -0.085 -0.102
## Gender 0.060 0.044 1.353 0.176 0.076 0.041
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Age ~~
## Edu -0.061 0.040 -1.538 0.124 -0.061 -0.045
## Gender 0.059 0.018 3.271 0.001 0.059 0.095
## Edu ~~
## Gender -0.048 0.019 -2.566 0.010 -0.048 -0.075
## .MS ~~
## .VH -0.395 0.201 -1.966 0.049 -0.395 -0.058
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Age 1.316 0.054 24.352 0.000 1.316 1.000
## Edu 1.430 0.059 24.352 0.000 1.430 1.000
## Gender 0.289 0.012 24.352 0.000 0.289 1.000
## .CM1 1.523 0.067 22.717 0.000 1.523 0.723
## .CM2 0.965 0.042 23.065 0.000 0.965 0.767
## .CM3 1.077 0.051 21.266 0.000 1.077 0.585
## .CM4 0.496 0.040 12.465 0.000 0.496 0.274
## .CM5 0.938 0.058 16.208 0.000 0.938 0.369
## .DS1 0.886 0.039 22.864 0.000 0.886 0.586
## .DS2 1.091 0.048 22.810 0.000 1.091 0.578
## .DS3 0.901 0.040 22.620 0.000 0.901 0.550
## .DS4 0.252 0.016 15.656 0.000 0.252 0.202
## .DS5 0.231 0.014 16.255 0.000 0.231 0.215
## .DS6 0.430 0.021 20.783 0.000 0.430 0.376
## .MS 13.977 0.574 24.330 0.000 13.977 0.990
## .VH 3.362 0.140 23.979 0.000 3.362 0.862
## .CM 0.539 0.060 9.042 0.000 0.921 0.921
## .DS 0.605 0.050 12.008 0.000 0.968 0.968
##
## R-Square:
## Estimate
## CM1 0.277
## CM2 0.233
## CM3 0.415
## CM4 0.726
## CM5 0.631
## DS1 0.414
## DS2 0.422
## DS3 0.450
## DS4 0.798
## DS5 0.785
## DS6 0.624
## MS 0.010
## VH 0.138
## CM 0.079
## DS 0.032
In terms of fit, this was not as good a model as model1.
# Path diagram of the alternative model (fit2).
semPlot::semPaths(object = fit2, edge.color = "blue")
##Discussions (and especially limitations)
Normality test. Sample: an Australian sample with a disproportionate number of educated and elderly respondents. Vaccine-hesitant people made up only 24% of the sample. The MS construct is purely speculative and has not been validated.