What Underlies Vaccine Hesitancy

Data Source

I obtained the dataset by searching for misinformation-related data on the Open Science Framework (OSF). Link to the article. The study was carried out in an Australian sample. The authors ran regression analyses (multiple and logistic) to identify predictors of vaccine hesitancy and of misinformation-sharing behavior. For the current project, I wanted to examine the relationships among the following variables: Education (Edu), Age, Gender, Conspiracy Mentality (CM), Doubt in Science (DS), Vulnerability to Misinformation (VM), and Vaccine Hesitancy (VH). Here, CM (five items) and DS (six items) are latent variables. Note that the authors of the source study didn’t treat VM as a separate construct. They asked five questions pertaining to misinformation sharing, and I tested whether responses to those five questions load onto a construct (VM) of my own making.

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)
# Location of the raw survey workbook.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where this exact file exists.
raw_data_path <- "C:/Users/sanja/OneDrive/Desktop/Green Card/Homeworks/vaccine misinformation raw data .xlsx"
# Read the first sheet of the workbook and preview the first rows.
raw_data <- read_excel(raw_data_path)
head(raw_data)

## # A tibble: 6 x 45
##   `Start Date`        `End Date`          Progress `Duration (in secon~ Finished
##   <dttm>              <dttm>                 <dbl>                <dbl> <chr>   
## 1 NA                  NA                        NA                   NA <NA>    
## 2 2020-12-14 04:19:01 2020-12-14 04:43:53      100                 1491 True    
## 3 2020-12-17 13:23:47 2020-12-17 13:32:06      100                  499 True    
## 4 2020-12-17 14:47:12 2020-12-17 14:55:42      100                  510 True    
## 5 2020-12-10 18:05:29 2020-12-10 18:15:19      100                  589 True    
## 6 2020-12-10 17:35:04 2020-12-10 17:45:32      100                  627 True    
## # ... with 40 more variables: Recorded Date <dttm>, Response ID <chr>,
## #   Age <chr>, Gender <chr>, HighestEduLevel <chr>, Industry <chr>,
## #   Other industry Please specify - Text <chr>,
## #   FrequencyReadingCoronaCheck <chr>, HearaboutCoronaCheck <chr>,
## #   HearaboutCoronaCheckNewsletter <chr>, SubscribeCoronaCheck <chr>,
## #   Last3MonthsSharedCoronaCheck <chr>, ItemCoronaCheckChangedMind <chr>,
## #   Please elaborate... <chr>, ...

Data Prep

table(raw_data$Age)

## 
## 18-29 30-39 40-49 50-59 60-69 70-79   80+ 
##    17    41    98   257   573   368    42

class(raw_data$Age)

## [1] "character"

df <- raw_data
# Age arrives as a bracket label (character). Store it as an ordinal score
# 1-7, youngest bracket to oldest, by looking up each label's position in the
# ordered bracket list. match() yields NA for missing or unrecognised labels.
age_brackets <- c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")
df$Age <- as.double(match(df$Age, age_brackets))
class(df$Age)  # it's numeric.

## [1] "numeric"

#the same with level of education. Also is better as ordinal
table(df$HighestEduLevel)

## 
##       Bachelor's Degree Did not complete school                 Diploma 
##                     419                      60                     270 
##    Post-graduate Degree   Year 12 or equivalent 
##                     404                     242

# Education as an ordinal score, lowest to highest attainment (1-5).
# A Diploma ranks below a Bachelor's Degree in the Australian Qualifications
# Framework, so it must receive the lower code; the previous coding had the
# two swapped. match() yields NA for missing or unrecognised labels.
# NOTE(review): this changes Edu values, so every downstream model that uses
# Edu must be re-run.
edu_levels <- c(
  "Did not complete school",
  "Year 12 or equivalent",
  "Diploma",
  "Bachelor's Degree",
  "Post-graduate Degree"
)
df$edu <- as.double(match(df$HighestEduLevel, edu_levels))
class(df$edu)

## [1] "numeric"

#gender is fine the way it is.
class(df$Gender)

## [1] "character"

What follows are the candidate items for the ‘vulnerability to misinformation’ construct.

df$msf <- df$FrequencypostssocialmediafeedLaterDebunkedCoronaCheck
#here msf = misinformation sharing frequency
table(df$msf)

## 
##   0   1   2   3   4   5   6   7   8   9  10 
## 202  78  84  76  66 252 123 149 143  43  47

class(df$msf)

## [1] "numeric"

table(df$LevelConcernFakeNews)

## 
##   0   1   2   3   4   5   6   7   8   9  10 
##   2   6   8  15  10  57  48  96 196 287 571

class(df$LevelConcernFakeNews)

## [1] "numeric"

# mc  = misinformation caution (level of concern about fake news, 0-10)
# Rmc = the same item reverse-scored (10 - mc)
df <- df %>%
  mutate(
    mc = LevelConcernFakeNews,
    Rmc = 10 - mc
  )
table(df$mc)

## 
##   0   1   2   3   4   5   6   7   8   9  10 
##   2   6   8  15  10  57  48  96 196 287 571

table(df$SharedInfoLaterDiscoveredFake)

## 
##  No Yes 
## 942 440

class(df$SharedInfoLaterDiscoveredFake)

## [1] "character"

df$SM <- df$SharedInfoLaterDiscoveredFake
#here SM = shared info later discovered fake
table(df$SM)

## 
##  No Yes 
## 942 440

# Recode Yes/No onto the 10/0 scale. SM is still a character column at this
# point, so the codes are stored as the strings "10" and "0".
df$SM[df$SM %in% "Yes"] <- "10"
df$SM[df$SM %in% "No"] <- "0"
table(df$SM)

## 
##   0  10 
## 942 440

df$SM <- as.numeric(df$SM)
table(df$CapabilityDifferentiateFakeNewsOtherNews)

## 
## A little bit capable   Moderately capable   Not at all capable 
##                  106                  766                   12 
##               Unsure         Very capable 
##                   13                  495

df$csm <- df$CapabilityDifferentiateFakeNewsOtherNews
#here csm = self-perceived capacity to spot misinformation
table(df$csm)

## 
## A little bit capable   Moderately capable   Not at all capable 
##                  106                  766                   12 
##               Unsure         Very capable 
##                   13                  495

# Map the capability labels onto a 1-4 score; "Not at all capable" and
# "Unsure" share the lowest score. Missing values stay NA.
csm_scores <- c(
  "Not at all capable" = 1,
  "Unsure" = 1,
  "A little bit capable" = 2,
  "Moderately capable" = 3,
  "Very capable" = 4
)
df$csm <- unname(csm_scores[df$csm])
# Rcsm = reverse-scored capability (5 - csm)
df$Rcsm <- 5 - df$csm
table(df$csm)

## 
##   1   2   3   4 
##  25 106 766 495

df$SPM <- df$SharedNewsNotSureTrue
#SPM = shared possible misinformation
table(df$SPM)

## 
##   No  Yes 
## 1052  339

# Recode Yes/No to numeric 10/0 via a named lookup; missing values stay NA.
spm_scores <- c("Yes" = 10, "No" = 0)
df$SPM <- unname(spm_scores[df$SPM])
table(df$SPM)

## 
##    0   10 
## 1052  339

Vaccine hesitancy construct prep:

# VH = vaccine hesitancy
df$VH <- df$LikelyHaveVaccine
class(df$VH)

## [1] "numeric"

table(df$VH)

## 
##   0   1   2   3   4   5   6   7   8   9  10 
##  10  13   9  11  12  69  42  81 165 297 603

# Reverse-score likelihood of having the vaccine (0-10) so that a higher VH
# means more hesitancy.
df <- df %>% mutate(VH = 10 - VH)
table(df$VH)

## 
##   0   1   2   3   4   5   6   7   8   9  10 
## 603 297 165  81  42  69  12  11   9  13  10

Note: Very few vaccine hesitant people in the sample

The tedious code that follows pertains to the two main (and actual) latent variables.

#now conspiracy mentality items
table(df$ConspiracyMentalityItem1)

## 
##                          1                          2 
##                          1                          4 
##                          3                          4 
##                          9                          2 
##                          5                          6 
##                         19                         11 
##                          7             Agree Somewhat 
##                          5                        460 
##             Agree Strongly        Agree Very Strongly 
##                        347                        165 
##          Disagree Somewhat          Disagree Strongly 
##                        136                        101 
##     Disagree Very Strongly Neither Agree nor Disagree 
##                         13                        119

# Recode the verbal agreement labels of conspiracy-mentality item 1 to the
# strings "1".."7" (1 = Disagree Very Strongly, 7 = Agree Very Strongly).
# Values that are not one of the seven labels (this item also contains raw
# digits) are kept as they are; the column stays character.
cm1_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
cm1_code <- match(df$ConspiracyMentalityItem1, cm1_scale)
df$CM1 <- ifelse(is.na(cm1_code), df$ConspiracyMentalityItem1,
                 as.character(cm1_code))
table(df$ConspiracyMentalityItem2)

## 
##             Agree Somewhat             Agree Strongly 
##                        513                        436 
##        Agree Very Strongly          Disagree Somewhat 
##                        241                         80 
##          Disagree Strongly     Disagree Very Strongly 
##                         16                          3 
## Neither Agree nor Disagree 
##                         98

# Recode the verbal agreement labels of conspiracy-mentality item 2 to the
# strings "1".."7" (1 = Disagree Very Strongly, 7 = Agree Very Strongly).
# Non-label values are kept as they are; the column stays character.
cm2_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
cm2_code <- match(df$ConspiracyMentalityItem2, cm2_scale)
df$CM2 <- ifelse(is.na(cm2_code), df$ConspiracyMentalityItem2,
                 as.character(cm2_code))
table(df$ConspiracyMentalityItem3)

## 
##             Agree Somewhat             Agree Strongly 
##                        230                         46 
##        Agree Very Strongly          Disagree Somewhat 
##                         16                        340 
##          Disagree Strongly     Disagree Very Strongly 
##                        356                        114 
## Neither Agree nor Disagree 
##                        279

# Recode the verbal agreement labels of conspiracy-mentality item 3 to the
# strings "1".."7" (1 = Disagree Very Strongly, 7 = Agree Very Strongly).
# Non-label values are kept as they are; the column stays character.
cm3_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
cm3_code <- match(df$ConspiracyMentalityItem3, cm3_scale)
df$CM3 <- ifelse(is.na(cm3_code), df$ConspiracyMentalityItem3,
                 as.character(cm3_code))
table(df$ConspiracyMentalityItem4)

## 
##             Agree Somewhat             Agree Strongly 
##                        128                         23 
##        Agree Very Strongly          Disagree Somewhat 
##                          4                        219 
##          Disagree Strongly     Disagree Very Strongly 
##                        401                        262 
## Neither Agree nor Disagree 
##                        350

# Recode the verbal agreement labels of conspiracy-mentality item 4 to the
# strings "1".."7" (1 = Disagree Very Strongly, 7 = Agree Very Strongly).
# Non-label values are kept as they are; the column stays character.
cm4_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
cm4_code <- match(df$ConspiracyMentalityItem4, cm4_scale)
df$CM4 <- ifelse(is.na(cm4_code), df$ConspiracyMentalityItem4,
                 as.character(cm4_code))
table(df$ConspiracyMentalityItem5)

## 
##             Agree Somewhat             Agree Strongly 
##                        267                         68 
##        Agree Very Strongly          Disagree Somewhat 
##                         29                        224 
##          Disagree Strongly     Disagree Very Strongly 
##                        314                        210 
## Neither Agree nor Disagree 
##                        269

# Recode the verbal agreement labels of conspiracy-mentality item 5 to the
# strings "1".."7" (1 = Disagree Very Strongly, 7 = Agree Very Strongly).
# Non-label values are kept as they are; the column stays character.
cm5_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
cm5_code <- match(df$ConspiracyMentalityItem5, cm5_scale)
df$CM5 <- ifelse(is.na(cm5_code), df$ConspiracyMentalityItem5,
                 as.character(cm5_code))

#Now belief in science construct
table(df$BeliefinScienceItem1)

## 
##             Agree Somewhat             Agree Strongly 
##                         52                         25 
##        Agree Very Strongly          Disagree Somewhat 
##                         14                        223 
##          Disagree Strongly     Disagree Very Strongly 
##                        584                        352 
## Neither Agree nor Disagree 
##                         88

# DS = Doubt in Science
# Recode the six belief-in-science items (BeliefinScienceItem1..6) into
# DS1..DS6. Each verbal agreement label becomes its position on the 7-point
# scale, stored as a string "1".."7" (1 = Disagree Very Strongly,
# 7 = Agree Very Strongly). Values that are not one of the seven labels,
# including NA, are carried over unchanged; the columns stay character and
# are converted to numeric later in the script.
ds_scale <- c(
  "Disagree Very Strongly", "Disagree Strongly", "Disagree Somewhat",
  "Neither Agree nor Disagree",
  "Agree Somewhat", "Agree Strongly", "Agree Very Strongly"
)
for (ds_item in seq_len(6)) {
  ds_raw <- df[[paste0("BeliefinScienceItem", ds_item)]]
  ds_code <- match(ds_raw, ds_scale)
  df[[paste0("DS", ds_item)]] <- ifelse(is.na(ds_code), ds_raw,
                                        as.character(ds_code))
}

wd = working dataset. CM = conspiracy mentality (items scored 1 through 7); a higher score means higher CM. DS = doubt in science (items scored 1 through 7); a higher score means higher DS.

VM = Vulnerability to Misinformation. VM1_msf = misinformation sharing frequency (0–10); higher means more vulnerable. VM2_Rmc = misinformation caution, reversed (0–10); higher means more vulnerable. VM3_sm = shared info later found to be fake (Yes = 10, No = 0); higher means more vulnerable. VM4_Rcsm = self-perceived capacity to spot misinformation, reversed (1–4); higher means more vulnerable. VM5_spm = shared possible misinformation (Yes = 10, No = 0); higher means more vulnerable.

# Assemble the working dataset (wd): demographics, the CM and DS items, the
# five candidate VM items, and VH.
# VM2_Rmc and VM4_Rcsm must come from the reverse-scored columns (Rmc, Rcsm)
# so that higher always means "more vulnerable"; previously the unreversed
# mc and csm were selected despite the "R" in the names.
# NOTE(review): this changes the VM2_Rmc / VM4_Rcsm values, so the models
# fitted on wd must be re-run.
wd <- df %>% transmute(Age, Edu = edu, Gender,
                    CM1, CM2, CM3, CM4, CM5,
                    DS1, DS2, DS3, DS4, DS5, DS6,
                    VM1_msf = msf, VM2_Rmc = Rmc, VM3_sm = SM,
                    VM4_Rcsm = Rcsm, VM5_spm = SPM,
                    VH)
# Drop the first row (it shows up as an all-NA row in head(raw_data)).
wd <- wd[-1, ]
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(wd)
}
str(wd)

## tibble [1,397 x 20] (S3: tbl_df/tbl/data.frame)
##  $ Age     : num [1:1397] 4 5 6 5 5 1 4 5 4 6 ...
##  $ Edu     : num [1:1397] 4 2 4 5 5 2 2 3 3 3 ...
##  $ Gender  : chr [1:1397] "Male" "Female" "Female" "Female" ...
##  $ CM1     : chr [1:1397] "5" "3" "6" "5" ...
##  $ CM2     : chr [1:1397] "6" "3" "6" "4" ...
##  $ CM3     : chr [1:1397] "2" "2" "4" "2" ...
##  $ CM4     : chr [1:1397] "4" "2" "5" "4" ...
##  $ CM5     : chr [1:1397] "4" "2" "6" "3" ...
##  $ DS1     : chr [1:1397] "1" "2" "2" "1" ...
##  $ DS2     : chr [1:1397] "1" "2" "1" "3" ...
##  $ DS3     : chr [1:1397] "2" "2" "2" "2" ...
##  $ DS4     : chr [1:1397] "1" "2" "2" "2" ...
##  $ DS5     : chr [1:1397] "2" "2" "1" "1" ...
##  $ DS6     : chr [1:1397] "2" "2" "1" "1" ...
##  $ VM1_msf : num [1:1397] 5 7 8 6 3 NA 3 8 8 5 ...
##  $ VM2_Rmc : num [1:1397] 9 10 10 10 7 8 7 9 10 8 ...
##  $ VM3_sm  : num [1:1397] 0 0 10 0 10 0 0 0 10 0 ...
##  $ VM4_Rcsm: num [1:1397] 3 4 4 3 3 3 4 3 4 3 ...
##  $ VM5_spm : num [1:1397] 0 0 0 0 10 10 0 0 0 0 ...
##  $ VH      : num [1:1397] 0 2 0 3 1 0 0 0 2 1 ...

# Convert the CM and DS item columns (CM1 through DS6, i.e. columns 4-14 of
# wd) from character to numeric.
wd <- wd %>% mutate(across(CM1:DS6, as.numeric))
str(wd)

## tibble [1,397 x 20] (S3: tbl_df/tbl/data.frame)
##  $ Age     : num [1:1397] 4 5 6 5 5 1 4 5 4 6 ...
##  $ Edu     : num [1:1397] 4 2 4 5 5 2 2 3 3 3 ...
##  $ Gender  : chr [1:1397] "Male" "Female" "Female" "Female" ...
##  $ CM1     : num [1:1397] 5 3 6 5 5 5 2 3 7 6 ...
##  $ CM2     : num [1:1397] 6 3 6 4 5 6 3 5 6 6 ...
##  $ CM3     : num [1:1397] 2 2 4 2 5 3 1 3 5 4 ...
##  $ CM4     : num [1:1397] 4 2 5 4 3 2 2 3 2 4 ...
##  $ CM5     : num [1:1397] 4 2 6 3 2 3 2 3 2 5 ...
##  $ DS1     : num [1:1397] 1 2 2 1 2 1 2 1 4 2 ...
##  $ DS2     : num [1:1397] 1 2 1 3 3 1 3 1 4 2 ...
##  $ DS3     : num [1:1397] 2 2 2 2 3 1 2 1 1 1 ...
##  $ DS4     : num [1:1397] 1 2 2 2 3 1 2 1 2 2 ...
##  $ DS5     : num [1:1397] 2 2 1 1 2 1 1 1 1 2 ...
##  $ DS6     : num [1:1397] 2 2 1 1 2 1 1 1 1 2 ...
##  $ VM1_msf : num [1:1397] 5 7 8 6 3 NA 3 8 8 5 ...
##  $ VM2_Rmc : num [1:1397] 9 10 10 10 7 8 7 9 10 8 ...
##  $ VM3_sm  : num [1:1397] 0 0 10 0 10 0 0 0 10 0 ...
##  $ VM4_Rcsm: num [1:1397] 3 4 4 3 3 3 4 3 4 3 ...
##  $ VM5_spm : num [1:1397] 0 0 0 0 10 10 0 0 0 0 ...
##  $ VH      : num [1:1397] 0 2 0 3 1 0 0 0 2 1 ...

Model Testing

First, I hypothesize a model in which CM, DS, and VM are latent constructs. CM is the exogenous variable. VH is the DV of concern, and it is a measured (observed) variable. So, I first test the measurement model for CM, DS, and VM. Then I test a structural model with the following hypotheses: (i) CM leads to DS; (ii) CM leads to VM; (iii) CM leads to VH; (iv) DS leads to VH; (v) VM leads to VH; (vi) DS leads to VM.

library(lavaan)

## This is lavaan 0.6-9
## lavaan is FREE software! Please report any bugs.

# Measurement model: three correlated latent factors.
#   CM (conspiracy mentality)          - items CM1..CM5
#   DS (doubt in science)              - items DS1..DS6
#   VM (vulnerability to misinformation) - the five candidate VM items
model_measure <- '
#The measurement models
CM =~  CM1 + CM2 + CM3 + CM4 + CM5 
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6
VM =~ VM1_msf + VM2_Rmc + VM3_sm + VM4_Rcsm + VM5_spm
'

# Fit the CFA and report standardized loadings, R-square per indicator and
# the global fit indices. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
fit_m <- cfa(model_measure, data = wd)
summary(fit_m, standardized = TRUE, rsq = TRUE, fit.measures = TRUE)

## lavaan 0.6-9 ended normally after 93 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        35
##                                                       
##                                                   Used       Total
##   Number of observations                          1084        1397
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                               676.753
##   Degrees of freedom                               101
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              6048.464
##   Degrees of freedom                               120
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.903
##   Tucker-Lewis Index (TLI)                       0.885
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -29111.110
##   Loglikelihood unrestricted model (H1)     -28772.733
##                                                       
##   Akaike (AIC)                               58292.220
##   Bayesian (BIC)                             58466.814
##   Sample-size adjusted Bayesian (BIC)        58355.647
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.073
##   90 Percent confidence interval - lower         0.067
##   90 Percent confidence interval - upper         0.078
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.058
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CM =~                                                                 
##     CM1               1.000                               0.763    0.529
##     CM2               0.720    0.058   12.473    0.000    0.550    0.485
##     CM3               1.149    0.077   14.996    0.000    0.877    0.643
##     CM4               1.465    0.087   16.932    0.000    1.119    0.836
##     CM5               1.657    0.099   16.735    0.000    1.265    0.803
##   DS =~                                                                 
##     DS1               1.000                               0.745    0.628
##     DS2               1.165    0.064   18.176    0.000    0.868    0.640
##     DS3               1.100    0.060   18.334    0.000    0.819    0.647
##     DS4               1.262    0.055   23.072    0.000    0.941    0.885
##     DS5               1.177    0.051   23.004    0.000    0.877    0.880
##     DS6               1.101    0.052   21.194    0.000    0.821    0.781
##   VM =~                                                                 
##     VM1_msf           1.000                               0.370    0.125
##     VM2_Rmc           0.234    0.206    1.135    0.257    0.087    0.048
##     VM3_sm            8.346    2.820    2.959    0.003    3.087    0.663
##     VM4_Rcsm         -0.395    0.144   -2.737    0.006   -0.146   -0.216
##     VM5_spm           6.838    2.260    3.026    0.002    2.529    0.592
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CM ~~                                                                 
##     DS                0.257    0.027    9.570    0.000    0.451    0.451
##     VM                0.035    0.017    2.097    0.036    0.123    0.123
##   DS ~~                                                                 
##     VM                0.034    0.016    2.157    0.031    0.125    0.125
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .CM1               1.497    0.069   21.685    0.000    1.497    0.720
##    .CM2               0.984    0.045   22.027    0.000    0.984    0.765
##    .CM3               1.088    0.054   20.334    0.000    1.088    0.586
##    .CM4               0.537    0.041   13.165    0.000    0.537    0.300
##    .CM5               0.878    0.058   15.157    0.000    0.878    0.354
##    .DS1               0.851    0.039   21.870    0.000    0.851    0.605
##    .DS2               1.083    0.050   21.776    0.000    1.083    0.590
##    .DS3               0.931    0.043   21.719    0.000    0.931    0.581
##    .DS4               0.245    0.016   15.117    0.000    0.245    0.217
##    .DS5               0.223    0.014   15.457    0.000    0.223    0.225
##    .DS6               0.430    0.022   19.832    0.000    0.430    0.390
##    .VM1_msf           8.616    0.374   23.016    0.000    8.616    0.984
##    .VM2_Rmc           3.208    0.138   23.242    0.000    3.208    0.998
##    .VM3_sm           12.135    1.863    6.512    0.000   12.135    0.560
##    .VM4_Rcsm          0.435    0.019   22.446    0.000    0.435    0.953
##    .VM5_spm          11.883    1.307    9.092    0.000   11.883    0.650
##     CM                0.583    0.067    8.709    0.000    1.000    1.000
##     DS                0.555    0.050   11.126    0.000    1.000    1.000
##     VM                0.137    0.087    1.574    0.115    1.000    1.000
## 
## R-Square:
##                    Estimate
##     CM1               0.280
##     CM2               0.235
##     CM3               0.414
##     CM4               0.700
##     CM5               0.646
##     DS1               0.395
##     DS2               0.410
##     DS3               0.419
##     DS4               0.783
##     DS5               0.775
##     DS6               0.610
##     VM1_msf           0.016
##     VM2_Rmc           0.002
##     VM3_sm            0.440
##     VM4_Rcsm          0.047
##     VM5_spm           0.350

While all the items for CM and DS loaded well on their respective constructs, only two of the five VM items (VM3_sm and VM5_spm) loaded. Thus, I got rid of the items that didn’t load.

# Structural model 1.
# Latent factors: CM, DS, and VM (VM reduced to the two items kept from the
# measurement model). VH is an observed outcome.
# Labelled paths: a = CM->DS, b = DS->VH, c = CM->VH, d = CM->VM,
#                 e = VM->VH, f = DS->VM.
# Defined products:
#   ind1 = a*b   (CM -> DS -> VH)
#   ind2 = d*e   (CM -> VM -> VH)
#   ind3 = a*f   (CM -> DS -> VM; note this path ends at VM, not VH)
#   ind4 = a*f*e (CM -> DS -> VM -> VH)
# NOTE(review): VM is measured by only two indicators here, so its loadings
# are identified only through VM's relations with the other variables -
# treat the VM estimates with caution.
model1 <- '
#The measurement models
CM =~  CM1 + CM2 + CM3 + CM4 + CM5 
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6
VM =~ VM3_sm + VM5_spm

#Regressions 
DS ~ a*CM
VH ~ b*DS + c*CM + e*VM
VM ~ d*CM + f*DS
ind1 := a*b
ind2 := d*e
ind3 := a*f
ind4 := a*f*e

#exogenous covariances
CM ~~ CM
'
# Fit and summarise; TRUE is spelled out because T can be reassigned.
fit1 <- sem(model1, data = wd)
summary(fit1, standardized = TRUE, rsq = TRUE, fit.measures = TRUE)

## lavaan 0.6-9 ended normally after 81 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        33
##                                                       
##                                                   Used       Total
##   Number of observations                          1194        1397
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                               605.418
##   Degrees of freedom                                72
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              6924.940
##   Degrees of freedom                                91
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.922
##   Tucker-Lewis Index (TLI)                       0.901
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -28005.748
##   Loglikelihood unrestricted model (H1)     -27703.039
##                                                       
##   Akaike (AIC)                               56077.495
##   Bayesian (BIC)                             56245.303
##   Sample-size adjusted Bayesian (BIC)        56140.482
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.079
##   90 Percent confidence interval - lower         0.073
##   90 Percent confidence interval - upper         0.085
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.045
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CM =~                                                                 
##     CM1               1.000                               0.761    0.525
##     CM2               0.700    0.054   12.880    0.000    0.532    0.475
##     CM3               1.151    0.073   15.725    0.000    0.875    0.646
##     CM4               1.489    0.084   17.739    0.000    1.133    0.842
##     CM5               1.681    0.096   17.497    0.000    1.278    0.804
##   DS =~                                                                 
##     DS1               1.000                               0.797    0.649
##     DS2               1.133    0.056   20.119    0.000    0.903    0.658
##     DS3               1.086    0.053   20.611    0.000    0.866    0.677
##     DS4               1.241    0.049   25.492    0.000    0.989    0.888
##     DS5               1.144    0.045   25.367    0.000    0.912    0.881
##     DS6               1.060    0.045   23.400    0.000    0.845    0.791
##   VM =~                                                                 
##     VM3_sm            1.000                               1.809    0.383
##     VM5_spm           2.357    1.475    1.598    0.110    4.264    0.981
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   DS ~                                                                  
##     CM         (a)    0.522    0.044   11.810    0.000    0.499    0.499
##   VH ~                                                                  
##     DS         (b)    0.865    0.090    9.644    0.000    0.689    0.344
##     CM         (c)    0.244    0.092    2.651    0.008    0.186    0.093
##     VM         (e)   -0.060    0.031   -1.959    0.050   -0.109   -0.054
##   VM ~                                                                  
##     CM         (d)    0.072    0.101    0.713    0.476    0.030    0.030
##     DS         (f)    0.187    0.143    1.305    0.192    0.082    0.082
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     CM                0.579    0.064    9.080    0.000    1.000    1.000
##    .CM1               1.519    0.066   22.845    0.000    1.519    0.724
##    .CM2               0.972    0.042   23.224    0.000    0.972    0.774
##    .CM3               1.067    0.050   21.386    0.000    1.067    0.582
##    .CM4               0.525    0.038   13.681    0.000    0.525    0.291
##    .CM5               0.896    0.055   16.142    0.000    0.896    0.354
##    .DS1               0.873    0.038   22.888    0.000    0.873    0.579
##    .DS2               1.067    0.047   22.809    0.000    1.067    0.567
##    .DS3               0.885    0.039   22.630    0.000    0.885    0.542
##    .DS4               0.263    0.016   16.226    0.000    0.263    0.212
##    .DS5               0.239    0.014   16.739    0.000    0.239    0.223
##    .DS6               0.427    0.020   20.817    0.000    0.427    0.374
##    .VM3_sm           18.978    2.178    8.713    0.000   18.978    0.853
##    .VM5_spm           0.718   11.306    0.063    0.949    0.718    0.038
##    .VH                3.373    0.140   24.051    0.000    3.373    0.842
##    .DS                0.477    0.040   11.900    0.000    0.751    0.751
##    .VM                3.238    2.049    1.581    0.114    0.990    0.990
## 
## R-Square:
##                    Estimate
##     CM1               0.276
##     CM2               0.226
##     CM3               0.418
##     CM4               0.709
##     CM5               0.646
##     DS1               0.421
##     DS2               0.433
##     DS3               0.458
##     DS4               0.788
##     DS5               0.777
##     DS6               0.626
##     VM3_sm            0.147
##     VM5_spm           0.962
##     VH                0.158
##     DS                0.249
##     VM                0.010
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ind1              0.452    0.056    8.060    0.000    0.344    0.172
##     ind2             -0.004    0.006   -0.671    0.502   -0.003   -0.002
##     ind3              0.098    0.075    1.300    0.194    0.041    0.041
##     ind4             -0.006    0.005   -1.080    0.280   -0.004   -0.002

#visualization 
library(semPlot)

## Warning: package 'semPlot' was built under R version 4.1.3

semPlot::semPaths(fit1, edge.color = "green")

Alternative Model

Next, I test a second model in which only CM and DS are latent constructs. Here I examine how age, education, and gender relate to the prior model; these three are the exogenous variables. Model hypotheses: (i) Age, Edu, and Gender each predict both CM and DS; (ii) Age, Edu, and Gender also each predict both MS and VH; (iii) CM and DS each predict both MS and VH. I am ignoring indirect effects.

# MS = Misinformation Sharing: the mean of the two sharing items
# (each scored 0 or 10), so MS takes the values 0, 5 or 10.
wd$MS <- (wd$VM3_sm + wd$VM5_spm) / 2

# Structural model 2.
# Latent factors: CM and DS only. MS and VH are observed outcomes; Age, Edu
# and Gender are observed exogenous predictors of CM, DS, MS and VH.
# NOTE(review): Gender is still a character column in wd at this point. How
# lavaan coerces a character predictor is implicit - confirm the coding, or
# convert Gender to an explicit numeric dummy before fitting.
model2 <- '
#The measurement models
CM =~  CM1 + CM2 + CM3 + CM4 + CM5 
DS =~ DS1 + DS2 + DS3 + DS4 + DS5 + DS6

#regressions 
MS ~ Age + Edu + Gender + CM + DS 
VH ~ Age + Edu + Gender + CM + DS
CM ~ Age + Edu + Gender
DS ~ Age + Edu + Gender

#exogenous variances and covariances
Age ~~ Age
Edu ~~ Edu
Gender ~~ Gender
Age ~~ Edu
Age ~~ Gender
Gender ~~ Edu
'
# Fit and summarise; TRUE is spelled out because T can be reassigned.
fit2 <- sem(model2, data = wd)
summary(fit2, standardized = TRUE, rsq = TRUE, fit.measures = TRUE)

## lavaan 0.6-9 ended normally after 61 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        47
##                                                       
##                                                   Used       Total
##   Number of observations                          1186        1397
##                                                                   
## Model Test User Model:
##                                                       
##   Test statistic                               894.572
##   Degrees of freedom                                89
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              6902.813
##   Degrees of freedom                               120
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.881
##   Tucker-Lewis Index (TLI)                       0.840
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -28967.328
##   Loglikelihood unrestricted model (H1)             NA
##                                                       
##   Akaike (AIC)                               58028.655
##   Bayesian (BIC)                             58267.337
##   Sample-size adjusted Bayesian (BIC)        58118.048
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.087
##   90 Percent confidence interval - lower         0.082
##   90 Percent confidence interval - upper         0.093
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.122
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CM =~                                                                 
##     CM1               1.000                               0.765    0.527
##     CM2               0.708    0.055   12.983    0.000    0.541    0.483
##     CM3               1.142    0.073   15.643    0.000    0.873    0.644
##     CM4               1.498    0.085   17.681    0.000    1.146    0.852
##     CM5               1.657    0.095   17.362    0.000    1.267    0.794
##   DS =~                                                                 
##     DS1               1.000                               0.791    0.643
##     DS2               1.129    0.057   19.691    0.000    0.893    0.650
##     DS3               1.087    0.054   20.231    0.000    0.859    0.671
##     DS4               1.260    0.050   25.196    0.000    0.996    0.893
##     DS5               1.163    0.046   25.069    0.000    0.919    0.886
##     DS6               1.069    0.046   23.061    0.000    0.845    0.790
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   MS ~                                                                  
##     Age              -0.088    0.098   -0.893    0.372   -0.088   -0.027
##     Edu               0.192    0.094    2.040    0.041    0.192    0.061
##     Gender            0.303    0.204    1.484    0.138    0.303    0.043
##     CM                0.290    0.162    1.788    0.074    0.221    0.059
##     DS                0.260    0.146    1.780    0.075    0.206    0.055
##   VH ~                                                                  
##     Age              -0.107    0.049   -2.197    0.028   -0.107   -0.062
##     Edu               0.038    0.046    0.820    0.412    0.038    0.023
##     Gender           -0.172    0.101   -1.706    0.088   -0.172   -0.047
##     CM                0.298    0.081    3.683    0.000    0.228    0.116
##     DS                0.891    0.079   11.346    0.000    0.704    0.357
##   CM ~                                                                  
##     Age               0.130    0.021    6.064    0.000    0.169    0.194
##     Edu              -0.125    0.020   -6.127    0.000   -0.164   -0.196
##     Gender           -0.050    0.044   -1.152    0.249   -0.066   -0.035
##   DS ~                                                                  
##     Age               0.091    0.021    4.334    0.000    0.115    0.131
##     Edu              -0.067    0.020   -3.369    0.001   -0.085   -0.102
##     Gender            0.060    0.044    1.353    0.176    0.076    0.041
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Age ~~                                                                
##     Edu              -0.061    0.040   -1.538    0.124   -0.061   -0.045
##     Gender            0.059    0.018    3.271    0.001    0.059    0.095
##   Edu ~~                                                                
##     Gender           -0.048    0.019   -2.566    0.010   -0.048   -0.075
##  .MS ~~                                                                 
##    .VH               -0.395    0.201   -1.966    0.049   -0.395   -0.058
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     Age               1.316    0.054   24.352    0.000    1.316    1.000
##     Edu               1.430    0.059   24.352    0.000    1.430    1.000
##     Gender            0.289    0.012   24.352    0.000    0.289    1.000
##    .CM1               1.523    0.067   22.717    0.000    1.523    0.723
##    .CM2               0.965    0.042   23.065    0.000    0.965    0.767
##    .CM3               1.077    0.051   21.266    0.000    1.077    0.585
##    .CM4               0.496    0.040   12.465    0.000    0.496    0.274
##    .CM5               0.938    0.058   16.208    0.000    0.938    0.369
##    .DS1               0.886    0.039   22.864    0.000    0.886    0.586
##    .DS2               1.091    0.048   22.810    0.000    1.091    0.578
##    .DS3               0.901    0.040   22.620    0.000    0.901    0.550
##    .DS4               0.252    0.016   15.656    0.000    0.252    0.202
##    .DS5               0.231    0.014   16.255    0.000    0.231    0.215
##    .DS6               0.430    0.021   20.783    0.000    0.430    0.376
##    .MS               13.977    0.574   24.330    0.000   13.977    0.990
##    .VH                3.362    0.140   23.979    0.000    3.362    0.862
##    .CM                0.539    0.060    9.042    0.000    0.921    0.921
##    .DS                0.605    0.050   12.008    0.000    0.968    0.968
## 
## R-Square:
##                    Estimate
##     CM1               0.277
##     CM2               0.233
##     CM3               0.415
##     CM4               0.726
##     CM5               0.631
##     DS1               0.414
##     DS2               0.422
##     DS3               0.450
##     DS4               0.798
##     DS5               0.785
##     DS6               0.624
##     MS                0.010
##     VH                0.138
##     CM                0.079
##     DS                0.032

In terms of fit, this model was not as good as model 1 (here CFI = 0.881, TLI = 0.840, RMSEA = 0.087, SRMR = 0.122).

# Draw the path diagram for the alternative model, with blue edges.
semPlot::semPaths(object = fit2, edge.color = "blue")

## Discussion (and especially limitations)

- Normality test.
- Sample: an Australian sample with a disproportionate number of educated and elderly respondents.
- Vaccine-hesitant people made up only 24% of the sample.
- The MS construct is purely speculative and has not been validated.

What Underlies Vaccine Hesitancy

Sanjay Chhetri

2022-04-25

Data Source

Data Prep

Model Testing