Data Analysis and Visualization
Paper 2

Author

Gagan Atreya

Published

November 25, 2023

Section 1: Data import and cleaning

Display code
## clear environment:
rm(list=ls())
options(digits = 2)

## Correct asterisk pattern for stargazer tables:
starpattern <- "<em>&#42;p&lt;0.05;&nbsp;&#42;&#42;p&lt;0.01;&nbsp;&#42;&#42;&#42;p&lt;0.001</em>"

## Install "pacman" package if not installed
# (remove the # symbol from the line below):
# install.packages("pacman")

## Load R packages:
## (note: plyr is listed after tidyverse, so plyr::summarise and plyr::mutate mask the dplyr versions)
pacman::p_load(data.table, tidyverse, haven, labelled, vtable, 
               psych, scales, weights, clipr, forcats,
               stargazer, ggthemes, ggcharts, geomtextpath,
               corrplot, tm, readxl, patchwork, modelsummary,
               gt, lme4, car, lmerTest, lavaan,
               ggeffects, magrittr, broom, broom.mixed,
               backports, effects, interactions, plyr, sjPlot)

## Import datasets. These are cleaned datasets with variables created and named in the individual country R scripts:

# Gambia:
dsgmb <- fread("~/Dropbox/2023_Gagan/Paper02/data/dsgmb.csv")

# Pakistan:
dspak <- fread("~/Dropbox/2023_Gagan/Paper02/data/dspak.csv")

# Tanzania:
dstza <- fread("~/Dropbox/2023_Gagan/Paper02/data/dstza.csv")

# Uganda:
dsuga <- fread("~/Dropbox/2023_Gagan/Paper02/data/dsuga.csv")

# Combine into one dataset:
ds <- rbind(dsgmb, dspak, dstza, dsuga)

## Clean demographic variables: 

## gender:
ds$gender <- ifelse(ds$gender == "Male", "Male",
             ifelse(ds$gender == "Female", "Female", NA))

## ses:
ds$ses <- ifelse(ds$ses == "", NA, ds$ses)

## Religion:
ds$religion <- ifelse(ds$religion == "", NA, ds$religion)

ds$religion <- ifelse(ds$religion == "Christian (Catholic)", "Christian: Catholic", 
               ifelse(ds$religion == "Christian (Protestant)", "Christian: Protestant",
               ifelse(ds$religion == "Muslim (Shia)", "Muslim: Shia",
               ifelse(ds$religion == "Muslim (Sunni)", "Muslim: Sunni", ds$religion))))

## Marital status
ds$married <- ifelse(ds$married == "Not married", "Unmarried", ds$married)
ds$married <- ifelse(ds$married == "", NA, ds$married)

## Separate into individual country datasets:
dsgmb <- ds[ds$country == "Gambia", ]
dspak <- ds[ds$country == "Pakistan", ]
dstza <- ds[ds$country == "Tanzania", ]
dsuga <- ds[ds$country == "Uganda", ]
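
The nested ifelse() calls above work, but an equivalent dplyr::case_when() version can be easier to extend if more labels need harmonising. This is a sketch that only re-expresses the religion recode above; the dplyr:: prefixes guard against the plyr masking noted in Section 1.

## Equivalent religion recode with case_when (idempotent here, since the recode above has already been applied):
ds <- ds %>%
  dplyr::mutate(religion = dplyr::case_when(
    religion == ""                       ~ NA_character_,
    religion == "Christian (Catholic)"   ~ "Christian: Catholic",
    religion == "Christian (Protestant)" ~ "Christian: Protestant",
    religion == "Muslim (Shia)"          ~ "Muslim: Shia",
    religion == "Muslim (Sunni)"         ~ "Muslim: Sunni",
    TRUE                                 ~ religion
  ))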

Missing data: Gambia

Display code
missing_ds <- map(dsgmb, ~mean(is.na(.))*100) 
missing_ds <- as.data.frame(missing_ds)
missing_ds <- as_tibble(cbind(variable = names(missing_ds), t(missing_ds)))
missing_ds$percent_missing <- missing_ds$V2
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)

missing_ds <- select(missing_ds,-c(2))
missing_ds <- as.data.frame(missing_ds)
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)
missing_ds <- as.data.table(missing_ds)

missing_ds_01 <- missing_ds[1:31,]
missing_ds_02 <- missing_ds[32:61,]

lp01 <- missing_ds_01 %>%
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "Gambia")+
  theme_bw()

lp02 <- missing_ds_02 %>% 
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "")+
  theme_bw()

lp01| lp02
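
The same missing-data summary and two-panel lollipop plot are repeated verbatim for Pakistan, Tanzania, Uganda, and the full dataset below. A small helper along the following lines (a sketch, assuming the packages loaded in Section 1 and the same split of variables into two panels) would avoid the duplication; the dplyr:: prefix avoids the plyr masking noted in Section 1.

## Sketch of a reusable missing-data plot (not used below; the per-country blocks are kept as-is):
plot_missing <- function(data, title = "") {
  ## Percent missing per variable, in long format:
  miss <- data %>%
    dplyr::summarise(across(everything(), ~ mean(is.na(.)) * 100)) %>%
    pivot_longer(everything(), names_to = "variable", values_to = "percent_missing")

  half <- ceiling(nrow(miss) / 2)

  panel <- function(d, ttl) {
    lollipop_chart(d, x = variable, y = percent_missing,
                   line_color = "black", point_color = "black") +
      labs(y = "% missing data", x = "", title = ttl) +
      theme_bw()
  }

  ## Side-by-side panels via patchwork:
  panel(miss[1:half, ], title) | panel(miss[(half + 1):nrow(miss), ], "")
}

## Example: plot_missing(dsgmb, "Gambia")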

Missing data: Pakistan

Display code
missing_ds <- map(dspak, ~mean(is.na(.))*100) 
missing_ds <- as.data.frame(missing_ds)
missing_ds <- as_tibble(cbind(variable = names(missing_ds), t(missing_ds)))
missing_ds$percent_missing <- missing_ds$V2
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)

missing_ds <- select(missing_ds,-c(2))
missing_ds <- as.data.frame(missing_ds)
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)
missing_ds <- as.data.table(missing_ds)

missing_ds_01 <- missing_ds[1:31,]
missing_ds_02 <- missing_ds[32:61,]

lp01 <- missing_ds_01 %>%
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "Pakistan")+
  theme_bw()

lp02 <- missing_ds_02 %>% 
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "")+
  theme_bw()

lp01| lp02

Missing data: Tanzania

Display code
missing_ds <- map(dstza, ~mean(is.na(.))*100) 
missing_ds <- as.data.frame(missing_ds)
missing_ds <- as_tibble(cbind(variable = names(missing_ds), t(missing_ds)))
missing_ds$percent_missing <- missing_ds$V2
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)

missing_ds <- select(missing_ds,-c(2))
missing_ds <- as.data.frame(missing_ds)
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)
missing_ds <- as.data.table(missing_ds)

missing_ds_01 <- missing_ds[1:31,]
missing_ds_02 <- missing_ds[32:61,]

lp01 <- missing_ds_01 %>%
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "Tanzania")+
  theme_bw()

lp02 <- missing_ds_02 %>% 
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "")+
  theme_bw()

lp01| lp02

Missing data: Uganda

Display code
missing_ds <- map(dsuga, ~mean(is.na(.))*100) 
missing_ds <- as.data.frame(missing_ds)
missing_ds <- as_tibble(cbind(variable = names(missing_ds), t(missing_ds)))
missing_ds$percent_missing <- missing_ds$V2
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)

missing_ds <- select(missing_ds,-c(2))
missing_ds <- as.data.frame(missing_ds)
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)
missing_ds <- as.data.table(missing_ds)

missing_ds_01 <- missing_ds[1:31,]
missing_ds_02 <- missing_ds[32:61,]

lp01 <- missing_ds_01 %>%
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "Uganda")+
  theme_bw()

lp02 <- missing_ds_02 %>% 
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "")+
  theme_bw()

lp01| lp02

Missing data: full dataset

Display code
missing_ds <- map(ds, ~mean(is.na(.))*100) 
missing_ds <- as.data.frame(missing_ds)
missing_ds <- as_tibble(cbind(variable = names(missing_ds), t(missing_ds)))
missing_ds$percent_missing <- missing_ds$V2
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)

missing_ds <- select(missing_ds,-c(2))
missing_ds <- as.data.frame(missing_ds)
missing_ds$percent_missing <- as.numeric(missing_ds$percent_missing)
missing_ds <- as.data.table(missing_ds)

missing_ds_01 <- missing_ds[1:31,]
missing_ds_02 <- missing_ds[32:61,]

lp01 <- missing_ds_01 %>%
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "Full dataset")+
  theme_bw()

lp02 <- missing_ds_02 %>% 
  lollipop_chart(x = variable,
                 y = percent_missing,
               line_color = "black",
               point_color = "black")+
  labs(y = "% missing data",
       x = "",
       title = "")+
  theme_bw()

lp01| lp02

Section 2: Demographics

Demographic variables in the analysis:

  • Age
  • Gender
  • Socio-economic status
  • Religious affiliation
  • Education (needs substantial recoding; see note below)
  • Marital status
  • Ethnicity

Variable: Sample size by country

Display code
tbl01 <- table(ds$country)

## Sample size by country:
tbl01

  Gambia Pakistan Tanzania   Uganda 
     237      505      352      448 
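
For reference, the same counts expressed as percentages of the pooled sample (an optional addition, not part of the original output):

## Country shares of the pooled sample, in percent:
round(100 * prop.table(tbl01), 1)
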
Display code
## Sample size by country:

lp01 <- ds %>% 
  # drop_na(country) %>%
  lollipop_chart(x = country,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Sample size by country")+
  theme_bw()

lp01

Variable: Age

Display code
summary(ds$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      26      33      37      45      92      29 
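
A compact by-country summary of age to accompany the histograms below (a sketch using packages already loaded; not part of the original output, and the dplyr:: prefixes avoid the plyr masking noted in Section 1):

## Mean, SD, and n of age by country:
ds %>%
  drop_na(age) %>%
  dplyr::group_by(country) %>%
  dplyr::summarise(mean_age = mean(age),
                   sd_age   = sd(age),
                   n        = dplyr::n())
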
Display code
ds %>% drop_na(age)%>%
ggplot(aes(x = age))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 37.00", 
                 xintercept = 37.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Age", 
       y = "Frequency", 
       title = "Age distribution (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(age)%>%
ggplot(aes(x = age))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  labs(x = "Age", 
       y = "Frequency", 
       title = "Age distribution by country")+
  facet_wrap(~country, nrow = 2)+
  theme_bw()

Variable: Gender

Display code
lp02 <- ds %>% 
  drop_na(gender, age) %>%
lollipop_chart(x = gender,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gender distribution (full sample)")+
#  facet_wrap(~country, nrow = 2)+
  theme_bw()

lp02

Display code
bp01 <- ds %>% drop_na(gender, age) %>% 
  ggplot(aes(y = age, 
             x = gender))+
geom_boxplot(fill = "grey")+
  labs(y = "Age",
       x = "",
       title = "Age and gender distribution by country")+
  facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

bp01

Variable: Socio-economic status

Display code
ds %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Socioeconomic status (full sample)")+
  theme_bw()

Display code
sesgmb <- dsgmb %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gambia")+
  theme_bw()


sespak <- dspak %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Pakistan")+
  theme_bw()


sestza <- dstza %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Tanzania")+
  theme_bw()

sesuga <- dsuga %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Uganda")+
  theme_bw()

## All four plots together:

sesplot <- (sesgmb | sespak) / (sestza | sesuga) 

sesplot + plot_annotation("Socio-economic status by country")

Variable: Marital status

Display code
ds %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Marital status (full sample)")+
  theme_bw()

Display code
maritalgmb <- dsgmb %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gambia")+
  theme_bw()


maritalpak <- dspak %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Pakistan")+
  theme_bw()


maritaltza <- dstza %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Tanzania")+
  theme_bw()

maritaluga <- dsuga %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Uganda")+
  theme_bw()

## All four plots together:

maritalplot <- (maritalgmb | maritalpak) / (maritaltza | maritaluga) 

maritalplot + plot_annotation("Marital status by country")

Variable: Religious affiliation

Display code
lp05 <- ds %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Religious distribution (full sample)")+
  theme_bw()

lp05

Display code
religiongmb <- dsgmb %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gambia")+
  theme_bw()


religionpak <- dspak %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Pakistan")+
  theme_bw()


religiontza <- dstza %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Tanzania")+
  theme_bw()

religionuga <- dsuga %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Uganda")+
  theme_bw()

## All four plots together:

religionplot <- (religiongmb | religionpak) / (religiontza | religionuga) 

religionplot + plot_annotation("Religious affiliation by country")

Variable: Education

Note: the education variable still needs substantial recoding, particularly for Gambia and Tanzania; a possible approach is sketched after the commented checks below.

Display code
# table(dsgmb$education)
# table(dspak$education)
# table(dstza$education)
# table(dsuga$education)
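
A possible starting point for the education recode (a sketch; the commented calls above only inspect the raw values, and the collapsed labels below are placeholders rather than the actual response options):

## Cross-tabulate raw education responses by country to spot inconsistent labels:
# table(ds$country, ds$education)

## Once the raw categories are known, they could be collapsed with forcats::fct_collapse, e.g.:
# ds$education_collapsed <- fct_collapse(ds$education,
#   "Primary or less" = c("<raw labels here>"),
#   "Secondary"       = c("<raw labels here>"),
#   "Tertiary"        = c("<raw labels here>")
# )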

Variable: Ethnicity

Display code
## Gambia:
eth <- as.data.frame(table(dsgmb$ethnicity))
eth$Var1 <- as.character(eth$Var1)
eth$ethnicity <- ifelse(eth$Freq < 2, "Other", eth$Var1)
l1 <- as.list(eth$ethnicity)

dsgmb2 <- dsgmb[, c("ethnicity")]

dsgmb2$ethnicity <- ifelse(dsgmb2$ethnicity %in% l1, dsgmb2$ethnicity, "Other")
dsgmb2$ethnicity <- ifelse(dsgmb2$ethnicity == "Serere", "Serer",
                 ifelse(dsgmb2$ethnicity == "", NA, dsgmb2$ethnicity))

ethgmb <- dsgmb2 %>% drop_na(ethnicity) %>%
lollipop_chart(x = ethnicity,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gambia")+
  theme_bw()

## Pakistan:
eth <- as.data.frame(table(dspak$ethnicity))
eth$Var1 <- as.character(eth$Var1)
eth$ethnicity <- ifelse(eth$Freq < 7, "Other", eth$Var1)
l1 <- as.list(eth$ethnicity)

dspak2 <- dspak[, c("ethnicity")]

dspak2$ethnicity <- ifelse(dspak2$ethnicity %in% l1, dspak2$ethnicity, "Other")
dspak2$ethnicity <- ifelse(dspak2$ethnicity == "Urduspeaking", "Urdu speaking",
                 ifelse(dspak2$ethnicity == "", NA, dspak2$ethnicity))

ethpak <- dspak2 %>% drop_na(ethnicity) %>%
lollipop_chart(x = ethnicity,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Pakistan")+
  theme_bw()

## Tanzania:
eth <- as.data.frame(table(dstza$ethnicity))
eth$Var1 <- as.character(eth$Var1)
eth$ethnicity <- ifelse(eth$Freq < 7, "Other", eth$Var1)
l1 <- as.list(eth$ethnicity)

dstza2 <- dstza[, c("ethnicity")]

dstza2$ethnicity <- ifelse(dstza2$ethnicity %in% l1, dstza2$ethnicity, "Other")
dstza2$ethnicity <- ifelse(dstza2$ethnicity == "Serere", "Serer",
                 ifelse(dstza2$ethnicity == "", NA, dstza2$ethnicity))

ethtza <- dstza2 %>% drop_na(ethnicity) %>%
lollipop_chart(x = ethnicity,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Tanzania")+
  theme_bw()

## Uganda:
eth <- as.data.frame(table(dsuga$ethnicity))
eth$Var1 <- as.character(eth$Var1)
eth$ethnicity <- ifelse(eth$Freq < 7, "Other", eth$Var1)
l1 <- as.list(eth$ethnicity)

dsuga2 <- dsuga[, c("ethnicity")]

dsuga2$ethnicity <- ifelse(dsuga2$ethnicity %in% l1, dsuga2$ethnicity, "Other")
dsuga2$ethnicity <- ifelse(dsuga2$ethnicity == "japhadollah", "Japhadollah",
                    ifelse(dsuga2$ethnicity == "Atesot", "Iteso",
                    ifelse(dsuga2$ethnicity == "Etesot", "Iteso",
                    ifelse(dsuga2$ethnicity == "Itesot", "Iteso",
                    ifelse(dsuga2$ethnicity == "", NA, dsuga2$ethnicity)))))

ethuga <- dsuga2 %>% drop_na(ethnicity) %>%
lollipop_chart(x = ethnicity,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Uganda")+
  theme_bw()

## All four plots together:

ethplot <- (ethgmb | ethpak) / (ethtza | ethuga) 

ethplot + plot_annotation("Ethnic distribution by country")
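
The manual rare-category lumping above could also be written with forcats::fct_lump_min. This is a sketch assuming the same thresholds (categories with fewer than 2 respondents in Gambia and fewer than 7 in the other countries are folded into "Other"); the spelling harmonisation would still be applied separately as above.

## Blank responses become NA, then rare categories are lumped into "Other":
lump_ethnicity <- function(x, min_n) {
  x <- ifelse(x == "", NA, x)
  fct_lump_min(x, min = min_n, other_level = "Other")
}

## Example: dsgmb2 <- data.frame(ethnicity = lump_ethnicity(dsgmb$ethnicity, 2))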

Section 3: Measures of interest

Variables of interest in this analysis:

  • Endorsement of Barrier Bound Leadership (BBL)

  • Endorsement of Barrier Crossing Leadership (BCL)

  • Imagistic items:

    • Event: Positive affect
    • Event: Negative affect
    • Event: Episodic recall
    • Event: Shared perception
    • Event: Reflection
    • Event: Transformative for individual
    • Event: Transformative for group
  • Ingroup fusion

  • Ingroup identification

  • Outgroup fusion

  • Outgroup identification

  • Perception of religious freedom

  • Experience of religious freedom

  • Empathic concern

  • Perspective taking

  • Perceived history of discrimination

  • Frequency of positive contact with outgroup

  • Frequency of negative contact with outgroup

  • Outgroup cooperation

  • Outgroup hostility

  • Willingness to fight outgroup

  • Outgroup affect (made up of five sub-items)

Section 3b: Outgroup attitudinal measures

Perceived history of discrimination

Display code
# single item measure

ogp01 <- ds %>% drop_na(history_discrimination) %>% 
  ggplot(aes(y = history_discrimination, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Perceived discrimination",
       x = "",
       title = "Perceived history of discrimination by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp01

Positive contact with outgroup:

Display code
# single item measure

ogp02 <- ds %>% drop_na(freq_positive_contact) %>% 
  ggplot(aes(y = freq_positive_contact, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Frequency",
       x = "",
       title = "Positive contact with outgroup by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp02

Negative contact with outgroup:

Display code
# single item measure

ogp03 <- ds %>% drop_na(freq_negative_contact) %>% 
  ggplot(aes(y = freq_negative_contact, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Frequency",
       x = "",
       title = "Negative contact with outgroup by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp03

Willingness to fight outgroup:

Display code
# single item measure

ogp04 <- ds %>% drop_na(fight_outgroup) %>% 
  ggplot(aes(y = fight_outgroup, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Willingness",
       x = "",
       title = "Willingness to fight outgroup by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp04

Outgroup hostility:

Display code
# 2 items make up full scale:
# og_host_01
# og_host_02

ogp05 <- ds %>% drop_na(og_hostility) %>% 
  ggplot(aes(y = og_hostility, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Hostility",
       x = "",
       title = "Outgroup hostility by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp05

Outgroup cooperation:

Display code
# 2 items make up full scale:
# og_coop_01
# og_coop_02

ogp06 <- ds %>% drop_na(og_cooperation) %>% 
  ggplot(aes(y = og_cooperation, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Cooperation",
       x = "",
       title = "Outgroup cooperation by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp06

Outgroup affect:

Display code
# 5 items make up full scale:

#NegativeToPositive
#HostileToFriendly
#SuspiciousToTrusting
#ContemptToRespect
#ThreatenedToRelaxed


ogp07 <- ds %>% drop_na(og_affect) %>% 
  ggplot(aes(y = og_affect, 
             x = country))+
geom_boxplot(fill = "grey")+
  labs(y = "Affect",
       x = "",
       title = "Outgroup affect by country")+
  #facet_wrap(~country, nrow = 2)+
  coord_flip()+
  theme_bw()

ogp07

Section 4: TO DO

  • Replicate Table 2 from Paper 1 for all of the above variables

    • This is quite easy. Copy R code from one of the individual country Rpubs files.
  • Replicate Figure 6 from Paper 1

    • This will take a bit of work.
  • Replicate Table 3 from Paper 1 (CFA for BCL and BBL)

    • Done. See Section 5 below.
  • Replicate Table 4 from Paper 1 (CFA for imagistic items)

  • Replicate Table 5 from Paper 1 (Standardized Loadings from Three Factor EFA)

  • Replicate Table 6 from Paper 1 (CFA for Fusion/Identification measures)

    • Done. See Sections 6 and 7 below.
  • Everything else from Table 6 onwards…

Section 5: EFA and CFA: BCL and BBL

EFA:

Display code
leadership <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03, 
                               ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03)

leadership <- na.omit(leadership)

scree(leadership)
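
As an optional complement to the scree plot (not part of the original analysis), a parallel analysis from the psych package already loaded in Section 1 gives a second criterion for how many factors to retain:

## Parallel analysis (factor solution only):
fa.parallel(leadership, fa = "fa")
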

Display code
fit02 <- factanal(leadership, 2, rotation="promax")
fit02

Call:
factanal(x = leadership, factors = 2, rotation = "promax")

Uniquenesses:
ds$ENDBCL01 ds$ENDBCL02 ds$ENDBCL03 ds$ENDBBL01 ds$ENDBBL02 ds$ENDBBL03 
       0.66        0.38        0.53        0.49        0.41        0.33 

Loadings:
            Factor1 Factor2
ds$ENDBCL01          0.578 
ds$ENDBCL02          0.785 
ds$ENDBCL03          0.684 
ds$ENDBBL01  0.711         
ds$ENDBBL02  0.766         
ds$ENDBBL03  0.818         

               Factor1 Factor2
SS loadings       1.76    1.42
Proportion Var    0.29    0.24
Cumulative Var    0.29    0.53

Factor Correlations:
        Factor1 Factor2
Factor1   1.000  -0.038
Factor2  -0.038   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 2.1 on 4 degrees of freedom.
The p-value is 0.72 

CFA:

Display code
## Strip the 'ds$END' prefix from the variable names:
names(leadership) <- sub(".*END", "", names(leadership))

## Correlated two-factor solution; latent variances fixed to 1 (std.lv = TRUE)
twofacs <- 'BCL =~ BCL01+BCL02+BCL03
            BBL =~ BBL01+BBL02+BBL03' 

cfa02 <- cfa(twofacs, 
             data=leadership, 
             std.lv=TRUE) 

summary(cfa02,
        fit.measures=TRUE,
        standardized=TRUE)
lavaan 0.6.16 ended normally after 25 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        13

  Number of observations                          1487

Model Test User Model:
                                                      
  Test statistic                                18.951
  Degrees of freedom                                 8
  P-value (Chi-square)                           0.015

Model Test Baseline Model:

  Test statistic                              2408.619
  Degrees of freedom                                15
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.995
  Tucker-Lewis Index (TLI)                       0.991

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -16240.095
  Loglikelihood unrestricted model (H1)     -16230.619
                                                      
  Akaike (AIC)                               32506.189
  Bayesian (BIC)                             32575.148
  Sample-size adjusted Bayesian (SABIC)      32533.851

Root Mean Square Error of Approximation:

  RMSEA                                          0.030
  90 Percent confidence interval - lower         0.013
  90 Percent confidence interval - upper         0.048
  P-value H_0: RMSEA <= 0.050                    0.966
  P-value H_0: RMSEA >= 0.080                    0.000

Standardized Root Mean Square Residual:

  SRMR                                           0.023

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  BCL =~                                                                
    BCL01             0.872    0.042   20.553    0.000    0.872    0.580
    BCL02             1.175    0.045   26.213    0.000    1.175    0.786
    BCL03             0.980    0.042   23.466    0.000    0.980    0.682
  BBL =~                                                                
    BBL01             1.401    0.050   28.018    0.000    1.401    0.705
    BBL02             1.460    0.048   30.687    0.000    1.460    0.768
    BBL03             1.666    0.051   32.906    0.000    1.666    0.819

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  BCL ~~                                                                
    BBL              -0.046    0.033   -1.398    0.162   -0.046   -0.046

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .BCL01             1.501    0.068   21.974    0.000    1.501    0.663
   .BCL02             0.853    0.080   10.700    0.000    0.853    0.382
   .BCL03             1.107    0.065   16.990    0.000    1.107    0.536
   .BBL01             1.982    0.096   20.597    0.000    1.982    0.503
   .BBL02             1.481    0.087   16.949    0.000    1.481    0.410
   .BBL03             1.359    0.102   13.301    0.000    1.359    0.329
    BCL               1.000                               1.000    1.000
    BBL               1.000                               1.000    1.000
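
To pull just the headline fit statistics reported above out of the lavaan object (a convenience sketch; cfa02 here is the fit estimated in this section):

## Compact view of the main fit indices:
fitMeasures(cfa02, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))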

Section 6: EFA and CFA: Ingroup Fusion and Identification

EFA

Display code
## Ingroup bonds (fusion and identification) data frame:
IGbonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03,
                            ds$IGI01, ds$IGI02, ds$IGI03)

names(IGbonds) <- sub('ds\\$', '', names(IGbonds))

IGbonds <- na.omit(IGbonds)

scree(IGbonds)

Display code
fit02 <- factanal(IGbonds, 2, rotation="promax")
fit02

Call:
factanal(x = IGbonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 
 0.55  0.27  0.54  0.65  0.36  0.41 

Loadings:
      Factor1 Factor2
IGF01  0.138   0.555 
IGF02          0.930 
IGF03          0.601 
IGI01  0.519         
IGI02  0.870         
IGI03  0.761         

               Factor1 Factor2
SS loadings       1.64    1.55
Proportion Var    0.27    0.26
Cumulative Var    0.27    0.53

Factor Correlations:
        Factor1 Factor2
Factor1   1.000  -0.776
Factor2  -0.776   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 5.5 on 4 degrees of freedom.
The p-value is 0.23 

CFA

Display code
## Correlated two-factor solution; latent variances fixed to 1 (std.lv = TRUE)
twofacs <- 'IGFusion =~ IGF01+IGF02+IGF03
            IGIdentification =~ IGI01+IGI02+IGI03' 

cfa02 <- cfa(twofacs, 
             data=IGbonds, 
             std.lv=TRUE) 

summary(cfa02,
        fit.measures=TRUE,
        standardized=TRUE)
lavaan 0.6.16 ended normally after 18 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        13

  Number of observations                          1475

Model Test User Model:
                                                      
  Test statistic                                23.032
  Degrees of freedom                                 8
  P-value (Chi-square)                           0.003

Model Test Baseline Model:

  Test statistic                              3096.744
  Degrees of freedom                                15
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.995
  Tucker-Lewis Index (TLI)                       0.991

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -12303.905
  Loglikelihood unrestricted model (H1)     -12292.388
                                                      
  Akaike (AIC)                               24633.809
  Bayesian (BIC)                             24702.662
  Sample-size adjusted Bayesian (SABIC)      24661.365

Root Mean Square Error of Approximation:

  RMSEA                                          0.036
  90 Percent confidence interval - lower         0.019
  90 Percent confidence interval - upper         0.053
  P-value H_0: RMSEA <= 0.050                    0.907
  P-value H_0: RMSEA >= 0.080                    0.000

Standardized Root Mean Square Residual:

  SRMR                                           0.016

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                      Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  IGFusion =~                                                              
    IGF01                0.741    0.027   27.415    0.000    0.741    0.689
    IGF02                0.915    0.027   33.423    0.000    0.915    0.807
    IGF03                0.967    0.035   27.746    0.000    0.967    0.696
  IGIdentification =~                                                      
    IGI01                0.778    0.033   23.390    0.000    0.778    0.606
    IGI02                0.837    0.026   31.754    0.000    0.837    0.776
    IGI03                0.796    0.025   32.147    0.000    0.796    0.783

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  IGFusion ~~                                                           
    IGIdentificatn    0.790    0.018   43.038    0.000    0.790    0.790

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .IGF01             0.608    0.028   21.603    0.000    0.608    0.525
   .IGF02             0.447    0.029   15.672    0.000    0.447    0.348
   .IGF03             0.998    0.047   21.380    0.000    0.998    0.516
   .IGI01             1.045    0.044   23.663    0.000    1.045    0.633
   .IGI02             0.464    0.026   17.644    0.000    0.464    0.398
   .IGI03             0.399    0.023   17.174    0.000    0.399    0.386
    IGFusion          1.000                               1.000    1.000
    IGIdentificatn    1.000                               1.000    1.000

Section 7: EFA and CFA: Outgroup Fusion and Identification

EFA

Display code
## Outgroup bonds (fusion and identification) data frame:
OGbonds <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03,
                            ds$OGI01, ds$OGI02, ds$OGI03)

names(OGbonds) <- sub('ds\\$', '', names(OGbonds))

OGbonds <- na.omit(OGbonds)

scree(OGbonds)

Display code
fit02 <- factanal(OGbonds, 2, rotation="promax")
fit02

Call:
factanal(x = OGbonds, factors = 2, rotation = "promax")

Uniquenesses:
OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.45  0.27  0.30  0.46  0.23  0.18 

Loadings:
      Factor1 Factor2
OGF01  0.635   0.127 
OGF02  0.886         
OGF03  0.806         
OGI01  0.227   0.539 
OGI02          0.862 
OGI03          0.873 

               Factor1 Factor2
SS loadings       1.89    1.81
Proportion Var    0.32    0.30
Cumulative Var    0.32    0.62

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.802
Factor2   0.802   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 24 on 4 degrees of freedom.
The p-value is 0.000098 

CFA

Display code
## Correlated two-factor solution; latent variances fixed to 1 (std.lv = TRUE)
twofacs <- 'OGFusion =~ OGF01+OGF02+OGF03
            OGIdentification =~ OGI01+OGI02+OGI03' 

cfa02 <- cfa(twofacs, 
             data=OGbonds, 
             std.lv=TRUE) 

summary(cfa02,
        fit.measures=TRUE,
        standardized=TRUE)
lavaan 0.6.16 ended normally after 20 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        13

  Number of observations                          1440

Model Test User Model:
                                                      
  Test statistic                                52.168
  Degrees of freedom                                 8
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              5435.930
  Degrees of freedom                                15
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.992
  Tucker-Lewis Index (TLI)                       0.985

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -14710.228
  Loglikelihood unrestricted model (H1)     -14684.144
                                                      
  Akaike (AIC)                               29446.456
  Bayesian (BIC)                             29514.997
  Sample-size adjusted Bayesian (SABIC)      29473.701

Root Mean Square Error of Approximation:

  RMSEA                                          0.062
  90 Percent confidence interval - lower         0.047
  90 Percent confidence interval - upper         0.078
  P-value H_0: RMSEA <= 0.050                    0.098
  P-value H_0: RMSEA >= 0.080                    0.035

Standardized Root Mean Square Residual:

  SRMR                                           0.019

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                      Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  OGFusion =~                                                              
    OGF01                1.412    0.044   32.015    0.000    1.412    0.752
    OGF02                1.487    0.040   37.478    0.000    1.487    0.839
    OGF03                1.524    0.040   37.696    0.000    1.524    0.842
  OGIdentification =~                                                      
    OGI01                1.320    0.042   31.694    0.000    1.320    0.739
    OGI02                1.600    0.040   40.328    0.000    1.600    0.871
    OGI03                1.625    0.038   42.805    0.000    1.625    0.904

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  OGFusion ~~                                                           
    OGIdentificatn    0.841    0.012   69.956    0.000    0.841    0.841

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .OGF01             1.534    0.069   22.233    0.000    1.534    0.435
   .OGF02             0.932    0.051   18.238    0.000    0.932    0.297
   .OGF03             0.953    0.053   18.008    0.000    0.953    0.291
   .OGI01             1.451    0.061   23.601    0.000    1.451    0.454
   .OGI02             0.817    0.046   17.666    0.000    0.817    0.242
   .OGI03             0.588    0.041   14.219    0.000    0.588    0.182
    OGFusion          1.000                               1.000    1.000
    OGIdentificatn    1.000                               1.000    1.000

Section 8: EFA: Imagistic items (after removing certain items)

EFA

Display code
imagistic <- cbind.data.frame(ds$event_episodic_recall_01, 
                              ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, 
                              ds$event_shared_perception_02,
                              ds$event_reflection_01, 
                              ds$event_reflection_02,
                              ds$event_transformative_indiv_01,
                              #ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01
                              #ds$event_transformative_group_02
                              )

Revised item selection (this version is used in the correlation plot, EFA, and CFA below):

Display code
imagistic <- cbind.data.frame(ds$event_episodic_recall_01, 
                              ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, 
                              ds$event_shared_perception_02,
                              ds$event_reflection_01, 
                              ds$event_reflection_02,
                              #ds$event_transformative_indiv_01,
                              ds$event_transformative_indiv_02,
                              #ds$event_transformative_group_01,
                              ds$event_transformative_group_02
                              )
names(imagistic) <- sub('ds\\$event\\_', '', names(imagistic))

imagistic <- na.omit(imagistic)

mtx <- cor(imagistic[, c(1:8)])

Correlation plot:

Display code
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("darkred", "red", "red",
               "darkgrey", "blue", "darkblue"))

Scree plot:

Display code
scree(imagistic)

Four factor model:

Display code
fit04 <- factanal(imagistic, 4, rotation="promax")

print(fit04$loadings, cutoff = 0.35)

Loadings:
                        Factor1 Factor2 Factor3 Factor4
episodic_recall_01               0.716                 
episodic_recall_02               0.801                 
shared_perception_01                     0.607         
shared_perception_02                     0.866         
reflection_01            0.867                         
reflection_02            0.810                         
transformative_indiv_02                          0.483 
transformative_group_02                          0.859 

               Factor1 Factor2 Factor3 Factor4
SS loadings       1.53    1.19    1.16    1.03
Proportion Var    0.19    0.15    0.14    0.13
Cumulative Var    0.19    0.34    0.48    0.61

Three factor model:

Display code
fit03 <- factanal(imagistic, 3, rotation="promax")

print(fit03$loadings, cutoff = 0.35)

Loadings:
                        Factor1 Factor2 Factor3
episodic_recall_01                       0.904 
episodic_recall_02                       0.523 
shared_perception_01             0.706         
shared_perception_02             0.890         
reflection_01            0.706                 
reflection_02            0.885                 
transformative_indiv_02  0.744                 
transformative_group_02  0.545                 

               Factor1 Factor2 Factor3
SS loadings       2.16    1.39    1.14
Proportion Var    0.27    0.17    0.14
Cumulative Var    0.27    0.44    0.59

Section 9: CFA: Imagistic items

Display code
## Strip any remaining 'ds$' prefix and check the variable names:
names(imagistic) <- sub(".*\\$", "", names(imagistic))

names(imagistic)
[1] "episodic_recall_01"      "episodic_recall_02"     
[3] "shared_perception_01"    "shared_perception_02"   
[5] "reflection_01"           "reflection_02"          
[7] "transformative_indiv_02" "transformative_group_02"
Display code
## Correlated four-factor solution; latent variances fixed to 1 (std.lv = TRUE)
fourfacs <- 'Reflection =~ reflection_01+reflection_02
             Vivid_recall =~ episodic_recall_01+episodic_recall_02
             Perceivedshareness =~ shared_perception_01+shared_perception_02
             Defining_experience =~ transformative_indiv_02+transformative_group_02' 

cfa04 <- cfa(fourfacs, 
             data=imagistic, 
             std.lv=TRUE) 

summary(cfa04,
        fit.measures=TRUE,
        standardized=TRUE)
lavaan 0.6.16 ended normally after 31 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        22

  Number of observations                          1462

Model Test User Model:
                                                      
  Test statistic                               232.850
  Degrees of freedom                                14
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              3938.127
  Degrees of freedom                                28
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.944
  Tucker-Lewis Index (TLI)                       0.888

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -20318.551
  Loglikelihood unrestricted model (H1)     -20202.126
                                                      
  Akaike (AIC)                               40681.102
  Bayesian (BIC)                             40797.428
  Sample-size adjusted Bayesian (SABIC)      40727.541

Root Mean Square Error of Approximation:

  RMSEA                                          0.103
  90 Percent confidence interval - lower         0.092
  90 Percent confidence interval - upper         0.115
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.046

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  Reflection =~                                                               
    reflection_01           1.247    0.040   30.914    0.000    1.247    0.764
    reflection_02           1.518    0.043   35.257    0.000    1.518    0.858
  Vivid_recall =~                                                             
    episdc_rcll_01          0.914    0.036   25.294    0.000    0.914    0.752
    episdc_rcll_02          0.955    0.038   25.014    0.000    0.955    0.741
  Perceivedshareness =~                                                       
    shrd_prcptn_01          1.125    0.050   22.346    0.000    1.125    0.715
    shrd_prcptn_02          1.231    0.050   24.393    0.000    1.231    0.820
  Defining_experience =~                                                      
    trnsfrmtv_n_02          1.686    0.055   30.913    0.000    1.686    0.820
    trnsfrmtv_g_02          1.358    0.054   24.963    0.000    1.358    0.659

Covariances:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  Reflection ~~                                                              
    Vivid_recall           0.519    0.028   18.436    0.000    0.519    0.519
    Perceivdshrnss         0.361    0.031   11.804    0.000    0.361    0.361
    Defining_xprnc         0.794    0.021   37.684    0.000    0.794    0.794
  Vivid_recall ~~                                                            
    Perceivdshrnss         0.472    0.031   15.363    0.000    0.472    0.472
    Defining_xprnc         0.480    0.031   15.499    0.000    0.480    0.480
  Perceivedshareness ~~                                                      
    Defining_xprnc         0.392    0.032   12.320    0.000    0.392    0.392

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .reflection_01     1.109    0.061   18.050    0.000    1.109    0.416
   .reflection_02     0.829    0.074   11.140    0.000    0.829    0.265
   .episdc_rcll_01    0.641    0.050   12.815    0.000    0.641    0.434
   .episdc_rcll_02    0.747    0.055   13.466    0.000    0.747    0.450
   .shrd_prcptn_01    1.212    0.092   13.172    0.000    1.212    0.489
   .shrd_prcptn_02    0.737    0.100    7.371    0.000    0.737    0.327
   .trnsfrmtv_n_02    1.389    0.121   11.497    0.000    1.389    0.328
   .trnsfrmtv_g_02    2.406    0.114   21.142    0.000    2.406    0.566
    Reflection        1.000                               1.000    1.000
    Vivid_recall      1.000                               1.000    1.000
    Perceivdshrnss    1.000                               1.000    1.000
    Defining_xprnc    1.000                               1.000    1.000