Data Analysis and Visualization
Cross-country Data

Author

Gagan Atreya

Published

September 21, 2023

Display code

rm(list=ls())
options(digits = 2)

## Install "pacman" package if not installed
# (remove the # symbol from the line below):
# install.packages("pacman")

## Load R packages:
pacman::p_load(data.table, tidyverse, haven, labelled, vtable, 
               psych, scales, weights, clipr, forcats,
               stargazer, ggthemes, ggcharts, geomtextpath,
               corrplot, tm, gt, lme4, car, lmerTest, 
               ggeffects, magrittr, broom, broom.mixed)

## Import BCL dataset:
ds <- read_sav("~/Desktop/oxford/data/BCL/BCL Dataset.sav")
ds <- as.data.table(ds)
## Remove cases with no country:
ds <- ds[!is.na(ds$Country),]

ds$Country <- as.character(haven::as_factor(ds$Country))

ds <- ds[!(ds$Country == "USA"), ]
table(ds$Country)


  Bangladesh        Ghana       Malawi     Pakistan Sierra Leone     Tanzania 
         310          319          206          387           65          310 
      Uganda 
         293

Display code

ds$CountryID <- ifelse(ds$Country == "Bangladesh", "01",
                ifelse(ds$Country == "Ghana", "02",
                ifelse(ds$Country == "Malawi", "03",
                ifelse(ds$Country == "Pakistan", "04",
                ifelse(ds$Country == "Sierra Leone", "05",
                ifelse(ds$Country == "Tanzania", "06",
                ifelse(ds$Country == "Uganda", "07", NA)))))))

Section 1: Data Cleaning and Exploratory Data Visualization

Variable: Country / Language

Display code

tbl01 <- table(ds$Country, ds$UserLanguage)
tbl02 <- addmargins(tbl01, c(1, 2))

## Table of user language by country:
tbl02

              
                 BN   EN   SW   UR  Sum
  Bangladesh     55  253    0    2  310
  Ghana           0  319    0    0  319
  Malawi          0  206    0    0  206
  Pakistan        0  375    0   12  387
  Sierra Leone    0   65    0    0   65
  Tanzania        0  251   59    0  310
  Uganda          0  293    0    0  293
  Sum            55 1762   59   14 1890

Display code

## Sample size by country:

lp01 <- ds %>% 
  # drop_na(Country) %>%
  lollipop_chart(x = Country,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Sample size by country")+
  theme_bw()

lp01

Variable: Survey duration

Display code

ds$seconds <- as.numeric(ds$Duration__in_seconds_)
ds$minutes <- (ds$seconds/60)
ds$hours <- (ds$minutes/60)

summary(ds$seconds)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    472    1557    2231    3579    3221  120037

Display code

summary(ds$minutes)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      8      26      37      60      54    2001

Display code

summary(ds$hours)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       1       1       1      33

Display code

ds %>% 
#  drop_na(Country)%>%
ggplot(aes(x = hours))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 200)+
  geom_textvline(label = "Mean = 1.00", 
                 xintercept = mean(ds$hours), 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Hours", 
       y = "Frequency", 
       title = "Survey duration (full sample)")+
  theme_bw()

Display code

ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = hours))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 100)+
  labs(x = "Hours", 
       y = "Frequency", 
       title = "Survey duration by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Display code

ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = hours, 
           y = Country, 
           color = Country))+
  geom_point(show.legend = FALSE)+
  labs(x = "Hours",
       title = "Survey duration by country")+
  scale_color_colorblind()+
  theme_bw()

Variable: Age

Display code

ds$age <- as.numeric(ds$age)

summary(ds$age)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      23      26      29      32      82       2

Display code

ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = age, 
           y = Country))+
  geom_boxplot(show.legend = FALSE)+
  labs(x = "Age",
       title = "Age distribution by country")+
  scale_color_colorblind()+
  theme_bw()

Display code

ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = age, 
           y = Country, 
           color = Country))+
  geom_point(show.legend = FALSE)+
  labs(x = "Age",
       title = "Age distribution by country")+
  scale_color_colorblind()+
  theme_bw()

Variable: Gender

Display code

ds$gend1 <- as_factor(ds$gend)
ds$gend2 <- as_factor(ds$gend_6_TEXT)

ds$gend3 <- ifelse(ds$gend2=="Female", "Female",
            ifelse(ds$gend2=="IM FEMAIL", "Female",
            ifelse(ds$gend2=="Male", "Male",
            ifelse(ds$gend2=="male", "Male", ""))))

ds$gender <- ifelse(ds$gend1 == "Male", "Male",
             ifelse(ds$gend3 == "Male", "Male",
             ifelse(ds$gend1 == "Female", "Female",
             ifelse(ds$gend3 == "Female", "Female", NA))))

## Gender distribution (full sample):
lp02 <- ds %>% 
  drop_na(gender) %>%
lollipop_chart(x = gender,
               line_color = "black",
               point_color = "black")+
  labs(x = "Gender", 
       y = "Frequency",
       title = "Gender distribution (full sample)")+
  theme_bw()

lp02

Display code

## Gender distribution by country
barp01 <- ds %>% 
  drop_na(gender) %>%
  ggplot(aes(x = gender, 
             fill = gender))+
  geom_bar()+
  labs(x = "",
       y = "Frequency",
       title = "Gender distribution by country", 
       fill = "")+
  scale_fill_manual(values=c("green", "purple"))+
  facet_wrap(~Country, ncol = 1)+
  coord_flip()+
  theme_bw()+
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

barp01

Variable: Province

Not available for this dataset.

Variable: Relative wealth status

Display code

ds$wealth_level <- as.numeric(ds$wealth_4)
summary(ds$wealth_level)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0      28      50      47      55     100       5

Display code

ds %>% 
  drop_na(wealth_level, gender) %>%
  ggplot(aes(y = wealth_level, 
             x = Country))+
  geom_boxplot(fill = "white")+
  facet_wrap(~gender, 
             ncol = 2)+
  labs(y = "Self-reported relative wealth status", 
       x = "")+
  coord_flip()+
  theme_bw()

Variable: Socio-economic status

Not available for this dataset.

Variable: Income

This variable needs additional work. Each individual entry is a different character. See for example:

Display code

head(table(ds$income), n = 15)


                                     10 million shillings 
                          22                            1 
 48,000,000 Uganda Shillings                     5000 USD 
                           1                            1 
                     50000tk                    GH ₵78934 
                           1                            1 
                  PKR1500000                      Shs1.2m 
                           1                            1 
                           -                   &gt;100000 
                           1                            1 
              ¢1,000-¢60,000                        ¢1000 
                           1                            3 
                       ¢1200                      ¢190000 
                           1                            1 
                         ¢20 
                           1

Variable: Nature of employment

Not available in this dataset. There is an “occupation” variable which needs additional work. See for example:

Display code

head(table(ds$occupation), n = 15)


                                                                                                                     
                                                                                                                  15 
                                                                                                      Am not working 
                                                                                                                   1 
                                                                                                            Employee 
                                                                                                                   1 
                                                                                        I`m a primary school teacher 
                                                                                                                   1 
                                                                                                                ছাত্র 
                                                                                                                   1 
                                                                                                                   2 
                                                                                                                   1 
                                                                                                  A digital marketer 
                                                                                                                   1 
A hustler with a high school education, I do any work which comes to me as long it does not involve using of skills. 
                                                                                                                   1 
                                                                                                          A MARKETER 
                                                                                                                   1 
                                                                           A statistician and operational researcher 
                                                                                                                   1 
                                                                                                           A student 
                                                                                                                   1 
                                                                                                           A teacher 
                                                                                                                   1 
                                                                                                         Account ant 
                                                                                                                   1 
                                                                                                         Accountancy 
                                                                                                                   1 
                                                                                                          Accountant 
                                                                                                                  32

Variable: Religious affiliation

Display code

ds$religion2 <- as_factor(ds$religion)
ds$religion2 <- as.character(ds$religion2)

ds$religion2 <- str_replace_all(ds$religion2, " - ", ": ")

ds$religion2 <- ifelse(ds$religion2 == "Other (please specify)", "Other", 
                       ds$religion2)

## Religion distribution:

lp06 <- ds %>% 
  drop_na(religion2) %>%
lollipop_chart(x = religion2,
               line_color = "black",
               point_color = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Religion distribution (full sample)")+
  theme_bw()

lp06

Display code

## Religion distribution by country:

tbl04 <- table(ds$religion2, ds$Country)
tbl05 <- addmargins(tbl04, c(1, 2))

## Table of user language by country:
tbl05

                         
                          Bangladesh Ghana Malawi Pakistan Sierra Leone
  Agnostic                         2     3      0        1            0
  Atheist                          0     4      1        0            0
  Buddhist                         1     0      0        0            0
  Christian: Catholic              6    52     46        0            8
  Christian: Other                 1   146    102        1           23
  Christian: Protestant            0    61     33        2            5
  Hindu                           21     0      0        3            0
  Jewish                           0     0      0        0            0
  Muslim: Shia                     9     2      0       35            1
  Muslim: Sunni                  262    31      7      336           22
  None                             2     6      0        0            3
  Other                            3     4      9        7            1
  Sikh                             0     0      0        0            0
  Spiritual not Religious          3    10      8        1            2
  Sum                            310   319    206      386           65
                         
                          Tanzania Uganda  Sum
  Agnostic                       0      0    6
  Atheist                        0      0    5
  Buddhist                       0      0    1
  Christian: Catholic           98     93  303
  Christian: Other              59     57  389
  Christian: Protestant         54     97  252
  Hindu                          1      3   28
  Jewish                         1      0    1
  Muslim: Shia                  22      8   77
  Muslim: Sunni                 64     27  749
  None                           1      1   13
  Other                          5      4   33
  Sikh                           1      0    1
  Spiritual not Religious        3      3   30
  Sum                          309    293 1888

Display code

ds$religion3 <- ifelse(ds$religion2=="Other (please specify)", "Other",
                ifelse(ds$religion2=="Atheist", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Agnostic", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Spiritual not Religious", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="None", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Sikh", "Other",
                ifelse(ds$religion2=="Jewish", "Other",
                ifelse(ds$religion2=="Buddhist", "Other",
                       ds$religion2))))))))

## Religion distribution by country:

lp07 <- ds %>% 
  drop_na(religion3) %>%
  group_by(religion3, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(religion3, count)) + 
  geom_segment(aes(x=religion3, xend=religion3, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Religion distribution by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp07

Variable: Marital status

Display code

ds$marriage01 <- as.character(haven::as_factor(ds$marriage))

table(ds$marriage01)


                         Divorced In a relationship but not married 
                               12                               334 
                          Married            Other (please specify) 
                              577                                18 
                           Single                           Widowed 
                              932                                13

Display code

ds$married <- ifelse(ds$marriage01 == "Single", "Unmarried",
              ifelse(ds$marriage01 == "Married", "Married", "Other"))

bp01 <- ds %>% drop_na(married) %>%
  ggplot(aes(x = fct_rev(fct_infreq(married))))+
  geom_bar(fill = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Marital status")+
  coord_flip()+
  theme_bw()

# bp01

## Marital status (full sample):

lp04 <- ds %>% 
  drop_na(married) %>%
  group_by(married) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(married, count)) + 
  geom_segment(aes(x=married, xend=married, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Marital status (full sample)")+
  coord_flip()+
  theme_bw()

lp04

Display code

## Marital status by country:

lp05 <- ds %>% 
  drop_na(married) %>%
  group_by(married, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(married, count)) + 
  geom_segment(aes(x=married, xend=married, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Marital status by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp05

Variable: Number of children

Display code

ds$children <- as.numeric(gsub("\\D", "", ds$children))

summary(ds$children)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    0.0     0.0     0.0     0.7     1.0     7.0      11

Display code

ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_bar(color = "black",
                 fill = "gray",
                 width = 0.75)+
  geom_textvline(label = "Mean = 0.7", 
                 xintercept = 0.7, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children: Full sample")+
  theme_bw()

Display code

ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_histogram(color = "black",
                 fill = "gray", 
                 bins = 10)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children by country")+
    facet_wrap(vars(Country), nrow = 2)+
#  coord_flip()+
  theme_bw()

Variable: Ethnicity

This variable needs additional work. Each individual entry is a different character. See for example:

Display code

head(table(ds$ethnic), n = 15)


                                                                           
                                                                        27 
                                                                  Am black 
                                                                         1 
                                                                     Black 
                                                                         1 
'Race' and 'ethnicity' have been used as ways to describe human diversity. 
                                                                         1 
                                                                 1-2 hours 
                                                                         1 
                                                                         2 
                                                                         2 
                                                                         3 
                                                                         3 
                                                                         4 
                                                                         2 
                                                                        60 
                                                                         1 
                                                                       90% 
                                                                         1 
                                                   A black African, lhomwe 
                                                                         1 
                                                               A black guy 
                                                                         1 
                                                              A black man. 
                                                                         1 
                                                               A Christian 
                                                                         1 
                                                             A Fante/ Akan 
                                                                         1

Variable: Education

Display code

ds$edu1 <- as_factor(ds$Education)

ds$edu2 <- ifelse(ds$edu1 == "No schooling completed", "No schooling",
           ifelse(ds$edu1 == "Nursery school to 8th grade", "8th grade or less",
           ifelse(ds$edu1 == "Some high school, no diploma", "Some high school",
           ifelse(ds$edu1 == "High school graduate, diploma or the equivalent (for example: GED)", 
                  "High school or equivalent",
           ifelse(ds$edu1 == "Some college credit, no degree", "Some college",
           ifelse(ds$edu1 == "Trade/technical/vocational training", "Vocational school",
           ifelse(ds$edu1 == "Associate degree", "Associate's",
           ifelse(ds$edu1 == "Bachelor’s degree", "Bachelor's",
           ifelse(ds$edu1 == "Master’s degree", "Master's",
           ifelse(ds$edu1 == "Professional degree", "Professional",
           ifelse(ds$edu1 == "Doctorate degree", "Doctorate", NA)))))))))))

ds$edu2 <- factor(ds$edu2, 
                  levels = c("No schooling", "8th grade or less", 
                             "Some high school", "High school or equivalent",
                             "Some college", "Vocational school",
                             "Associate's", "Bachelor's", "Master's",
                             "Professional", "Doctorate"))

# table(ds$edu2)

## Education distribution (full sample):

lp04 <- ds %>% 
  drop_na(edu2) %>%
  group_by(edu2) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(edu2, count)) + 
  geom_segment(aes(x=edu2, xend=edu2, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Education distribution (full sample)")+
  coord_flip()+
  theme_bw()

lp04

Display code

## Education distribution by country:

lp05 <- ds %>% 
  drop_na(edu2) %>%
  group_by(edu2, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(edu2, count)) + 
  geom_segment(aes(x=edu2, xend=edu2, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Education distribution by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp05

Section 2. Factor Analysis: Group fusion/identification

Display code

## Four ingroup fusion items:

# I have a deep emotional bond with the [ingroup].
ds$IGF01 <- as.numeric(ds$Fus.IG1)

# I am strong because of the [ingroup].
ds$IGF02 <- as.numeric(ds$Fus.IG2)

# I make the [ingroup] strong.  
ds$IGF03 <- as.numeric(ds$Fus.IG3)

# I am one with the [ingroup]
ds$IGF04 <- as.numeric(ds$Fus.IG4)

## Four outgroup fusion items:

# I have a deep emotional bond with the [outgroup].
ds$OGF01 <- as.numeric(ds$FUS.OG1)

# I am strong because of the [outgroup].
ds$OGF02 <- as.numeric(ds$FUS.OG2)

# I make the [outgroup] strong. 
ds$OGF03 <- as.numeric(ds$FUS.OG3)

# I am one with the [outgroup].
ds$OGF04 <- as.numeric(ds$FUS.OG4)


## Four ingroup identification items:

# I identify with the [ingroup].
ds$IGI01 <- as.numeric(ds$IDT.IG1)

# I have a lot in common with the [ingroup].
ds$IGI02 <- as.numeric(ds$IDT.IG2)

# I connect with the values of the [ingroup].
ds$IGI03 <- as.numeric(ds$IDT.IG3)

# I feel a sense of belonging with the [ingroup].
ds$IGI04 <- as.numeric(ds$IDT.IG4)


## Four outgroup identification items:

# I identify with the [outgroup].   
ds$OGI01 <- as.numeric(ds$IDNT.OG1)

# I have a lot in common with the [outgroup].   
ds$OGI02 <- as.numeric(ds$IDNT.OG2)

# I connect with the values of the [outgroup].  
ds$OGI03 <- as.numeric(ds$IDNT.OG3)

# I feel a sense of belonging with the [outgroup].  
ds$OGI04 <- as.numeric(ds$IDNT.OG4)

Display code

## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                          ds$IGI01, ds$IGI02, ds$IGI03, ds$IGI04,
                          ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                          ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:16)])

Shorthand for item names:
IG = Ingroup, OG = Outgroup
F = Fusion, I = Identification
So for example:
IGF01 = “ingroup fusion: item 1”
OGI04 = “outgroup identification: item 4”, and so on

Display code

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))+
    facet_wrap(vars(Country), nrow = 2)

NULL

The correlation plot above shows the correlation coefficients between the sixteen different items (eight for ingroup fusion/identification and eight for outgroup fusion/identification).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code

KMO(r=cor(bonds))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.96
MSA for each item = 
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.97  0.97  0.97  0.97  0.97  0.97  0.96  0.96  0.97  0.97  0.96  0.96  0.96 
OGI02 OGI03 OGI04 
 0.96  0.96  0.96

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.96), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code

cortest.bartlett(bonds)

$chisq
[1] 21035

$p.value
[1] 0

$df
[1] 120

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code

# # parallel <- fa.parallel(bonds)

Based on the scree plot, factor analysis with two factors is the most appropriate. We will proceed with promax rotation, which assumes that the items are inter-correlated (that is, not independent from each other).

Factor analysis: Two factor model with promax rotation

Display code

fit02 <- factanal(bonds, 2, rotation="promax")
fit02


Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.20  0.23  0.37  0.30  0.19  0.19  0.14  0.14  0.26  0.39  0.23  0.21  0.24 
OGI02 OGI03 OGI04 
 0.24  0.26  0.21 

Loadings:
      Factor1 Factor2
IGF01  0.895         
IGF02  0.878         
IGF03  0.796   0.124 
IGF04  0.837         
IGI01  0.895         
IGI02  0.901         
IGI03  0.920         
IGI04  0.924         
OGF01          0.862 
OGF02          0.785 
OGF03          0.872 
OGF04          0.880 
OGI01          0.869 
OGI02          0.874 
OGI03          0.862 
OGI04          0.887 

               Factor1 Factor2
SS loadings       6.24    5.97
Proportion Var    0.39    0.37
Cumulative Var    0.39    0.76

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000 -0.0801
Factor2 -0.0801  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 186 on 89 degrees of freedom.
The p-value is 0.0000000083

We want to split the ingroup/outgroup fusion/identification items into distinct factors. We can examine what the three and four factor solutions looks like.

Three factor model

Display code

fit03 <- factanal(bonds, 3, rotation="promax")
fit03


Call:
factanal(x = bonds, factors = 3, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.20  0.23  0.30  0.29  0.19  0.18  0.14  0.14  0.25  0.39  0.20  0.21  0.24 
OGI02 OGI03 OGI04 
 0.23  0.25  0.21 

Loadings:
      Factor1 Factor2 Factor3
IGF01  0.903                 
IGF02  0.873                 
IGF03  0.666           0.308 
IGF04  0.767           0.165 
IGI01  0.871                 
IGI02  0.915                 
IGI03  0.916                 
IGI04  0.932                 
OGF01          0.881         
OGF02          0.775         
OGF03 -0.124   0.827   0.176 
OGF04          0.872         
OGI01          0.856         
OGI02          0.894         
OGI03  0.115   0.885         
OGI04          0.882         

               Factor1 Factor2 Factor3
SS loadings       5.96    5.92   0.174
Proportion Var    0.37    0.37   0.011
Cumulative Var    0.37    0.74   0.753

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000   0.108  -0.388
Factor2   0.108   1.000   0.211
Factor3  -0.388   0.211   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 120 on 75 degrees of freedom.
The p-value is 0.00081

Four factor model

Display code

fit04 <- factanal(bonds, 4, rotation="promax")
fit04


Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.19  0.22  0.31  0.28  0.19  0.18  0.14  0.13  0.25  0.38  0.20  0.20  0.24 
OGI02 OGI03 OGI04 
 0.13  0.26  0.21 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.912                 
IGF02          0.893                 
IGF03          0.675   0.277         
IGF04          0.750   0.187         
IGI01          0.870                 
IGI02          0.920                 
IGI03          0.912                 
IGI04          0.924                 
OGF01  0.881                         
OGF02  0.775                         
OGF03  0.840  -0.137   0.205         
OGF04  0.875                         
OGI01  0.858                         
OGI02  0.957                   0.343 
OGI03  0.894                   0.122 
OGI04  0.884                         

               Factor1 Factor2 Factor3 Factor4
SS loadings       6.09    5.97    0.17   0.146
Proportion Var    0.38    0.37    0.01   0.009
Cumulative Var    0.38    0.75    0.76   0.773

Factor Correlations:
        Factor1 Factor2  Factor3  Factor4
Factor1  1.0000  -0.113  0.07951  0.42514
Factor2 -0.1126   1.000 -0.24494  0.14138
Factor3  0.0795  -0.245  1.00000 -0.00254
Factor4  0.4251   0.141 -0.00254  1.00000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 71 on 62 degrees of freedom.
The p-value is 0.21

Section 3. Factor Analysis: Group fusion/identification (Version 2)

The fourth item has been removed from each sub-scale:

Display code

## Remove 4th item from all subscales and retry:

## New dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03,
                          ds$IGI01, ds$IGI02, ds$IGI03,
                          ds$OGF01, ds$OGF02, ds$OGF03,
                          ds$OGI01, ds$OGI02, ds$OGI03)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:12)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

# # parallel <- fa.parallel(bonds)

Two factor model:

Display code

# Two factor model
fit04 <- factanal(bonds, 2, rotation="promax")
fit04


Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.23  0.37  0.20  0.18  0.14  0.26  0.38  0.24  0.26  0.23  0.25 

Loadings:
      Factor1 Factor2
IGF01  0.896         
IGF02  0.878         
IGF03  0.791   0.122 
IGI01  0.894         
IGI02  0.902         
IGI03  0.921         
OGF01          0.862 
OGF02          0.787 
OGF03          0.865 
OGI01          0.860 
OGI02          0.877 
OGI03          0.865 

               Factor1 Factor2
SS loadings       4.67    4.39
Proportion Var    0.39    0.37
Cumulative Var    0.39    0.76

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000  0.0562
Factor2  0.0562  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 98 on 43 degrees of freedom.
The p-value is 0.000004

Three factor model

Display code

# Three factor model:
fit05 <- factanal(bonds, 3, rotation="promax")
fit05


Call:
factanal(x = bonds, factors = 3, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.23  0.33  0.20  0.18  0.14  0.25  0.38  0.20  0.26  0.22  0.24 

Loadings:
      Factor1 Factor2 Factor3
IGF01  0.899                 
IGF02  0.872                 
IGF03  0.734           0.230 
IGI01  0.884                 
IGI02  0.917                 
IGI03  0.915                 
OGF01          0.869         
OGF02          0.765         
OGF03 -0.120   0.809   0.214 
OGI01          0.835         
OGI02          0.889         
OGI03          0.894         

               Factor1 Factor2 Factor3
SS loadings       4.59    4.29    0.12
Proportion Var    0.38    0.36    0.01
Cumulative Var    0.38    0.74    0.75

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1  1.0000  0.0818  -0.230
Factor2  0.0818  1.0000   0.236
Factor3 -0.2304  0.2363   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 53 on 33 degrees of freedom.
The p-value is 0.013

Four factor model

Display code

# Four factor model:
fit06 <- factanal(bonds, 4, rotation="promax")
fit06


Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.22  0.35  0.20  0.18  0.14  0.25  0.35  0.12  0.26  0.20  0.24 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01  0.902                         
IGF02  0.858                         
IGF03  0.774           0.111         
IGI01  0.896                         
IGI02  0.911                         
IGI03  0.937                         
OGF01          0.842                 
OGF02          0.667           0.224 
OGF03          0.909   0.367         
OGI01          0.814                 
OGI02          0.992          -0.194 
OGI03          0.915                 

               Factor1 Factor2 Factor3 Factor4
SS loadings       4.68    4.47   0.157    0.12
Proportion Var    0.39    0.37   0.013    0.01
Cumulative Var    0.39    0.76   0.776    0.79

Factor Correlations:
         Factor1 Factor2  Factor3 Factor4
Factor1  1.00000 -0.0599 -0.00647   0.243
Factor2 -0.05990  1.0000  0.08120   0.533
Factor3 -0.00647  0.0812  1.00000  -0.171
Factor4  0.24262  0.5327 -0.17078   1.000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 30 on 24 degrees of freedom.
The p-value is 0.17

FA basically suggests that ingroup / outgroup fusion/identification are not distinct factors. Only real split is between ingroup (identification + fusion) as one factor and outgroup (identification + fusion) as another factor. However, I’ll replicate the analysis as before - two constructs for ingroup fusion / identification and a single “bonds” construct for outgroup fusion+identification. Here is the reliability and inter-correlations of the three sub-scales:

Reliability
Scale 1: Ingroup fusion

Display code

igfusion <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03)

names(igfusion) <- sub('ds\\$', '', names(igfusion))

igfusion <- na.omit(igfusion)
mtx2 <- cor(igfusion[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha02 <- psych::alpha(igfusion)
alpha02


Reliability analysis   
Call: psych::alpha(x = igfusion)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.88      0.88    0.84      0.72 7.7 0.0047  4.7 1.9     0.69

    95% confidence boundaries 
         lower alpha upper
Feldt     0.88  0.88  0.89
Duhachek  0.88  0.88  0.89

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGF01      0.81      0.81    0.68      0.68 4.2   0.0090    NA  0.68
IGF02      0.82      0.82    0.69      0.69 4.5   0.0085    NA  0.69
IGF03      0.88      0.88    0.78      0.78 7.2   0.0058    NA  0.78

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
IGF01 1784  0.92  0.92  0.86   0.81  4.9 2.1
IGF02 1784  0.91  0.91  0.85   0.80  4.8 2.2
IGF03 1784  0.87  0.88  0.77   0.73  4.5 2.1

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
IGF01 0.13 0.07 0.07 0.09 0.15 0.17 0.32    0
IGF02 0.14 0.06 0.07 0.10 0.13 0.16 0.33    0
IGF03 0.13 0.09 0.08 0.13 0.17 0.16 0.23    0

Reliability
Scale 2: Ingroup identification

Display code

igidentification <- cbind.data.frame(ds$IGI01, ds$IGI02, ds$IGI03)

names(igidentification) <- sub('ds\\$', '', names(igidentification))

igidentification <- na.omit(igidentification)
mtx3 <- cor(igidentification[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha03 <- psych::alpha(igidentification)
alpha03


Reliability analysis   
Call: psych::alpha(x = igidentification)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd median_r
      0.93      0.93     0.9      0.81  13 0.0029  4.9  2     0.82

    95% confidence boundaries 
         lower alpha upper
Feldt     0.92  0.93  0.93
Duhachek  0.92  0.93  0.93

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGI01      0.90      0.90    0.82      0.82 9.2   0.0047    NA  0.82
IGI02      0.90      0.90    0.82      0.82 9.4   0.0046    NA  0.82
IGI03      0.88      0.88    0.79      0.79 7.5   0.0056    NA  0.79

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
IGI01 1784  0.93  0.93  0.88   0.85  4.9 2.2
IGI02 1784  0.93  0.93  0.88   0.84  4.8 2.1
IGI03 1784  0.94  0.94  0.90   0.87  4.9 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
IGI01 0.15 0.06 0.06 0.09 0.14 0.16 0.35    0
IGI02 0.13 0.07 0.05 0.10 0.16 0.18 0.30    0
IGI03 0.14 0.07 0.06 0.08 0.12 0.18 0.34    0

Reliability
Scale 3a: Outgroup fusion/identification

Display code

ogbonds <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03,
                            ds$OGI01, ds$OGI02, ds$OGI03)

names(ogbonds) <- sub('ds\\$', '', names(ogbonds))

ogbonds <- na.omit(ogbonds)
mtx3 <- cor(ogbonds[, c(1:6)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha04 <- psych::alpha(ogbonds)
alpha04


Reliability analysis   
Call: psych::alpha(x = ogbonds)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.94      0.94    0.93      0.73  16 0.0026  3.3 1.9     0.74

    95% confidence boundaries 
         lower alpha upper
Feldt     0.94  0.94  0.95
Duhachek  0.94  0.94  0.95

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r med.r
OGF01      0.93      0.93    0.91      0.72  13   0.0032 0.00158  0.73
OGF02      0.94      0.94    0.92      0.75  15   0.0028 0.00019  0.75
OGF03      0.93      0.93    0.91      0.72  13   0.0032 0.00148  0.74
OGI01      0.93      0.93    0.91      0.72  13   0.0032 0.00158  0.74
OGI02      0.93      0.93    0.91      0.72  13   0.0033 0.00098  0.73
OGI03      0.93      0.93    0.91      0.72  13   0.0032 0.00144  0.75

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGF01 1264  0.89  0.89  0.86   0.83  3.3 2.2
OGF02 1264  0.84  0.83  0.78   0.76  3.4 2.2
OGF03 1264  0.89  0.89  0.86   0.84  3.1 2.2
OGI01 1264  0.88  0.88  0.85   0.83  3.2 2.1
OGI02 1264  0.89  0.89  0.87   0.84  3.3 2.1
OGI03 1264  0.88  0.88  0.86   0.83  3.4 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGF01 0.32 0.12 0.11 0.13 0.11 0.07 0.14    0
OGF02 0.32 0.11 0.11 0.12 0.11 0.09 0.14    0
OGF03 0.38 0.12 0.09 0.12 0.09 0.09 0.11    0
OGI01 0.35 0.13 0.10 0.13 0.11 0.07 0.12    0
OGI02 0.30 0.13 0.13 0.13 0.11 0.07 0.12    0
OGI03 0.31 0.12 0.10 0.13 0.12 0.09 0.12    0

Reliability
Scale 3b: Outgroup fusion

Display code

ogfusion <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03)

names(ogfusion) <- sub('ds\\$', '', names(ogfusion))

ogfusion <- na.omit(ogfusion)
mtx3 <- cor(ogfusion[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha04 <- psych::alpha(ogfusion)
alpha04


Reliability analysis   
Call: psych::alpha(x = ogfusion)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.88      0.88    0.83       0.7 7.1 0.006  3.3 1.9     0.69

    95% confidence boundaries 
         lower alpha upper
Feldt     0.86  0.88  0.89
Duhachek  0.87  0.88  0.89

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
OGF01      0.81      0.81    0.69      0.69 4.4   0.0104    NA  0.69
OGF02      0.85      0.85    0.74      0.74 5.8   0.0082    NA  0.74
OGF03      0.81      0.81    0.68      0.68 4.3   0.0106    NA  0.68

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGF01 1277  0.90  0.90  0.83   0.78  3.4 2.2
OGF02 1277  0.88  0.88  0.78   0.73  3.4 2.2
OGF03 1277  0.90  0.90  0.84   0.78  3.2 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGF01 0.32 0.12 0.11 0.13 0.11 0.07 0.14    0
OGF02 0.32 0.11 0.11 0.12 0.11 0.09 0.14    0
OGF03 0.38 0.12 0.09 0.12 0.09 0.09 0.11    0

Reliability
Scale 3c: Outgroup identification

Display code

ogidentification <- cbind.data.frame(ds$OGI01, ds$OGI02, ds$OGI03)

names(ogidentification) <- sub('ds\\$', '', names(ogidentification))

ogidentification <- na.omit(ogidentification)
mtx3 <- cor(ogidentification[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha04 <- psych::alpha(ogidentification)
alpha04


Reliability analysis   
Call: psych::alpha(x = ogidentification)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
       0.9       0.9    0.86      0.75   9 0.0049  3.3 1.9     0.75

    95% confidence boundaries 
         lower alpha upper
Feldt     0.89   0.9  0.91
Duhachek  0.89   0.9  0.91

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
OGI01      0.87      0.87    0.77      0.77 6.8   0.0072    NA  0.77
OGI02      0.84      0.84    0.73      0.73 5.3   0.0088    NA  0.73
OGI03      0.86      0.86    0.75      0.75 6.0   0.0080    NA  0.75

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGI01 1274  0.90  0.90  0.83   0.78  3.2 2.1
OGI02 1274  0.92  0.92  0.87   0.82  3.3 2.1
OGI03 1274  0.91  0.91  0.85   0.80  3.4 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGI01 0.35 0.13 0.10 0.13 0.11 0.07 0.12    0
OGI02 0.31 0.13 0.13 0.13 0.11 0.08 0.12    0
OGI03 0.32 0.12 0.10 0.13 0.12 0.09 0.12    0

Section 4. Factor Analysis: BCL/BBL endorsement/experience

Display code

## BCL and BBL items:

# BCL_01:
# Seek out opportunities to bridge social divisions with their opponents, enemies, opposition groups, or other outgroups.   
# Variables: endorse_style_1    and exp_style_1

# BCL_02:
# Demonstrate willingness to compromise with their opponents, enemies, opposition groups, or other outgroups. 
# Variables: endorse_style_2    and exp_style_2

# BCL_03:
# Try to understand and empathize with their opponents, enemies, opposition groups, or other outgroups.  
# Variables: endorse_style_3    and exp_style_3

# BBL_01:
# Represent the interests of the communities and groups that they belong to even at the cost of other groups.
# Variables: endorse_style_4    and exp_style_4

# BBL_02:

## OLD QUESTION (individual datasets):
# Focus on building stronger connections within the communities and groups they belong to rather than building stronger relationships with other groups across boundaries.

## NEW QUESTION (this dataset):
# Seek out opportunities to build stronger connections with the communities and groups they belong to.

## Are the above same?? I'm assuming they are the same item. 

# Variables: endorse_style_5    and exp_style_5

# BBL_03:
# Try to gain benefits for the communities and groups they belong to even at the expense of other groups.
# Variables: endorse_style_6    and exp_style_6

ds$ENDBCL01 <- as.numeric(ds$endorse_style_1)
ds$ENDBCL02 <- as.numeric(ds$endorse_style_2)
ds$ENDBCL03 <- as.numeric(ds$endorse_style_3)
ds$ENDBBL01 <- as.numeric(ds$endorse_style_4)
ds$ENDBBL02 <- as.numeric(ds$endorse_style_5)
ds$ENDBBL03 <- as.numeric(ds$endorse_style_6)

ds$EXPBCL01 <- as.numeric(ds$exp_style_1)
ds$EXPBCL02 <- as.numeric(ds$exp_style_2)
ds$EXPBCL03 <- as.numeric(ds$exp_style_3)
ds$EXPBBL01 <- as.numeric(ds$exp_style_4)
ds$EXPBBL02 <- as.numeric(ds$exp_style_5)
ds$EXPBBL03 <- as.numeric(ds$exp_style_6)

leadership <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03, 
                               ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03,
                               ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03,
                               ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(leadership) <- sub('ds\\$', '', names(leadership))

leadership <- na.omit(leadership)
mtx1 <- cor(leadership[, c(1:12)])

Shorthand for item names:
END = Endorse, EXP = Experience
BCL = BCL, BBL = BBL
So for example:
ENDBCL01 = “Endorsement of BCL, item 1”
EXPBBL02 = “Experience of BBL, item 2”, and so on

Display code

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the twelve different items (six for endorsement of BCL/BBL, and six for experience of BCL/BBL).

KMO and Bartlett’s test:

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(leadership))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(leadership))
Overall MSA =  0.94
MSA for each item = 
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.94     0.93     0.95     0.94     0.93     0.94     0.93     0.91 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.93     0.94     0.95     0.94

Display code

## Bartlett's test of sphericity
cortest.bartlett(leadership)

$chisq
[1] 12764

$p.value
[1] 0

$df
[1] 66

Parallel test

Display code

# # parallel <- fa.parallel(leadership)

Three factor model

Display code

fit03 <- factanal(leadership, 3, rotation="promax")
fit03


Call:
factanal(x = leadership, factors = 3, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.41     0.41     0.34     0.23     0.20     0.60     0.45     0.45 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.43     0.23     0.32     0.59 

Loadings:
         Factor1 Factor2 Factor3
ENDBCL01  0.629   0.126         
ENDBCL02  0.616   0.381  -0.272 
ENDBCL03  0.657   0.139         
ENDBBL01  0.776  -0.204   0.338 
ENDBBL02  0.769  -0.168   0.339 
ENDBBL03  0.536                 
EXPBCL01          0.524   0.272 
EXPBCL02          0.821         
EXPBCL03          0.625   0.265 
EXPBBL01  0.118   0.152   0.685 
EXPBBL02  0.151   0.234   0.542 
EXPBBL03  0.166   0.259   0.305 

               Factor1 Factor2 Factor3
SS loadings       2.77    1.74    1.32
Proportion Var    0.23    0.14    0.11
Cumulative Var    0.23    0.38    0.48

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000  -0.635  -0.627
Factor2  -0.635   1.000   0.720
Factor3  -0.627   0.720   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 450 on 33 degrees of freedom.
The p-value is 0.000000000000000000000000000000000000000000000000000000000000000000000000012

Four factor model

Display code

fit04 <- factanal(leadership, 4, rotation="promax")
fit04


Call:
factanal(x = leadership, factors = 4, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.41     0.41     0.33     0.23     0.20     0.46     0.45     0.46 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.43     0.23     0.32     0.31 

Loadings:
         Factor1 Factor2 Factor3 Factor4
ENDBCL01  0.631   0.140                 
ENDBCL02  0.604   0.339  -0.222         
ENDBCL03  0.687   0.170   0.102         
ENDBBL01  0.772  -0.161   0.333         
ENDBBL02  0.777  -0.124   0.338         
ENDBBL03  0.382                   0.510 
EXPBCL01          0.548   0.238         
EXPBCL02          0.776                 
EXPBCL03          0.641   0.219         
EXPBBL01  0.134   0.203   0.612         
EXPBBL02  0.159   0.256   0.466         
EXPBBL03 -0.134           0.108   0.801 

               Factor1 Factor2 Factor3 Factor4
SS loadings       2.65    1.64   1.006   0.925
Proportion Var    0.22    0.14   0.084   0.077
Cumulative Var    0.22    0.36   0.441   0.518

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1   1.000  -0.545  -0.524  -0.520
Factor2  -0.545   1.000   0.690   0.616
Factor3  -0.524   0.690   1.000   0.684
Factor4  -0.520   0.616   0.684   1.000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 263 on 24 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000053

Again, FA structure does not really support a four factor Endorse/Experience BCL/BBL structure. I will still split the sub-scales into those four constructs.

Display code

end_bcl <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03)
end_bbl <- cbind.data.frame(ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03)
exp_bcl <- cbind.data.frame(ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03)
exp_bbl <- cbind.data.frame(ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(end_bcl) <- sub('ds\\$', '', names(end_bcl))
names(end_bbl) <- sub('ds\\$', '', names(end_bbl))
names(exp_bcl) <- sub('ds\\$', '', names(exp_bcl))
names(exp_bbl) <- sub('ds\\$', '', names(exp_bbl))

end_bcl <- na.omit(end_bcl)
end_bbl <- na.omit(end_bbl)
exp_bcl <- na.omit(exp_bcl)
exp_bbl <- na.omit(exp_bbl)

Reliability
Scale 1: Endorsement of BCL

Display code

mtx1 <- cor(end_bcl[, c(1:3)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

alph01 <- psych::alpha(end_bcl)
alph01


Reliability analysis   
Call: psych::alpha(x = end_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.82      0.82    0.75       0.6 4.4 0.0076  4.7 1.8     0.61

    95% confidence boundaries 
         lower alpha upper
Feldt      0.8  0.82  0.83
Duhachek   0.8  0.82  0.83

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBCL01      0.76      0.76    0.61      0.61 3.1    0.012    NA  0.61
ENDBCL02      0.77      0.77    0.63      0.63 3.4    0.011    NA  0.63
ENDBCL03      0.71      0.71    0.55      0.55 2.5    0.014    NA  0.55

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
ENDBCL01 1758  0.86  0.85  0.73   0.66  4.8 2.1
ENDBCL02 1758  0.84  0.84  0.71   0.64  4.5 2.0
ENDBCL03 1758  0.87  0.87  0.78   0.70  4.8 2.0

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBCL01 0.13 0.08 0.08 0.10 0.14 0.14 0.33    0
ENDBCL02 0.12 0.09 0.09 0.15 0.18 0.16 0.22    0
ENDBCL03 0.11 0.07 0.09 0.12 0.16 0.17 0.29    0

Reliability
Scale 2: Endorsement of BBL

Display code

mtx2 <- cor(end_bbl[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

alph02 <- psych::alpha(end_bbl)
alph02


Reliability analysis   
Call: psych::alpha(x = end_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.83      0.83     0.8      0.62   5 0.007  4.9 1.9     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.82  0.83  0.85
Duhachek  0.82  0.83  0.85

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBBL01      0.71      0.71    0.55      0.55 2.4   0.0139    NA  0.55
ENDBBL02      0.70      0.70    0.54      0.54 2.3   0.0144    NA  0.54
ENDBBL03      0.88      0.88    0.79      0.79 7.6   0.0056    NA  0.79

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
ENDBBL01 1763   0.9   0.9  0.85   0.76  5.0 2.2
ENDBBL02 1763   0.9   0.9  0.86   0.77  5.1 2.2
ENDBBL03 1763   0.8   0.8  0.61   0.57  4.5 2.1

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBBL01 0.13 0.06 0.07 0.08 0.12 0.15 0.39    0
ENDBBL02 0.13 0.06 0.07 0.07 0.11 0.14 0.42    0
ENDBBL03 0.15 0.09 0.09 0.14 0.15 0.14 0.25    0

Reliability
Scale 3: Experience of BCL

Display code

mtx3 <- cor(exp_bcl[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

alph03 <- psych::alpha(exp_bcl)
alph03


Reliability analysis   
Call: psych::alpha(x = exp_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.78      0.78    0.71      0.54 3.6 0.009  4.3 1.6     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.76  0.78   0.8
Duhachek  0.76  0.78   0.8

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBCL01      0.71      0.71    0.55      0.55 2.4    0.014    NA  0.55
EXPBCL02      0.72      0.72    0.57      0.57 2.6    0.013    NA  0.57
EXPBCL03      0.68      0.68    0.52      0.52 2.1    0.015    NA  0.52

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
EXPBCL01 1758  0.84  0.83  0.70   0.62  4.4 2.0
EXPBCL02 1758  0.82  0.82  0.68   0.60  4.2 1.9
EXPBCL03 1758  0.84  0.85  0.73   0.64  4.5 1.9

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBCL01 0.11 0.10 0.12 0.15 0.18 0.15 0.18    0
EXPBCL02 0.12 0.11 0.13 0.16 0.19 0.14 0.15    0
EXPBCL03 0.10 0.10 0.11 0.15 0.20 0.17 0.18    0

Reliability
Scale 4: Experience of BBL

Display code

mtx4 <- cor(exp_bbl[, c(1:3)])

corrplot(mtx4, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

alph04 <- psych::alpha(exp_bbl)
alph04


Reliability analysis   
Call: psych::alpha(x = exp_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.82      0.82    0.77       0.6 4.6 0.0074  4.7 1.7     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.81  0.82  0.84
Duhachek  0.81  0.82  0.84

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBBL01      0.70      0.70    0.54      0.54 2.4   0.0142    NA  0.54
EXPBBL02      0.71      0.71    0.55      0.55 2.5   0.0138    NA  0.55
EXPBBL03      0.84      0.84    0.72      0.72 5.1   0.0079    NA  0.72

 Item statistics 
            n raw.r std.r r.cor r.drop mean sd
EXPBBL01 1750  0.88  0.88  0.81   0.73  4.7  2
EXPBBL02 1750  0.88  0.88  0.80   0.72  4.8  2
EXPBBL03 1750  0.81  0.81  0.64   0.59  4.4  2

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBBL01 0.11 0.08 0.08 0.11 0.18 0.17 0.26    0
EXPBBL02 0.11 0.07 0.09 0.11 0.14 0.19 0.29    0
EXPBBL03 0.11 0.09 0.13 0.14 0.18 0.16 0.18    0

The four subscales are all highly reliable.

Section 5. Preliminary regression analysis

Research question:
Do group fusion / identification predict BCL vs BBL?

Regression models

Display code

## Proper (usable) variables in the model:

ds$Age <- as.numeric(ds$age)

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$Wealth_level <- ds$wealth_level

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL01+ds$EXPBCL02+ds$EXPBCL03)/3)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03)/3)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Bonds <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+
                             ds$OGI01+ds$OGI02+ds$OGI03)/6)


summary(ds$Age)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      23      26      29      32      82       2

Display code

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	Endorse_BCL	Experience_BCL	Endorse_BBL	Experience_BBL
	(1)	(2)	(3)	(4)

IG_Fusion	0.073	0.140^**	0.055	0.140^**
	(0.049)	(0.049)	(0.047)	(0.046)

IG_Identification	0.500^***	0.340^***	0.620^***	0.470^***
	(0.047)	(0.047)	(0.045)	(0.044)

OG_Bonds	-0.023	0.076^***	-0.041^*	0.023
	(0.021)	(0.020)	(0.020)	(0.019)

Age	-0.002	-0.006	-0.007	-0.002
	(0.005)	(0.005)	(0.005)	(0.005)

Female	0.085	0.019	0.049	-0.007
	(0.079)	(0.079)	(0.076)	(0.074)

Married	-0.024	0.054	0.003	0.042
	(0.099)	(0.098)	(0.094)	(0.092)

Wealth_level	0.002	0.003	0.001	0.001
	(0.002)	(0.002)	(0.002)	(0.002)

Constant	1.900^***	1.800^***	1.900^***	1.600^***
	(0.210)	(0.210)	(0.200)	(0.200)


Observations	1,198	1,194	1,198	1,192
R²	0.430	0.340	0.540	0.500
Adjusted R²	0.430	0.340	0.540	0.490
Residual Std. Error	1.300 (df = 1190)	1.300 (df = 1186)	1.300 (df = 1190)	1.200 (df = 1184)
F Statistic	130.000^*** (df = 7; 1190)	89.000^*** (df = 7; 1186)	201.000^*** (df = 7; 1190)	167.000^*** (df = 7; 1184)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 6. Ingroup/outgroup empathy/hostility

Empathic concern

Display code

## Empathic concern

ds$empathic_concern_01 <- as.numeric(ds$empathy_1)

ds$empathic_concern_02a <- as.numeric(ds$empathy_2)
ds$empathic_concern_02 <- as.numeric(8 - ds$empathic_concern_02a)

ds$empathic_concern_03 <- as.numeric(ds$empathy_3)

ds$empathic_concern_04a <- as.numeric(ds$empathy_4)
ds$empathic_concern_04 <- as.numeric(8 - ds$empathic_concern_04a)

ds$empathic_concern_05a <- as.numeric(ds$empathy_5)
ds$empathic_concern_05 <- as.numeric(8 - ds$empathic_concern_05a)

ds$empathic_concern_06 <- as.numeric(ds$empathy_6)

ds$empathic_concern_07 <- as.numeric(ds$empathy_7)

ds$empathic_concern <- (ds$empathic_concern_01+ds$empathic_concern_02+
                        ds$empathic_concern_03+ds$empathic_concern_04+
                        ds$empathic_concern_05+ds$empathic_concern_06+
                        ds$empathic_concern_07)/7

summary(ds$empathic_concern)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      2       4       4       4       4       6     148

Display code

ds %>% drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern (full sample)")+
  theme_bw()

Display code

ds %>% 
   drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Perspective taking

Display code

ds$perspective_taking_01a <- as.numeric(ds$empathy_8)
ds$perspective_taking_01 <- (8 - ds$perspective_taking_01a)

ds$perspective_taking_02 <- as.numeric(ds$empathy_9)

ds$perspective_taking_03 <- as.numeric(ds$empathy_10)

ds$perspective_taking_04a <- as.numeric(ds$empathy_11)
ds$perspective_taking_04 <- (8 - ds$perspective_taking_04a)

ds$perspective_taking_05 <- as.numeric(ds$empathy_12)

ds$perspective_taking_06 <- as.numeric(ds$empathy_13)

ds$perspective_taking_07 <- as.numeric(ds$empathy_14)

ds$perspective_taking <- (ds$perspective_taking_01+ds$perspective_taking_02+
                          ds$perspective_taking_03+ds$perspective_taking_04+
                          ds$perspective_taking_05+ds$perspective_taking_06+
                          ds$perspective_taking_07)/7

summary(ds$perspective_taking)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      2       4       4       4       5       6     635

Display code

ds %>% drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking (full sample)")+
  theme_bw()

Display code

ds %>% 
   drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Outgroup cooperation; hostility; perceived discrimination; willingness to fight outgroup

Not available for this dataset

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Display code

ds3 <- cbind.data.frame(ds$empathic_concern, ds$perspective_taking)
ds3 <- na.omit(ds3)

names(ds3) <- sub('ds\\$', '', names(ds3))

mtx <- cor(ds3[, c(1:2)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 8. Regression with new variables

This set of regression models have additional predictor variables compared to the last set. It adds empathic concern and perspective taking as additional predictors.

Display code

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	Endorse_BCL	Experience_BCL	Endorse_BBL	Experience_BBL
	(1)	(2)	(3)	(4)

IG_Fusion	0.041	0.140^**	0.017	0.110^*
	(0.049)	(0.050)	(0.047)	(0.046)

IG_Identification	0.440^***	0.310^***	0.580^***	0.450^***
	(0.047)	(0.048)	(0.045)	(0.045)

OG_Bonds	-0.023	0.078^***	-0.043^*	0.027
	(0.020)	(0.021)	(0.020)	(0.019)

empathic_concern	0.150^*	0.210^**	0.150^*	0.069
	(0.074)	(0.075)	(0.071)	(0.070)

perspective_taking	0.390^***	0.110	0.330^***	0.210^***
	(0.059)	(0.060)	(0.057)	(0.056)

Age	-0.002	-0.005	-0.007	-0.003
	(0.005)	(0.005)	(0.005)	(0.005)

Female	0.099	0.024	0.074	0.010
	(0.079)	(0.080)	(0.076)	(0.074)

Married	0.006	0.067	0.039	0.060
	(0.098)	(0.099)	(0.094)	(0.092)

Wealth_level	0.002	0.003	0.001	0.001
	(0.002)	(0.002)	(0.002)	(0.002)

Constant	0.140	0.590	0.230	0.630
	(0.380)	(0.380)	(0.360)	(0.360)


Observations	1,166	1,161	1,167	1,159
R²	0.460	0.360	0.560	0.500
Adjusted R²	0.460	0.350	0.560	0.500
Residual Std. Error	1.300 (df = 1156)	1.300 (df = 1151)	1.200 (df = 1157)	1.200 (df = 1149)
F Statistic	110.000^*** (df = 9; 1156)	71.000^*** (df = 9; 1151)	163.000^*** (df = 9; 1157)	130.000^*** (df = 9; 1149)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 9. Alternative regression models with different outcomes

This set of regression models have different outcome variables which are: outgroup cooperation, outgroup hostility, and willingness to fight outgroup. None of these DVs are available for this dataset.

Section 10. Religious freedom measures

Social perception of religious freedom

Display code

ds$religious_freedom_per_01 <- as.numeric(ds$FORB1)

ds$religious_freedom_per_02a <- as.numeric(ds$FORB2)
ds$religious_freedom_per_02 <- (8 - ds$religious_freedom_per_02a)

ds$religious_freedom_per_03 <- as.numeric(ds$FORB3)
ds$religious_freedom_per_04 <- as.numeric(ds$FORB4)
ds$religious_freedom_per_05 <- as.numeric(ds$FORB5)
ds$religious_freedom_per_06 <- as.numeric(ds$FORB6)
ds$religious_freedom_per_07 <- as.numeric(ds$FORB7)
ds$religious_freedom_per_08 <- as.numeric(ds$FORB8)
ds$religious_freedom_per_09 <- as.numeric(ds$FORB9)

ds$sprf <- (ds$religious_freedom_per_01+ds$religious_freedom_per_02+
            ds$religious_freedom_per_03+ds$religious_freedom_per_04+
            ds$religious_freedom_per_05+ds$religious_freedom_per_06+
            ds$religious_freedom_per_07+ds$religious_freedom_per_08+
            ds$religious_freedom_per_09)/9

summary(ds$sprf)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       7       6       8      10     144

Display code

ds %>% drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 6.00", 
                 xintercept = 6.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom (full sample)")+
  theme_bw()

Display code

ds %>% 
   drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Experience of religious freedom

Display code

ds$religious_freedom_exp_01a <- as.numeric(ds$REL.PERS_1)
ds$religious_freedom_exp_01 <- (8 - ds$religious_freedom_exp_01a)

ds$religious_freedom_exp_02a <- as.numeric(ds$REL.PERS_2)
ds$religious_freedom_exp_02 <- (8 - ds$religious_freedom_exp_02a)

ds$religious_freedom_exp_03a <- as.numeric(ds$REL.PERS_3)
ds$religious_freedom_exp_03 <- (8 - ds$religious_freedom_exp_03a)

ds$religious_freedom_exp_04a <- as.numeric(ds$REL.PERS_4)
ds$religious_freedom_exp_04 <- (8 - ds$religious_freedom_exp_04a)

ds$religious_freedom_exp_05a <- as.numeric(ds$REL.PERS_5)
ds$religious_freedom_exp_05 <- (8 - ds$religious_freedom_exp_05a)

ds$religious_freedom_exp_06a <- as.numeric(ds$REL.PERS_6)
ds$religious_freedom_exp_06 <- (8 - ds$religious_freedom_exp_06a)

ds$religious_freedom_exp_07a <- as.numeric(ds$REL.PERS7)
ds$religious_freedom_exp_07 <- (8 - ds$religious_freedom_exp_07a)

ds$religious_freedom_exp_08a <- as.numeric(ds$REL.PERS8)
ds$religious_freedom_exp_08 <- (8 - ds$religious_freedom_exp_08a)

ds$religious_freedom_exp_09a <- as.numeric(ds$REL.PERS9)
ds$religious_freedom_exp_09 <- (9 - ds$religious_freedom_exp_09a)

ds$religious_freedom_exp_10a <- as.numeric(ds$REL.PERS10)
ds$religious_freedom_exp_10 <- (10 - ds$religious_freedom_exp_10a)

ds$religious_freedom_exp_11a <- as.numeric(ds$REL.PERS11)
ds$religious_freedom_exp_11 <- (11 - ds$religious_freedom_exp_11a)

ds$exp_religious_freedom <- (ds$religious_freedom_exp_01+ds$religious_freedom_exp_02+
                             ds$religious_freedom_exp_03+ds$religious_freedom_exp_04+
                             ds$religious_freedom_exp_05+ds$religious_freedom_exp_06+
                             ds$religious_freedom_exp_07+ds$religious_freedom_exp_08+
                             ds$religious_freedom_exp_09+ds$religious_freedom_exp_10+
                             ds$religious_freedom_exp_11)/11

summary(ds$exp_religious_freedom)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      4       5       6       6       7       8     635

Display code

ds %>% drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 6.00", 
                 xintercept = 6.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom (full sample)")+
  theme_bw()

Display code

ds %>% 
   drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Experience of religious freedom based on religious affiliation

Display code

ds$Religion <- ds$religion2

ds2 <- ds %>% 
drop_na(Religion, exp_religious_freedom)%>%
 group_by(Religion) %>% 
 summarise(Exp_religious_freedom=mean(exp_religious_freedom),.groups = 'drop') %>%
  as.data.frame()

ds2

                  Religion Exp_religious_freedom
1                 Agnostic                   5.5
2                  Atheist                   4.7
3      Christian: Catholic                   5.9
4         Christian: Other                   6.0
5    Christian: Protestant                   6.1
6                    Hindu                   5.7
7                   Jewish                   6.3
8             Muslim: Shia                   5.7
9            Muslim: Sunni                   5.8
10                    None                   6.4
11                   Other                   5.7
12 Spiritual not Religious                   5.8

Display code

ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = Religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom (full sample)")+
  coord_flip()+
  theme_bw()

Display code

ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = Religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom by country and religion")+
  coord_flip()+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Section 11. Positive/Negative contact with outgroup

Not available for this dataset

Section 12. Inter-group marriage

Not available for this dataset

Section 13. Outgroup affect

Not available for this dataset

Section 14. Updated regression models

This set of regression models have additional predictors: frequency of positive and negative contact, and interaction between perspective taking and perceived history of discrimination. None of these variables were available for this dataset.

Section 15. Updated regression models (alternate DVs)

Same predictors as section 14 above, but with different outcomes. This set of regression models has an additional outcome variable: affect towards outgroup. None of these variables were available for this dataset.

Section 16. Imagistic Experience: Factor analysis

Display code

ds$event_negative_affect <- as.numeric(ds$NEG.AFF)
table(ds$event_negative_affect)


  1   2   3   4   5   6   7 
554 199 179 184 195 188 298

Display code

ds$event_positive_affect_01 <- (8 - ds$event_negative_affect)
table(ds$event_positive_affect_01)


  1   2   3   4   5   6   7 
298 188 195 184 179 199 554

Display code

ds$event_positive_affect_02 <- as.numeric(ds$POS.AFF)
table(ds$event_positive_affect_02)


  1   2   3   4   5   6   7 
326 150 134 142 209 271 573

Display code

ds$event_positive_affect <- ds$event_positive_affect_02

ds$event_episodic_recall_01 <- as.numeric(ds$VIV.MEM)
table(ds$event_episodic_recall_01)


  1   2   3   4   5   6   7 
216 141 137 197 261 304 547

Display code

ds$event_episodic_recall_02 <- as.numeric(ds$REL.MEM)
table(ds$event_episodic_recall_02)


  1   2   3   4   5   6   7 
253 133 134 188 264 266 560

Display code

ds$event_shared_perception_01 <- as.numeric(ds$SHRD.EXP)
table(ds$event_shared_perception_01)


  1   2   3   4   5   6   7 
229 146 154 217 296 286 475

Display code

ds$event_shared_perception_02 <- as.numeric(ds$SHRD.MEM)
table(ds$event_shared_perception_02)


  1   2   3   4   5   6   7 
250 146 159 215 308 315 411

Display code

ds$event_event_reflection_01 <- as.numeric(ds$REFL1)
table(ds$event_event_reflection_01)


  1   2   3   4   5   6   7 
211 156 166 228 302 305 445

Display code

ds$event_event_reflection_02 <- as.numeric(ds$REFL2)
table(ds$event_event_reflection_02)


  1   2   3   4   5   6   7 
204 152 191 289 338 284 343

Display code

ds$event_transformative_indiv_01 <- as.numeric(ds$PERSIG)
table(ds$event_transformative_indiv_01)


  1   2   3   4   5   6   7 
233 129 130 160 235 279 637

Display code

ds$event_transformative_indiv_02 <- as.numeric(ds$TRNSFM)
table(ds$event_transformative_indiv_02)


  1   2   3   4   5   6   7 
246 157 176 258 262 274 431

Display code

ds$event_transformative_group_01 <- as.numeric(ds$IMPTGRP1)
table(ds$event_transformative_group_01)


  1   2   3   4   5   6   7 
261 128 115 173 266 290 577

Display code

ds$event_transformative_group_02 <- as.numeric(ds$IMPTGRP2)
table(ds$event_transformative_group_02)


  1   2   3   4   5   6   7 
297 149 195 257 281 282 338

Display code

imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)

imagistic <- na.omit(imagistic)

names(imagistic) <- sub('ds\\$event\\_', '', names(imagistic))

mtx <- cor(imagistic[, c(1:12)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Imagistic experience: Factor analysis

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(imagistic))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(imagistic))
Overall MSA =  0.95
MSA for each item = 
        negative_affect         positive_affect      episodic_recall_01 
                   0.40                    0.91                    0.95 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.96                    0.96                    0.96 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.97                    0.96                    0.96 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.96                    0.95                    0.95

Display code

## Bartlett's test of sphericity
cortest.bartlett(imagistic)

$chisq
[1] 12202

$p.value
[1] 0

$df
[1] 66

Display code

## Parallel test
# parallel <- fa.parallel(imagistic)

Scree plot suggests there needs to be two factors, but theoretically we want six factors. Here is how the three factors break down:

Display code

fit02 <- factanal(imagistic, 2, rotation="promax")
fit02


Call:
factanal(x = imagistic, factors = 2, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                   0.51                    0.36                    0.33 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.35                    0.46                    0.41 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.40                    0.42                    0.34 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.53                    0.30                    0.63 

Loadings:
                        Factor1 Factor2
negative_affect                  0.708 
positive_affect          0.600  -0.460 
episodic_recall_01       0.815         
episodic_recall_02       0.801         
shared_perception_01     0.740   0.124 
shared_perception_02     0.769         
event_reflection_01      0.782         
event_reflection_02      0.765         
transformative_indiv_01  0.800         
transformative_indiv_02  0.690         
transformative_group_01  0.813  -0.113 
transformative_group_02  0.614         

               Factor1 Factor2
SS loadings       6.16   0.765
Proportion Var    0.51   0.064
Cumulative Var    0.51   0.577

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.117
Factor2   0.117   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 405 on 43 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000000000000000000000022

The factor structure above makes sense but is a little incoherent. Here is what happens if we “force” six factors:

Display code

fit06 <- factanal(imagistic, 6, rotation="promax")
fit06


Call:
factanal(x = imagistic, factors = 6, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                  0.610                   0.291                   0.304 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                  0.046                   0.393                   0.344 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                  0.402                   0.005                   0.295 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                  0.404                   0.286                   0.429 

Loadings:
                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
negative_affect                                  0.654                 
positive_affect          0.153   0.147          -0.585   0.176         
episodic_recall_01       0.571   0.312                  -0.127         
episodic_recall_02       0.245                                   0.794 
shared_perception_01     0.800                   0.118                 
shared_perception_02     0.774  -0.168                   0.125         
event_reflection_01      0.411   0.332                                 
event_reflection_02                      0.938                         
transformative_indiv_01  0.466   0.524                  -0.111         
transformative_indiv_02          0.588                   0.350         
transformative_group_01  0.607                  -0.163   0.147         
transformative_group_02  0.139                           0.650         

               Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
SS loadings        2.4   0.894   0.893   0.819   0.660   0.649
Proportion Var     0.2   0.075   0.074   0.068   0.055   0.054
Cumulative Var     0.2   0.278   0.352   0.421   0.476   0.530

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
Factor1   1.000   0.519   0.654   0.208  -0.553  -0.708
Factor2   0.519   1.000   0.678   0.294  -0.398  -0.671
Factor3   0.654   0.678   1.000   0.321  -0.677  -0.757
Factor4   0.208   0.294   0.321   1.000  -0.149  -0.346
Factor5  -0.553  -0.398  -0.677  -0.149   1.000   0.580
Factor6  -0.708  -0.671  -0.757  -0.346   0.580   1.000

Test of the hypothesis that 6 factors are sufficient.
The chi square statistic is 13 on 9 degrees of freedom.
The p-value is 0.16

FA solution still seems incoherent.

Imagistic experience: Reliability

Display code

imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)


## Reliability:
alph01 <- psych::alpha(imagistic)

Some items ( ds$event_negative_affect ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option

Display code

alph01


Reliability analysis   
Call: psych::alpha(x = imagistic)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.91      0.91    0.92      0.46  10 0.003  4.5 1.5     0.53

    95% confidence boundaries 
         lower alpha upper
Feldt      0.9  0.91  0.91
Duhachek   0.9  0.91  0.91

 Reliability if an item is dropped:
                                 raw_alpha std.alpha G6(smc) average_r  S/N
ds$event_negative_affect              0.93      0.93    0.93      0.55 13.4
ds$event_positive_affect              0.90      0.91    0.91      0.46  9.6
ds$event_episodic_recall_01           0.89      0.90    0.91      0.44  8.6
ds$event_episodic_recall_02           0.89      0.90    0.91      0.44  8.7
ds$event_shared_perception_01         0.90      0.90    0.91      0.45  9.0
ds$event_shared_perception_02         0.90      0.90    0.91      0.44  8.8
ds$event_event_reflection_01          0.89      0.90    0.91      0.44  8.8
ds$event_event_reflection_02          0.90      0.90    0.91      0.45  8.8
ds$event_transformative_indiv_01      0.89      0.90    0.91      0.44  8.7
ds$event_transformative_indiv_02      0.90      0.90    0.91      0.45  9.1
ds$event_transformative_group_01      0.89      0.90    0.91      0.44  8.6
ds$event_transformative_group_02      0.90      0.90    0.92      0.46  9.5
                                 alpha se  var.r med.r
ds$event_negative_affect           0.0024 0.0058  0.56
ds$event_positive_affect           0.0032 0.0488  0.55
ds$event_episodic_recall_01        0.0035 0.0514  0.52
ds$event_episodic_recall_02        0.0035 0.0519  0.53
ds$event_shared_perception_01      0.0034 0.0559  0.53
ds$event_shared_perception_02      0.0034 0.0541  0.53
ds$event_event_reflection_01       0.0035 0.0544  0.52
ds$event_event_reflection_02       0.0034 0.0549  0.53
ds$event_transformative_indiv_01   0.0035 0.0514  0.52
ds$event_transformative_indiv_02   0.0033 0.0565  0.55
ds$event_transformative_group_01   0.0035 0.0507  0.52
ds$event_transformative_group_02   0.0032 0.0572  0.56

 Item statistics 
                                    n raw.r std.r r.cor r.drop mean  sd
ds$event_negative_affect         1797  0.12  0.11 0.015 -0.013  3.6 2.3
ds$event_positive_affect         1805  0.66  0.65 0.620  0.573  4.6 2.3
ds$event_episodic_recall_01      1803  0.81  0.81 0.803  0.767  4.8 2.1
ds$event_episodic_recall_02      1798  0.81  0.80 0.791  0.756  4.7 2.2
ds$event_shared_perception_01    1803  0.76  0.75 0.723  0.692  4.6 2.1
ds$event_shared_perception_02    1804  0.78  0.78 0.761  0.728  4.5 2.1
ds$event_event_reflection_01     1813  0.79  0.79 0.769  0.738  4.6 2.0
ds$event_event_reflection_02     1801  0.78  0.78 0.755  0.725  4.5 1.9
ds$event_transformative_indiv_01 1803  0.81  0.81 0.794  0.759  4.9 2.2
ds$event_transformative_indiv_02 1804  0.73  0.73 0.693  0.664  4.5 2.1
ds$event_transformative_group_01 1810  0.82  0.82 0.813  0.777  4.8 2.2
ds$event_transformative_group_02 1799  0.66  0.66 0.616  0.588  4.3 2.1

Non missing response frequency for each item
                                    1    2    3    4    5    6    7 miss
ds$event_negative_affect         0.31 0.11 0.10 0.10 0.11 0.10 0.17 0.05
ds$event_positive_affect         0.18 0.08 0.07 0.08 0.12 0.15 0.32 0.04
ds$event_episodic_recall_01      0.12 0.08 0.08 0.11 0.14 0.17 0.30 0.05
ds$event_episodic_recall_02      0.14 0.07 0.07 0.10 0.15 0.15 0.31 0.05
ds$event_shared_perception_01    0.13 0.08 0.09 0.12 0.16 0.16 0.26 0.05
ds$event_shared_perception_02    0.14 0.08 0.09 0.12 0.17 0.17 0.23 0.05
ds$event_event_reflection_01     0.12 0.09 0.09 0.13 0.17 0.17 0.25 0.04
ds$event_event_reflection_02     0.11 0.08 0.11 0.16 0.19 0.16 0.19 0.05
ds$event_transformative_indiv_01 0.13 0.07 0.07 0.09 0.13 0.15 0.35 0.05
ds$event_transformative_indiv_02 0.14 0.09 0.10 0.14 0.15 0.15 0.24 0.05
ds$event_transformative_group_01 0.14 0.07 0.06 0.10 0.15 0.16 0.32 0.04
ds$event_transformative_group_02 0.17 0.08 0.11 0.14 0.16 0.16 0.19 0.05

Imagistic experience: Visualization

If we conceptualize “imagistic experience” as a single measure that is the sum of the six scales, here is the visualization of the measure:

Display code

ds$imagistic <- (((ds$event_positive_affect)+
                 (ds$event_negative_affect)+ 
                ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)+
                ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)+
                ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)+
                ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)+
                ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)))

summary(ds$imagistic)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      7      26      34      31      39      49     157

Display code

ds %>% drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 32.00", 
                 xintercept = 32.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Imagistic experience score", 
       y = "Frequency", 
       title = "Imagistic experience (full sample)")+
  theme_bw()

Display code

ds %>% 
  drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  labs(x = "Imagistic experience score", 
       y = "Frequency", 
       title = "Imagistic experience by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Section 17. Imagistic experience: sub-scales

Below are the individual Cronbach’s alpha for each of the seven subscales, followed by their individual visualizations, followed by the correlation between the seven sub-scales

Negative affect about event

Display code

## Positive affect:
negative_affect <- cbind.data.frame(ds$event_negative_affect)
negative_affect <- na.omit(negative_affect)

## Visualization:
ds$event_negative_affect <- (ds$event_negative_affect)
summary(ds$event_negative_affect)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       1       3       4       6       7      93

Display code

ds %>% drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event (full sample)")+
  theme_bw()

Display code

ds %>% 
  drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Positive affect about event

Display code

## Positive affect:
positive_affect <- cbind.data.frame(ds$event_positive_affect)
positive_affect <- na.omit(positive_affect)

## Visualization:
ds$event_positive_affect <- (ds$event_positive_affect)
summary(ds$event_positive_affect)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       2       5       5       7       7      85

Display code

ds %>% drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event: Gambia")+
  theme_bw()

Display code

ds %>% 
  drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Episodic recall of event

Display code

## Episodic recall:
episodic_recall <- cbind.data.frame(ds$event_episodic_recall_01, ds$event_episodic_recall_02)
episodic_recall <- na.omit(episodic_recall)

names(episodic_recall) <- sub('ds\\$event\\_', '', names(episodic_recall))

mtx <- cor(episodic_recall[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(episodic_recall)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd median_r
      0.83      0.83    0.71      0.71 4.8 0.0081  4.8  2     0.71

Display code

## Visualization:
ds$event_episodic_recall <- ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)
summary(ds$event_episodic_recall)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     111

Display code

ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event (full sample)")+
  theme_bw()

Display code

ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Shared perception of event

Display code

## Shared perception:
shared_perception <- cbind.data.frame(ds$event_shared_perception_01, ds$event_shared_perception_02)
shared_perception <- na.omit(shared_perception)

names(shared_perception) <- sub('ds\\$event\\_', '', names(shared_perception))

mtx <- cor(shared_perception[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(shared_perception)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.77      0.77    0.62      0.62 3.3 0.011  4.6 1.9     0.62

Display code

## Visualization:
ds$event_shared_perception <- ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)
summary(ds$event_shared_perception)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     106

Display code

ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event (full sample)")+
  theme_bw()

Display code

ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Reflection of event

Display code

## Reflection:
event_reflection <- cbind.data.frame(ds$event_event_reflection_01, ds$event_event_reflection_02)
event_reflection <- na.omit(event_reflection)

names(event_reflection) <- sub('ds\\$event\\_', '', names(event_reflection))

mtx <- cor(event_reflection[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(event_reflection)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.76      0.76    0.61      0.61 3.2 0.011  4.6 1.8     0.61

Display code

## Visualization:
ds$event_event_reflection <- ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)
summary(ds$event_event_reflection)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     103

Display code

ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event (full sample)")+
  theme_bw()

Display code

ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Transformative event for individual

Display code

## Reflection:
event_transformative_indiv <- cbind.data.frame(ds$event_transformative_indiv_01, ds$event_transformative_indiv_02)

event_transformative_indiv <- na.omit(event_transformative_indiv)

names(event_transformative_indiv) <- sub('ds\\$event\\_', '', names(event_transformative_indiv))

mtx <- cor(event_transformative_indiv[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
psych::alpha(event_transformative_indiv)


Reliability analysis   
Call: psych::alpha(x = event_transformative_indiv)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.73      0.73    0.57      0.57 2.7 0.013  4.7 1.9     0.57

    95% confidence boundaries 
         lower alpha upper
Feldt      0.7  0.73  0.75
Duhachek   0.7  0.73  0.75

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
transformative_indiv_01      0.59      0.57    0.33      0.57 1.3       NA
transformative_indiv_02      0.55      0.57    0.33      0.57 1.3       NA
                        var.r med.r
transformative_indiv_01     0  0.57
transformative_indiv_02     0  0.57

 Item statistics 
                           n raw.r std.r r.cor r.drop mean  sd
transformative_indiv_01 1779  0.89  0.89  0.67   0.57  4.9 2.1
transformative_indiv_02 1779  0.88  0.89  0.67   0.57  4.5 2.1

Non missing response frequency for each item
                           1    2    3    4    5    6    7 miss
transformative_indiv_01 0.13 0.07 0.07 0.09 0.13 0.16 0.36    0
transformative_indiv_02 0.13 0.09 0.10 0.14 0.15 0.15 0.24    0

Display code

## Visualization:
ds$event_transformative_indiv <- ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)
summary(ds$event_transformative_indiv)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     111

Display code

ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual (full sample)")+
  theme_bw()

Display code

ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Transformative event for group

Display code

event_transformative_group <- cbind.data.frame(ds$event_transformative_group_01, ds$event_transformative_group_02)
event_transformative_group <- na.omit(event_transformative_group)

names(event_transformative_group) <- sub('ds\\$event\\_', '', names(event_transformative_group))

mtx <- cor(event_transformative_group[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(event_transformative_group)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.68      0.68    0.52      0.52 2.2 0.015  4.5 1.8     0.52

Display code

## Visualization:
ds$event_transformative_group <- ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)
summary(ds$event_transformative_group)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     105

Display code

ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group (full sample)")+
  theme_bw()

Display code

ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Correlation between seven subscales

Display code

imagistic_subscales <- cbind.data.frame(ds$event_positive_affect, ds$event_negative_affect,
                                        ds$event_episodic_recall, 
                                        ds$event_shared_perception, ds$event_event_reflection, 
                                        ds$event_transformative_indiv, ds$event_transformative_group)


imagistic_subscales <- na.omit(imagistic_subscales)

names(imagistic_subscales) <- sub('ds\\$event\\_', '', names(imagistic_subscales))

mtx <- cor(imagistic_subscales[, c(1:6)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Display code

## Four regression models predicting IG/OG Fusion/Identification:

lm01 <- lm(IG_Fusion~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(IG_Identification~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)

lm03 <- lm(OG_Bonds~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	IG_Fusion	IG_Identification	OG_Bonds
	(1)	(2)	(3)

event_positive_affect	0.096^***	0.065^***	0.130^***
	(0.017)	(0.017)	(0.032)

event_negative_affect	0.004	-0.041^**	0.220^***
	(0.014)	(0.014)	(0.026)

event_episodic_recall	0.210^***	0.300^***	-0.250^***
	(0.025)	(0.026)	(0.049)

event_shared_perception	0.200^***	0.240^***	-0.032
	(0.025)	(0.025)	(0.046)

event_event_reflection	0.180^***	0.140^***	0.060
	(0.027)	(0.028)	(0.052)

event_transformative_indiv	0.180^***	0.200^***	0.037
	(0.026)	(0.026)	(0.049)

event_transformative_group	0.072^**	0.040	0.100^*
	(0.025)	(0.025)	(0.048)

Age	-0.002	-0.005	0.005
	(0.003)	(0.003)	(0.007)

Female	-0.025	0.040	-0.330^**
	(0.058)	(0.058)	(0.110)

Married	0.057	-0.067	0.140
	(0.073)	(0.073)	(0.140)

Wealth_level	-0.0003	0.0003	0.007^**
	(0.001)	(0.001)	(0.003)

Constant	0.460^**	0.520^***	2.000^***
	(0.150)	(0.150)	(0.300)


Observations	1,697	1,696	1,202
R²	0.640	0.670	0.099
Adjusted R²	0.630	0.670	0.091
Residual Std. Error	1.100 (df = 1685)	1.200 (df = 1684)	1.800 (df = 1190)
F Statistic	266.000^*** (df = 11; 1685)	312.000^*** (df = 11; 1684)	12.000^*** (df = 11; 1190)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Display code

## Save the latest version of data:
# fwrite(ds, file = "~/Desktop/oxford/data/BCL/BCL01.csv")

Section 1: Data Cleaning and Exploratory Data Visualization

Variable: Country / Language

Variable: Survey duration

Variable: Age

Variable: Gender

Variable: Province

Variable: Relative wealth status

Variable: Socio-economic status

Variable: Income

Variable: Nature of employment

Variable: Religious affiliation

Variable: Marital status

Variable: Number of children

Variable: Ethnicity

Variable: Education

Section 2. Factor Analysis: Group fusion/identification

Kaiser-Meyer-Olkin (KMO) test of factorability

Bartlett’s test of sphericity

Parallel test

Factor analysis: Two factor model with promax rotation

Three factor model

Four factor model

Section 3. Factor Analysis: Group fusion/identification (Version 2)

Two factor model:

Three factor model

Four factor model

Reliability Scale 1: Ingroup fusion

Reliability Scale 2: Ingroup identification

Reliability Scale 3a: Outgroup fusion/identification

Reliability Scale 3b: Outgroup fusion

Reliability Scale 3c: Outgroup identification

Section 4. Factor Analysis: BCL/BBL endorsement/experience

KMO and Bartlett’s test:

Parallel test

Three factor model

Four factor model

Reliability Scale 1: Endorsement of BCL

Reliability Scale 2: Endorsement of BBL

Reliability Scale 3: Experience of BCL

Reliability Scale 4: Experience of BBL

Section 5. Preliminary regression analysis

Research question: Do group fusion / identification predict BCL vs BBL?

Regression models

Section 6. Ingroup/outgroup empathy/hostility

Empathic concern

Perspective taking

Outgroup cooperation; hostility; perceived discrimination; willingness to fight outgroup

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Section 8. Regression with new variables

Section 9. Alternative regression models with different outcomes

Section 10. Religious freedom measures

Social perception of religious freedom

Experience of religious freedom

Experience of religious freedom based on religious affiliation

Section 11. Positive/Negative contact with outgroup

Section 12. Inter-group marriage

Section 13. Outgroup affect

Section 14. Updated regression models

Section 15. Updated regression models (alternate DVs)

Section 16. Imagistic Experience: Factor analysis

Imagistic experience: Factor analysis

Imagistic experience: Reliability

Imagistic experience: Visualization

Section 17. Imagistic experience: sub-scales

Negative affect about event

Positive affect about event

Episodic recall of event

Shared perception of event

Reflection of event

Transformative event for individual

Transformative event for group

Correlation between seven subscales

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Reliability
Scale 1: Ingroup fusion

Reliability
Scale 2: Ingroup identification

Reliability
Scale 3a: Outgroup fusion/identification

Reliability
Scale 3b: Outgroup fusion

Reliability
Scale 3c: Outgroup identification

Reliability
Scale 1: Endorsement of BCL

Reliability
Scale 2: Endorsement of BBL

Reliability
Scale 3: Experience of BCL

Reliability
Scale 4: Experience of BBL

Research question:
Do group fusion / identification predict BCL vs BBL?