Data Analysis and Visualization
Cross-country Data

Author

Gagan Atreya

Published

September 21, 2023

Display code
rm(list=ls())
options(digits = 2)

## Install "pacman" package if not installed
# (remove the # symbol from the line below):
# install.packages("pacman")

## Load R packages:
pacman::p_load(data.table, tidyverse, haven, labelled, vtable, 
               psych, scales, weights, clipr, forcats,
               stargazer, ggthemes, ggcharts, geomtextpath,
               corrplot, tm, gt, lme4, car, lmerTest, 
               ggeffects, magrittr, broom, broom.mixed)

## Import BCL dataset:
ds <- read_sav("~/Desktop/oxford/data/BCL/BCL Dataset.sav")
ds <- as.data.table(ds)
## Remove cases with no country:
ds <- ds[!is.na(ds$Country),]

ds$Country <- as.character(haven::as_factor(ds$Country))

ds <- ds[!(ds$Country == "USA"), ]
table(ds$Country)

  Bangladesh        Ghana       Malawi     Pakistan Sierra Leone     Tanzania 
         310          319          206          387           65          310 
      Uganda 
         293 
Display code
ds$CountryID <- ifelse(ds$Country == "Bangladesh", "01",
                ifelse(ds$Country == "Ghana", "02",
                ifelse(ds$Country == "Malawi", "03",
                ifelse(ds$Country == "Pakistan", "04",
                ifelse(ds$Country == "Sierra Leone", "05",
                ifelse(ds$Country == "Tanzania", "06",
                ifelse(ds$Country == "Uganda", "07", NA)))))))

Section 1: Data Cleaning and Exploratory Data Visualization

Variable: Country / Language

Display code
tbl01 <- table(ds$Country, ds$UserLanguage)
tbl02 <- addmargins(tbl01, c(1, 2))

## Table of user language by country:
tbl02
              
                 BN   EN   SW   UR  Sum
  Bangladesh     55  253    0    2  310
  Ghana           0  319    0    0  319
  Malawi          0  206    0    0  206
  Pakistan        0  375    0   12  387
  Sierra Leone    0   65    0    0   65
  Tanzania        0  251   59    0  310
  Uganda          0  293    0    0  293
  Sum            55 1762   59   14 1890
Display code
## Sample size by country:

lp01 <- ds %>% 
  # drop_na(Country) %>%
  lollipop_chart(x = Country,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Sample size by country")+
  theme_bw()

lp01

Variable: Survey duration

Display code
ds$seconds <- as.numeric(ds$Duration__in_seconds_)
ds$minutes <- (ds$seconds/60)
ds$hours <- (ds$minutes/60)

summary(ds$seconds)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    472    1557    2231    3579    3221  120037 
Display code
summary(ds$minutes)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      8      26      37      60      54    2001 
Display code
summary(ds$hours)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0       0       1       1       1      33 
Display code
ds %>% 
#  drop_na(Country)%>%
ggplot(aes(x = hours))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 200)+
  geom_textvline(label = "Mean = 1.00", 
                 xintercept = mean(ds$hours), 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Hours", 
       y = "Frequency", 
       title = "Survey duration (full sample)")+
  theme_bw()

Display code
ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = hours))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 100)+
  labs(x = "Hours", 
       y = "Frequency", 
       title = "Survey duration by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Display code
ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = hours, 
           y = Country, 
           color = Country))+
  geom_point(show.legend = FALSE)+
  labs(x = "Hours",
       title = "Survey duration by country")+
  scale_color_colorblind()+
  theme_bw()

Variable: Age

Display code
ds$age <- as.numeric(ds$age)

summary(ds$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      23      26      29      32      82       2 
Display code
ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = age, 
           y = Country))+
  geom_boxplot(show.legend = FALSE)+
  labs(x = "Age",
       title = "Age distribution by country")+
  scale_color_colorblind()+
  theme_bw()

Display code
ds %>% 
  # drop_na(Country)%>%
ggplot(aes(x = age, 
           y = Country, 
           color = Country))+
  geom_point(show.legend = FALSE)+
  labs(x = "Age",
       title = "Age distribution by country")+
  scale_color_colorblind()+
  theme_bw()

Variable: Gender

Display code
ds$gend1 <- as_factor(ds$gend)
ds$gend2 <- as_factor(ds$gend_6_TEXT)

ds$gend3 <- ifelse(ds$gend2=="Female", "Female",
            ifelse(ds$gend2=="IM FEMAIL", "Female",
            ifelse(ds$gend2=="Male", "Male",
            ifelse(ds$gend2=="male", "Male", ""))))

ds$gender <- ifelse(ds$gend1 == "Male", "Male",
             ifelse(ds$gend3 == "Male", "Male",
             ifelse(ds$gend1 == "Female", "Female",
             ifelse(ds$gend3 == "Female", "Female", NA))))

## Gender distribution (full sample):
lp02 <- ds %>% 
  drop_na(gender) %>%
lollipop_chart(x = gender,
               line_color = "black",
               point_color = "black")+
  labs(x = "Gender", 
       y = "Frequency",
       title = "Gender distribution (full sample)")+
  theme_bw()

lp02

Display code
## Gender distribution by country
barp01 <- ds %>% 
  drop_na(gender) %>%
  ggplot(aes(x = gender, 
             fill = gender))+
  geom_bar()+
  labs(x = "",
       y = "Frequency",
       title = "Gender distribution by country", 
       fill = "")+
  scale_fill_manual(values=c("green", "purple"))+
  facet_wrap(~Country, ncol = 1)+
  coord_flip()+
  theme_bw()+
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

barp01

Variable: Province

Not available for this dataset.

Variable: Relative wealth status

Display code
ds$wealth_level <- as.numeric(ds$wealth_4)
summary(ds$wealth_level)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0      28      50      47      55     100       5 
Display code
ds %>% 
  drop_na(wealth_level, gender) %>%
  ggplot(aes(y = wealth_level, 
             x = Country))+
  geom_boxplot(fill = "white")+
  facet_wrap(~gender, 
             ncol = 2)+
  labs(y = "Self-reported relative wealth status", 
       x = "")+
  coord_flip()+
  theme_bw()

Variable: Socio-economic status

Not available for this dataset.

Variable: Income

This variable needs additional work. Each individual entry is a different character. See for example:

Display code
head(table(ds$income), n = 15)

                                     10 million shillings 
                          22                            1 
 48,000,000 Uganda Shillings                     5000 USD 
                           1                            1 
                     50000tk                    GH ₵78934 
                           1                            1 
                  PKR1500000                      Shs1.2m 
                           1                            1 
                           -                   &gt;100000 
                           1                            1 
              ¢1,000-¢60,000                        ¢1000 
                           1                            3 
                       ¢1200                      ¢190000 
                           1                            1 
                         ¢20 
                           1 

Variable: Nature of employment

Not available in this dataset. There is an “occupation” variable which needs additional work. See for example:

Display code
head(table(ds$occupation), n = 15)

                                                                                                                     
                                                                                                                  15 
                                                                                                      Am not working 
                                                                                                                   1 
                                                                                                            Employee 
                                                                                                                   1 
                                                                                        I`m a primary school teacher 
                                                                                                                   1 
                                                                                                                ছাত্র 
                                                                                                                   1 
                                                                                                                   2 
                                                                                                                   1 
                                                                                                  A digital marketer 
                                                                                                                   1 
A hustler with a high school education, I do any work which comes to me as long it does not involve using of skills. 
                                                                                                                   1 
                                                                                                          A MARKETER 
                                                                                                                   1 
                                                                           A statistician and operational researcher 
                                                                                                                   1 
                                                                                                           A student 
                                                                                                                   1 
                                                                                                           A teacher 
                                                                                                                   1 
                                                                                                         Account ant 
                                                                                                                   1 
                                                                                                         Accountancy 
                                                                                                                   1 
                                                                                                          Accountant 
                                                                                                                  32 

Variable: Religious affiliation

Display code
ds$religion2 <- as_factor(ds$religion)
ds$religion2 <- as.character(ds$religion2)

ds$religion2 <- str_replace_all(ds$religion2, " - ", ": ")

ds$religion2 <- ifelse(ds$religion2 == "Other (please specify)", "Other", 
                       ds$religion2)

## Religion distribution:

lp06 <- ds %>% 
  drop_na(religion2) %>%
lollipop_chart(x = religion2,
               line_color = "black",
               point_color = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Religion distribution (full sample)")+
  theme_bw()

lp06

Display code
## Religion distribution by country:

tbl04 <- table(ds$religion2, ds$Country)
tbl05 <- addmargins(tbl04, c(1, 2))

## Table of user language by country:
tbl05
                         
                          Bangladesh Ghana Malawi Pakistan Sierra Leone
  Agnostic                         2     3      0        1            0
  Atheist                          0     4      1        0            0
  Buddhist                         1     0      0        0            0
  Christian: Catholic              6    52     46        0            8
  Christian: Other                 1   146    102        1           23
  Christian: Protestant            0    61     33        2            5
  Hindu                           21     0      0        3            0
  Jewish                           0     0      0        0            0
  Muslim: Shia                     9     2      0       35            1
  Muslim: Sunni                  262    31      7      336           22
  None                             2     6      0        0            3
  Other                            3     4      9        7            1
  Sikh                             0     0      0        0            0
  Spiritual not Religious          3    10      8        1            2
  Sum                            310   319    206      386           65
                         
                          Tanzania Uganda  Sum
  Agnostic                       0      0    6
  Atheist                        0      0    5
  Buddhist                       0      0    1
  Christian: Catholic           98     93  303
  Christian: Other              59     57  389
  Christian: Protestant         54     97  252
  Hindu                          1      3   28
  Jewish                         1      0    1
  Muslim: Shia                  22      8   77
  Muslim: Sunni                 64     27  749
  None                           1      1   13
  Other                          5      4   33
  Sikh                           1      0    1
  Spiritual not Religious        3      3   30
  Sum                          309    293 1888
Display code
ds$religion3 <- ifelse(ds$religion2=="Other (please specify)", "Other",
                ifelse(ds$religion2=="Atheist", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Agnostic", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Spiritual not Religious", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="None", "Atheist/Agnostic/None",
                ifelse(ds$religion2=="Sikh", "Other",
                ifelse(ds$religion2=="Jewish", "Other",
                ifelse(ds$religion2=="Buddhist", "Other",
                       ds$religion2))))))))

## Religion distribution by country:

lp07 <- ds %>% 
  drop_na(religion3) %>%
  group_by(religion3, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(religion3, count)) + 
  geom_segment(aes(x=religion3, xend=religion3, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Religion distribution by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp07

Variable: Marital status

Display code
ds$marriage01 <- as.character(haven::as_factor(ds$marriage))

table(ds$marriage01)

                         Divorced In a relationship but not married 
                               12                               334 
                          Married            Other (please specify) 
                              577                                18 
                           Single                           Widowed 
                              932                                13 
Display code
ds$married <- ifelse(ds$marriage01 == "Single", "Unmarried",
              ifelse(ds$marriage01 == "Married", "Married", "Other"))

bp01 <- ds %>% drop_na(married) %>%
  ggplot(aes(x = fct_rev(fct_infreq(married))))+
  geom_bar(fill = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Marital status")+
  coord_flip()+
  theme_bw()

# bp01

## Marital status (full sample):

lp04 <- ds %>% 
  drop_na(married) %>%
  group_by(married) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(married, count)) + 
  geom_segment(aes(x=married, xend=married, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Marital status (full sample)")+
  coord_flip()+
  theme_bw()

lp04

Display code
## Marital status by country:

lp05 <- ds %>% 
  drop_na(married) %>%
  group_by(married, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(married, count)) + 
  geom_segment(aes(x=married, xend=married, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Marital status by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp05

Variable: Number of children

Display code
ds$children <- as.numeric(gsub("\\D", "", ds$children))

summary(ds$children)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    0.0     0.0     0.0     0.7     1.0     7.0      11 
Display code
ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_bar(color = "black",
                 fill = "gray",
                 width = 0.75)+
  geom_textvline(label = "Mean = 0.7", 
                 xintercept = 0.7, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children: Full sample")+
  theme_bw()

Display code
ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_histogram(color = "black",
                 fill = "gray", 
                 bins = 10)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children by country")+
    facet_wrap(vars(Country), nrow = 2)+
#  coord_flip()+
  theme_bw()

Variable: Ethnicity

This variable needs additional work. Each individual entry is a different character. See for example:

Display code
head(table(ds$ethnic), n = 15)

                                                                           
                                                                        27 
                                                                  Am black 
                                                                         1 
                                                                     Black 
                                                                         1 
'Race' and 'ethnicity' have been used as ways to describe human diversity. 
                                                                         1 
                                                                 1-2 hours 
                                                                         1 
                                                                         2 
                                                                         2 
                                                                         3 
                                                                         3 
                                                                         4 
                                                                         2 
                                                                        60 
                                                                         1 
                                                                       90% 
                                                                         1 
                                                   A black African, lhomwe 
                                                                         1 
                                                               A black guy 
                                                                         1 
                                                              A black man. 
                                                                         1 
                                                               A Christian 
                                                                         1 
                                                             A Fante/ Akan 
                                                                         1 

Variable: Education

Display code
ds$edu1 <- as_factor(ds$Education)

ds$edu2 <- ifelse(ds$edu1 == "No schooling completed", "No schooling",
           ifelse(ds$edu1 == "Nursery school to 8th grade", "8th grade or less",
           ifelse(ds$edu1 == "Some high school, no diploma", "Some high school",
           ifelse(ds$edu1 == "High school graduate, diploma or the equivalent (for example: GED)", 
                  "High school or equivalent",
           ifelse(ds$edu1 == "Some college credit, no degree", "Some college",
           ifelse(ds$edu1 == "Trade/technical/vocational training", "Vocational school",
           ifelse(ds$edu1 == "Associate degree", "Associate's",
           ifelse(ds$edu1 == "Bachelor’s degree", "Bachelor's",
           ifelse(ds$edu1 == "Master’s degree", "Master's",
           ifelse(ds$edu1 == "Professional degree", "Professional",
           ifelse(ds$edu1 == "Doctorate degree", "Doctorate", NA)))))))))))

ds$edu2 <- factor(ds$edu2, 
                  levels = c("No schooling", "8th grade or less", 
                             "Some high school", "High school or equivalent",
                             "Some college", "Vocational school",
                             "Associate's", "Bachelor's", "Master's",
                             "Professional", "Doctorate"))

# table(ds$edu2)

## Education distribution (full sample):

lp04 <- ds %>% 
  drop_na(edu2) %>%
  group_by(edu2) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(edu2, count)) + 
  geom_segment(aes(x=edu2, xend=edu2, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Education distribution (full sample)")+
  coord_flip()+
  theme_bw()

lp04

Display code
## Education distribution by country:

lp05 <- ds %>% 
  drop_na(edu2) %>%
  group_by(edu2, Country) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(edu2, count)) + 
  geom_segment(aes(x=edu2, xend=edu2, 
                   y=0, yend=count))+ 
  geom_point()+
  labs(x = "", 
       y = "Frequency", 
       title = "Education distribution by country")+
    facet_wrap(vars(Country), nrow = 2)+
  coord_flip()+
  theme_bw()

lp05

Section 2. Factor Analysis: Group fusion/identification

Display code
## Four ingroup fusion items:

# I have a deep emotional bond with the [ingroup].
ds$IGF01 <- as.numeric(ds$Fus.IG1)

# I am strong because of the [ingroup].
ds$IGF02 <- as.numeric(ds$Fus.IG2)

# I make the [ingroup] strong.  
ds$IGF03 <- as.numeric(ds$Fus.IG3)

# I am one with the [ingroup]
ds$IGF04 <- as.numeric(ds$Fus.IG4)

## Four outgroup fusion items:

# I have a deep emotional bond with the [outgroup].
ds$OGF01 <- as.numeric(ds$FUS.OG1)

# I am strong because of the [outgroup].
ds$OGF02 <- as.numeric(ds$FUS.OG2)

# I make the [outgroup] strong. 
ds$OGF03 <- as.numeric(ds$FUS.OG3)

# I am one with the [outgroup].
ds$OGF04 <- as.numeric(ds$FUS.OG4)


## Four ingroup identification items:

# I identify with the [ingroup].
ds$IGI01 <- as.numeric(ds$IDT.IG1)

# I have a lot in common with the [ingroup].
ds$IGI02 <- as.numeric(ds$IDT.IG2)

# I connect with the values of the [ingroup].
ds$IGI03 <- as.numeric(ds$IDT.IG3)

# I feel a sense of belonging with the [ingroup].
ds$IGI04 <- as.numeric(ds$IDT.IG4)


## Four outgroup identification items:

# I identify with the [outgroup].   
ds$OGI01 <- as.numeric(ds$IDNT.OG1)

# I have a lot in common with the [outgroup].   
ds$OGI02 <- as.numeric(ds$IDNT.OG2)

# I connect with the values of the [outgroup].  
ds$OGI03 <- as.numeric(ds$IDNT.OG3)

# I feel a sense of belonging with the [outgroup].  
ds$OGI04 <- as.numeric(ds$IDNT.OG4)
Display code
## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                          ds$IGI01, ds$IGI02, ds$IGI03, ds$IGI04,
                          ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                          ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:16)])

Shorthand for item names:
IG = Ingroup, OG = Outgroup
F = Fusion, I = Identification
So for example:
IGF01 = “ingroup fusion: item 1”
OGI04 = “outgroup identification: item 4”, and so on

Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))+
    facet_wrap(vars(Country), nrow = 2)

NULL

The correlation plot above shows the correlation coefficients between the sixteen different items (eight for ingroup fusion/identification and eight for outgroup fusion/identification).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code
KMO(r=cor(bonds))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.96
MSA for each item = 
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.97  0.97  0.97  0.97  0.97  0.97  0.96  0.96  0.97  0.97  0.96  0.96  0.96 
OGI02 OGI03 OGI04 
 0.96  0.96  0.96 

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.96), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code
cortest.bartlett(bonds)
$chisq
[1] 21035

$p.value
[1] 0

$df
[1] 120

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code
# # parallel <- fa.parallel(bonds)

Based on the scree plot, factor analysis with two factors is the most appropriate. We will proceed with promax rotation, which assumes that the items are inter-correlated (that is, not independent from each other).

Factor analysis: Two factor model with promax rotation

Display code
fit02 <- factanal(bonds, 2, rotation="promax")
fit02

Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.20  0.23  0.37  0.30  0.19  0.19  0.14  0.14  0.26  0.39  0.23  0.21  0.24 
OGI02 OGI03 OGI04 
 0.24  0.26  0.21 

Loadings:
      Factor1 Factor2
IGF01  0.895         
IGF02  0.878         
IGF03  0.796   0.124 
IGF04  0.837         
IGI01  0.895         
IGI02  0.901         
IGI03  0.920         
IGI04  0.924         
OGF01          0.862 
OGF02          0.785 
OGF03          0.872 
OGF04          0.880 
OGI01          0.869 
OGI02          0.874 
OGI03          0.862 
OGI04          0.887 

               Factor1 Factor2
SS loadings       6.24    5.97
Proportion Var    0.39    0.37
Cumulative Var    0.39    0.76

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000 -0.0801
Factor2 -0.0801  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 186 on 89 degrees of freedom.
The p-value is 0.0000000083 

We want to split the ingroup/outgroup fusion/identification items into distinct factors. We can examine what the three and four factor solutions looks like.

Three factor model

Display code
fit03 <- factanal(bonds, 3, rotation="promax")
fit03

Call:
factanal(x = bonds, factors = 3, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.20  0.23  0.30  0.29  0.19  0.18  0.14  0.14  0.25  0.39  0.20  0.21  0.24 
OGI02 OGI03 OGI04 
 0.23  0.25  0.21 

Loadings:
      Factor1 Factor2 Factor3
IGF01  0.903                 
IGF02  0.873                 
IGF03  0.666           0.308 
IGF04  0.767           0.165 
IGI01  0.871                 
IGI02  0.915                 
IGI03  0.916                 
IGI04  0.932                 
OGF01          0.881         
OGF02          0.775         
OGF03 -0.124   0.827   0.176 
OGF04          0.872         
OGI01          0.856         
OGI02          0.894         
OGI03  0.115   0.885         
OGI04          0.882         

               Factor1 Factor2 Factor3
SS loadings       5.96    5.92   0.174
Proportion Var    0.37    0.37   0.011
Cumulative Var    0.37    0.74   0.753

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000   0.108  -0.388
Factor2   0.108   1.000   0.211
Factor3  -0.388   0.211   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 120 on 75 degrees of freedom.
The p-value is 0.00081 

Four factor model

Display code
fit04 <- factanal(bonds, 4, rotation="promax")
fit04

Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.19  0.22  0.31  0.28  0.19  0.18  0.14  0.13  0.25  0.38  0.20  0.20  0.24 
OGI02 OGI03 OGI04 
 0.13  0.26  0.21 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.912                 
IGF02          0.893                 
IGF03          0.675   0.277         
IGF04          0.750   0.187         
IGI01          0.870                 
IGI02          0.920                 
IGI03          0.912                 
IGI04          0.924                 
OGF01  0.881                         
OGF02  0.775                         
OGF03  0.840  -0.137   0.205         
OGF04  0.875                         
OGI01  0.858                         
OGI02  0.957                   0.343 
OGI03  0.894                   0.122 
OGI04  0.884                         

               Factor1 Factor2 Factor3 Factor4
SS loadings       6.09    5.97    0.17   0.146
Proportion Var    0.38    0.37    0.01   0.009
Cumulative Var    0.38    0.75    0.76   0.773

Factor Correlations:
        Factor1 Factor2  Factor3  Factor4
Factor1  1.0000  -0.113  0.07951  0.42514
Factor2 -0.1126   1.000 -0.24494  0.14138
Factor3  0.0795  -0.245  1.00000 -0.00254
Factor4  0.4251   0.141 -0.00254  1.00000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 71 on 62 degrees of freedom.
The p-value is 0.21 

Section 3. Factor Analysis: Group fusion/identification (Version 2)

The fourth item has been removed from each sub-scale:

Display code
## Remove 4th item from all subscales and retry:

## New dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03,
                          ds$IGI01, ds$IGI02, ds$IGI03,
                          ds$OGF01, ds$OGF02, ds$OGF03,
                          ds$OGI01, ds$OGI02, ds$OGI03)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:12)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
# # parallel <- fa.parallel(bonds)

Two factor model:

Display code
# Two factor model
fit04 <- factanal(bonds, 2, rotation="promax")
fit04

Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.23  0.37  0.20  0.18  0.14  0.26  0.38  0.24  0.26  0.23  0.25 

Loadings:
      Factor1 Factor2
IGF01  0.896         
IGF02  0.878         
IGF03  0.791   0.122 
IGI01  0.894         
IGI02  0.902         
IGI03  0.921         
OGF01          0.862 
OGF02          0.787 
OGF03          0.865 
OGI01          0.860 
OGI02          0.877 
OGI03          0.865 

               Factor1 Factor2
SS loadings       4.67    4.39
Proportion Var    0.39    0.37
Cumulative Var    0.39    0.76

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000  0.0562
Factor2  0.0562  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 98 on 43 degrees of freedom.
The p-value is 0.000004 

Three factor model

Display code
# Three factor model:
fit05 <- factanal(bonds, 3, rotation="promax")
fit05

Call:
factanal(x = bonds, factors = 3, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.23  0.33  0.20  0.18  0.14  0.25  0.38  0.20  0.26  0.22  0.24 

Loadings:
      Factor1 Factor2 Factor3
IGF01  0.899                 
IGF02  0.872                 
IGF03  0.734           0.230 
IGI01  0.884                 
IGI02  0.917                 
IGI03  0.915                 
OGF01          0.869         
OGF02          0.765         
OGF03 -0.120   0.809   0.214 
OGI01          0.835         
OGI02          0.889         
OGI03          0.894         

               Factor1 Factor2 Factor3
SS loadings       4.59    4.29    0.12
Proportion Var    0.38    0.36    0.01
Cumulative Var    0.38    0.74    0.75

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1  1.0000  0.0818  -0.230
Factor2  0.0818  1.0000   0.236
Factor3 -0.2304  0.2363   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 53 on 33 degrees of freedom.
The p-value is 0.013 

Four factor model

Display code
# Four factor model:
fit06 <- factanal(bonds, 4, rotation="promax")
fit06

Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.20  0.22  0.35  0.20  0.18  0.14  0.25  0.35  0.12  0.26  0.20  0.24 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01  0.902                         
IGF02  0.858                         
IGF03  0.774           0.111         
IGI01  0.896                         
IGI02  0.911                         
IGI03  0.937                         
OGF01          0.842                 
OGF02          0.667           0.224 
OGF03          0.909   0.367         
OGI01          0.814                 
OGI02          0.992          -0.194 
OGI03          0.915                 

               Factor1 Factor2 Factor3 Factor4
SS loadings       4.68    4.47   0.157    0.12
Proportion Var    0.39    0.37   0.013    0.01
Cumulative Var    0.39    0.76   0.776    0.79

Factor Correlations:
         Factor1 Factor2  Factor3 Factor4
Factor1  1.00000 -0.0599 -0.00647   0.243
Factor2 -0.05990  1.0000  0.08120   0.533
Factor3 -0.00647  0.0812  1.00000  -0.171
Factor4  0.24262  0.5327 -0.17078   1.000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 30 on 24 degrees of freedom.
The p-value is 0.17 

FA basically suggests that ingroup / outgroup fusion/identification are not distinct factors. Only real split is between ingroup (identification + fusion) as one factor and outgroup (identification + fusion) as another factor. However, I’ll replicate the analysis as before - two constructs for ingroup fusion / identification and a single “bonds” construct for outgroup fusion+identification. Here is the reliability and inter-correlations of the three sub-scales:

Reliability
Scale 1: Ingroup fusion

Display code
igfusion <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03)

names(igfusion) <- sub('ds\\$', '', names(igfusion))

igfusion <- na.omit(igfusion)
mtx2 <- cor(igfusion[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(igfusion)
alpha02

Reliability analysis   
Call: psych::alpha(x = igfusion)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.88      0.88    0.84      0.72 7.7 0.0047  4.7 1.9     0.69

    95% confidence boundaries 
         lower alpha upper
Feldt     0.88  0.88  0.89
Duhachek  0.88  0.88  0.89

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGF01      0.81      0.81    0.68      0.68 4.2   0.0090    NA  0.68
IGF02      0.82      0.82    0.69      0.69 4.5   0.0085    NA  0.69
IGF03      0.88      0.88    0.78      0.78 7.2   0.0058    NA  0.78

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
IGF01 1784  0.92  0.92  0.86   0.81  4.9 2.1
IGF02 1784  0.91  0.91  0.85   0.80  4.8 2.2
IGF03 1784  0.87  0.88  0.77   0.73  4.5 2.1

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
IGF01 0.13 0.07 0.07 0.09 0.15 0.17 0.32    0
IGF02 0.14 0.06 0.07 0.10 0.13 0.16 0.33    0
IGF03 0.13 0.09 0.08 0.13 0.17 0.16 0.23    0

Reliability
Scale 2: Ingroup identification

Display code
igidentification <- cbind.data.frame(ds$IGI01, ds$IGI02, ds$IGI03)

names(igidentification) <- sub('ds\\$', '', names(igidentification))

igidentification <- na.omit(igidentification)
mtx3 <- cor(igidentification[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha03 <- psych::alpha(igidentification)
alpha03

Reliability analysis   
Call: psych::alpha(x = igidentification)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd median_r
      0.93      0.93     0.9      0.81  13 0.0029  4.9  2     0.82

    95% confidence boundaries 
         lower alpha upper
Feldt     0.92  0.93  0.93
Duhachek  0.92  0.93  0.93

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGI01      0.90      0.90    0.82      0.82 9.2   0.0047    NA  0.82
IGI02      0.90      0.90    0.82      0.82 9.4   0.0046    NA  0.82
IGI03      0.88      0.88    0.79      0.79 7.5   0.0056    NA  0.79

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
IGI01 1784  0.93  0.93  0.88   0.85  4.9 2.2
IGI02 1784  0.93  0.93  0.88   0.84  4.8 2.1
IGI03 1784  0.94  0.94  0.90   0.87  4.9 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
IGI01 0.15 0.06 0.06 0.09 0.14 0.16 0.35    0
IGI02 0.13 0.07 0.05 0.10 0.16 0.18 0.30    0
IGI03 0.14 0.07 0.06 0.08 0.12 0.18 0.34    0

Reliability
Scale 3a: Outgroup fusion/identification

Display code
ogbonds <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03,
                            ds$OGI01, ds$OGI02, ds$OGI03)

names(ogbonds) <- sub('ds\\$', '', names(ogbonds))

ogbonds <- na.omit(ogbonds)
mtx3 <- cor(ogbonds[, c(1:6)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha04 <- psych::alpha(ogbonds)
alpha04

Reliability analysis   
Call: psych::alpha(x = ogbonds)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.94      0.94    0.93      0.73  16 0.0026  3.3 1.9     0.74

    95% confidence boundaries 
         lower alpha upper
Feldt     0.94  0.94  0.95
Duhachek  0.94  0.94  0.95

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r med.r
OGF01      0.93      0.93    0.91      0.72  13   0.0032 0.00158  0.73
OGF02      0.94      0.94    0.92      0.75  15   0.0028 0.00019  0.75
OGF03      0.93      0.93    0.91      0.72  13   0.0032 0.00148  0.74
OGI01      0.93      0.93    0.91      0.72  13   0.0032 0.00158  0.74
OGI02      0.93      0.93    0.91      0.72  13   0.0033 0.00098  0.73
OGI03      0.93      0.93    0.91      0.72  13   0.0032 0.00144  0.75

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGF01 1264  0.89  0.89  0.86   0.83  3.3 2.2
OGF02 1264  0.84  0.83  0.78   0.76  3.4 2.2
OGF03 1264  0.89  0.89  0.86   0.84  3.1 2.2
OGI01 1264  0.88  0.88  0.85   0.83  3.2 2.1
OGI02 1264  0.89  0.89  0.87   0.84  3.3 2.1
OGI03 1264  0.88  0.88  0.86   0.83  3.4 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGF01 0.32 0.12 0.11 0.13 0.11 0.07 0.14    0
OGF02 0.32 0.11 0.11 0.12 0.11 0.09 0.14    0
OGF03 0.38 0.12 0.09 0.12 0.09 0.09 0.11    0
OGI01 0.35 0.13 0.10 0.13 0.11 0.07 0.12    0
OGI02 0.30 0.13 0.13 0.13 0.11 0.07 0.12    0
OGI03 0.31 0.12 0.10 0.13 0.12 0.09 0.12    0

Reliability
Scale 3b: Outgroup fusion

Display code
ogfusion <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03)

names(ogfusion) <- sub('ds\\$', '', names(ogfusion))

ogfusion <- na.omit(ogfusion)
mtx3 <- cor(ogfusion[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha04 <- psych::alpha(ogfusion)
alpha04

Reliability analysis   
Call: psych::alpha(x = ogfusion)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.88      0.88    0.83       0.7 7.1 0.006  3.3 1.9     0.69

    95% confidence boundaries 
         lower alpha upper
Feldt     0.86  0.88  0.89
Duhachek  0.87  0.88  0.89

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
OGF01      0.81      0.81    0.69      0.69 4.4   0.0104    NA  0.69
OGF02      0.85      0.85    0.74      0.74 5.8   0.0082    NA  0.74
OGF03      0.81      0.81    0.68      0.68 4.3   0.0106    NA  0.68

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGF01 1277  0.90  0.90  0.83   0.78  3.4 2.2
OGF02 1277  0.88  0.88  0.78   0.73  3.4 2.2
OGF03 1277  0.90  0.90  0.84   0.78  3.2 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGF01 0.32 0.12 0.11 0.13 0.11 0.07 0.14    0
OGF02 0.32 0.11 0.11 0.12 0.11 0.09 0.14    0
OGF03 0.38 0.12 0.09 0.12 0.09 0.09 0.11    0

Reliability
Scale 3c: Outgroup identification

Display code
ogidentification <- cbind.data.frame(ds$OGI01, ds$OGI02, ds$OGI03)

names(ogidentification) <- sub('ds\\$', '', names(ogidentification))

ogidentification <- na.omit(ogidentification)
mtx3 <- cor(ogidentification[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha04 <- psych::alpha(ogidentification)
alpha04

Reliability analysis   
Call: psych::alpha(x = ogidentification)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
       0.9       0.9    0.86      0.75   9 0.0049  3.3 1.9     0.75

    95% confidence boundaries 
         lower alpha upper
Feldt     0.89   0.9  0.91
Duhachek  0.89   0.9  0.91

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
OGI01      0.87      0.87    0.77      0.77 6.8   0.0072    NA  0.77
OGI02      0.84      0.84    0.73      0.73 5.3   0.0088    NA  0.73
OGI03      0.86      0.86    0.75      0.75 6.0   0.0080    NA  0.75

 Item statistics 
         n raw.r std.r r.cor r.drop mean  sd
OGI01 1274  0.90  0.90  0.83   0.78  3.2 2.1
OGI02 1274  0.92  0.92  0.87   0.82  3.3 2.1
OGI03 1274  0.91  0.91  0.85   0.80  3.4 2.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGI01 0.35 0.13 0.10 0.13 0.11 0.07 0.12    0
OGI02 0.31 0.13 0.13 0.13 0.11 0.08 0.12    0
OGI03 0.32 0.12 0.10 0.13 0.12 0.09 0.12    0

Section 4. Factor Analysis: BCL/BBL endorsement/experience

Display code
## BCL and BBL items:

# BCL_01:
# Seek out opportunities to bridge social divisions with their opponents, enemies, opposition groups, or other outgroups.   
# Variables: endorse_style_1    and exp_style_1

# BCL_02:
# Demonstrate willingness to compromise with their opponents, enemies, opposition groups, or other outgroups. 
# Variables: endorse_style_2    and exp_style_2

# BCL_03:
# Try to understand and empathize with their opponents, enemies, opposition groups, or other outgroups.  
# Variables: endorse_style_3    and exp_style_3

# BBL_01:
# Represent the interests of the communities and groups that they belong to even at the cost of other groups.
# Variables: endorse_style_4    and exp_style_4

# BBL_02:

## OLD QUESTION (individual datasets):
# Focus on building stronger connections within the communities and groups they belong to rather than building stronger relationships with other groups across boundaries.

## NEW QUESTION (this dataset):
# Seek out opportunities to build stronger connections with the communities and groups they belong to.

## Are the above same?? I'm assuming they are the same item. 

# Variables: endorse_style_5    and exp_style_5

# BBL_03:
# Try to gain benefits for the communities and groups they belong to even at the expense of other groups.
# Variables: endorse_style_6    and exp_style_6

ds$ENDBCL01 <- as.numeric(ds$endorse_style_1)
ds$ENDBCL02 <- as.numeric(ds$endorse_style_2)
ds$ENDBCL03 <- as.numeric(ds$endorse_style_3)
ds$ENDBBL01 <- as.numeric(ds$endorse_style_4)
ds$ENDBBL02 <- as.numeric(ds$endorse_style_5)
ds$ENDBBL03 <- as.numeric(ds$endorse_style_6)

ds$EXPBCL01 <- as.numeric(ds$exp_style_1)
ds$EXPBCL02 <- as.numeric(ds$exp_style_2)
ds$EXPBCL03 <- as.numeric(ds$exp_style_3)
ds$EXPBBL01 <- as.numeric(ds$exp_style_4)
ds$EXPBBL02 <- as.numeric(ds$exp_style_5)
ds$EXPBBL03 <- as.numeric(ds$exp_style_6)

leadership <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03, 
                               ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03,
                               ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03,
                               ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(leadership) <- sub('ds\\$', '', names(leadership))

leadership <- na.omit(leadership)
mtx1 <- cor(leadership[, c(1:12)])

Shorthand for item names:
END = Endorse, EXP = Experience
BCL = BCL, BBL = BBL
So for example:
ENDBCL01 = “Endorsement of BCL, item 1”
EXPBBL02 = “Experience of BBL, item 2”, and so on

Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the twelve different items (six for endorsement of BCL/BBL, and six for experience of BCL/BBL).

KMO and Bartlett’s test:

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(leadership))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(leadership))
Overall MSA =  0.94
MSA for each item = 
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.94     0.93     0.95     0.94     0.93     0.94     0.93     0.91 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.93     0.94     0.95     0.94 
Display code
## Bartlett's test of sphericity
cortest.bartlett(leadership)
$chisq
[1] 12764

$p.value
[1] 0

$df
[1] 66

Parallel test

Display code
# # parallel <- fa.parallel(leadership)

Three factor model

Display code
fit03 <- factanal(leadership, 3, rotation="promax")
fit03

Call:
factanal(x = leadership, factors = 3, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.41     0.41     0.34     0.23     0.20     0.60     0.45     0.45 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.43     0.23     0.32     0.59 

Loadings:
         Factor1 Factor2 Factor3
ENDBCL01  0.629   0.126         
ENDBCL02  0.616   0.381  -0.272 
ENDBCL03  0.657   0.139         
ENDBBL01  0.776  -0.204   0.338 
ENDBBL02  0.769  -0.168   0.339 
ENDBBL03  0.536                 
EXPBCL01          0.524   0.272 
EXPBCL02          0.821         
EXPBCL03          0.625   0.265 
EXPBBL01  0.118   0.152   0.685 
EXPBBL02  0.151   0.234   0.542 
EXPBBL03  0.166   0.259   0.305 

               Factor1 Factor2 Factor3
SS loadings       2.77    1.74    1.32
Proportion Var    0.23    0.14    0.11
Cumulative Var    0.23    0.38    0.48

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000  -0.635  -0.627
Factor2  -0.635   1.000   0.720
Factor3  -0.627   0.720   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 450 on 33 degrees of freedom.
The p-value is 0.000000000000000000000000000000000000000000000000000000000000000000000000012 

Four factor model

Display code
fit04 <- factanal(leadership, 4, rotation="promax")
fit04

Call:
factanal(x = leadership, factors = 4, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.41     0.41     0.33     0.23     0.20     0.46     0.45     0.46 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.43     0.23     0.32     0.31 

Loadings:
         Factor1 Factor2 Factor3 Factor4
ENDBCL01  0.631   0.140                 
ENDBCL02  0.604   0.339  -0.222         
ENDBCL03  0.687   0.170   0.102         
ENDBBL01  0.772  -0.161   0.333         
ENDBBL02  0.777  -0.124   0.338         
ENDBBL03  0.382                   0.510 
EXPBCL01          0.548   0.238         
EXPBCL02          0.776                 
EXPBCL03          0.641   0.219         
EXPBBL01  0.134   0.203   0.612         
EXPBBL02  0.159   0.256   0.466         
EXPBBL03 -0.134           0.108   0.801 

               Factor1 Factor2 Factor3 Factor4
SS loadings       2.65    1.64   1.006   0.925
Proportion Var    0.22    0.14   0.084   0.077
Cumulative Var    0.22    0.36   0.441   0.518

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1   1.000  -0.545  -0.524  -0.520
Factor2  -0.545   1.000   0.690   0.616
Factor3  -0.524   0.690   1.000   0.684
Factor4  -0.520   0.616   0.684   1.000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 263 on 24 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000053 

Again, FA structure does not really support a four factor Endorse/Experience BCL/BBL structure. I will still split the sub-scales into those four constructs.

Display code
end_bcl <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03)
end_bbl <- cbind.data.frame(ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03)
exp_bcl <- cbind.data.frame(ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03)
exp_bbl <- cbind.data.frame(ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(end_bcl) <- sub('ds\\$', '', names(end_bcl))
names(end_bbl) <- sub('ds\\$', '', names(end_bbl))
names(exp_bcl) <- sub('ds\\$', '', names(exp_bcl))
names(exp_bbl) <- sub('ds\\$', '', names(exp_bbl))

end_bcl <- na.omit(end_bcl)
end_bbl <- na.omit(end_bbl)
exp_bcl <- na.omit(exp_bcl)
exp_bbl <- na.omit(exp_bbl)

Reliability
Scale 1: Endorsement of BCL

Display code
mtx1 <- cor(end_bcl[, c(1:3)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
alph01 <- psych::alpha(end_bcl)
alph01

Reliability analysis   
Call: psych::alpha(x = end_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.82      0.82    0.75       0.6 4.4 0.0076  4.7 1.8     0.61

    95% confidence boundaries 
         lower alpha upper
Feldt      0.8  0.82  0.83
Duhachek   0.8  0.82  0.83

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBCL01      0.76      0.76    0.61      0.61 3.1    0.012    NA  0.61
ENDBCL02      0.77      0.77    0.63      0.63 3.4    0.011    NA  0.63
ENDBCL03      0.71      0.71    0.55      0.55 2.5    0.014    NA  0.55

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
ENDBCL01 1758  0.86  0.85  0.73   0.66  4.8 2.1
ENDBCL02 1758  0.84  0.84  0.71   0.64  4.5 2.0
ENDBCL03 1758  0.87  0.87  0.78   0.70  4.8 2.0

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBCL01 0.13 0.08 0.08 0.10 0.14 0.14 0.33    0
ENDBCL02 0.12 0.09 0.09 0.15 0.18 0.16 0.22    0
ENDBCL03 0.11 0.07 0.09 0.12 0.16 0.17 0.29    0

Reliability
Scale 2: Endorsement of BBL

Display code
mtx2 <- cor(end_bbl[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
alph02 <- psych::alpha(end_bbl)
alph02

Reliability analysis   
Call: psych::alpha(x = end_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.83      0.83     0.8      0.62   5 0.007  4.9 1.9     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.82  0.83  0.85
Duhachek  0.82  0.83  0.85

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBBL01      0.71      0.71    0.55      0.55 2.4   0.0139    NA  0.55
ENDBBL02      0.70      0.70    0.54      0.54 2.3   0.0144    NA  0.54
ENDBBL03      0.88      0.88    0.79      0.79 7.6   0.0056    NA  0.79

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
ENDBBL01 1763   0.9   0.9  0.85   0.76  5.0 2.2
ENDBBL02 1763   0.9   0.9  0.86   0.77  5.1 2.2
ENDBBL03 1763   0.8   0.8  0.61   0.57  4.5 2.1

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBBL01 0.13 0.06 0.07 0.08 0.12 0.15 0.39    0
ENDBBL02 0.13 0.06 0.07 0.07 0.11 0.14 0.42    0
ENDBBL03 0.15 0.09 0.09 0.14 0.15 0.14 0.25    0

Reliability
Scale 3: Experience of BCL

Display code
mtx3 <- cor(exp_bcl[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
alph03 <- psych::alpha(exp_bcl)
alph03

Reliability analysis   
Call: psych::alpha(x = exp_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.78      0.78    0.71      0.54 3.6 0.009  4.3 1.6     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.76  0.78   0.8
Duhachek  0.76  0.78   0.8

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBCL01      0.71      0.71    0.55      0.55 2.4    0.014    NA  0.55
EXPBCL02      0.72      0.72    0.57      0.57 2.6    0.013    NA  0.57
EXPBCL03      0.68      0.68    0.52      0.52 2.1    0.015    NA  0.52

 Item statistics 
            n raw.r std.r r.cor r.drop mean  sd
EXPBCL01 1758  0.84  0.83  0.70   0.62  4.4 2.0
EXPBCL02 1758  0.82  0.82  0.68   0.60  4.2 1.9
EXPBCL03 1758  0.84  0.85  0.73   0.64  4.5 1.9

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBCL01 0.11 0.10 0.12 0.15 0.18 0.15 0.18    0
EXPBCL02 0.12 0.11 0.13 0.16 0.19 0.14 0.15    0
EXPBCL03 0.10 0.10 0.11 0.15 0.20 0.17 0.18    0

Reliability
Scale 4: Experience of BBL

Display code
mtx4 <- cor(exp_bbl[, c(1:3)])

corrplot(mtx4, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
alph04 <- psych::alpha(exp_bbl)
alph04

Reliability analysis   
Call: psych::alpha(x = exp_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.82      0.82    0.77       0.6 4.6 0.0074  4.7 1.7     0.55

    95% confidence boundaries 
         lower alpha upper
Feldt     0.81  0.82  0.84
Duhachek  0.81  0.82  0.84

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBBL01      0.70      0.70    0.54      0.54 2.4   0.0142    NA  0.54
EXPBBL02      0.71      0.71    0.55      0.55 2.5   0.0138    NA  0.55
EXPBBL03      0.84      0.84    0.72      0.72 5.1   0.0079    NA  0.72

 Item statistics 
            n raw.r std.r r.cor r.drop mean sd
EXPBBL01 1750  0.88  0.88  0.81   0.73  4.7  2
EXPBBL02 1750  0.88  0.88  0.80   0.72  4.8  2
EXPBBL03 1750  0.81  0.81  0.64   0.59  4.4  2

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBBL01 0.11 0.08 0.08 0.11 0.18 0.17 0.26    0
EXPBBL02 0.11 0.07 0.09 0.11 0.14 0.19 0.29    0
EXPBBL03 0.11 0.09 0.13 0.14 0.18 0.16 0.18    0

The four subscales are all highly reliable.

Section 5. Preliminary regression analysis

Research question:
Do group fusion / identification predict BCL vs BBL?

Regression models

Display code
## Proper (usable) variables in the model:

ds$Age <- as.numeric(ds$age)

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$Wealth_level <- ds$wealth_level

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL01+ds$EXPBCL02+ds$EXPBCL03)/3)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03)/3)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Bonds <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+
                             ds$OGI01+ds$OGI02+ds$OGI03)/6)


summary(ds$Age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      23      26      29      32      82       2 
Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+Wealth_level, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion0.0730.140**0.0550.140**
(0.049)(0.049)(0.047)(0.046)
IG_Identification0.500***0.340***0.620***0.470***
(0.047)(0.047)(0.045)(0.044)
OG_Bonds-0.0230.076***-0.041*0.023
(0.021)(0.020)(0.020)(0.019)
Age-0.002-0.006-0.007-0.002
(0.005)(0.005)(0.005)(0.005)
Female0.0850.0190.049-0.007
(0.079)(0.079)(0.076)(0.074)
Married-0.0240.0540.0030.042
(0.099)(0.098)(0.094)(0.092)
Wealth_level0.0020.0030.0010.001
(0.002)(0.002)(0.002)(0.002)
Constant1.900***1.800***1.900***1.600***
(0.210)(0.210)(0.200)(0.200)
Observations1,1981,1941,1981,192
R20.4300.3400.5400.500
Adjusted R20.4300.3400.5400.490
Residual Std. Error1.300 (df = 1190)1.300 (df = 1186)1.300 (df = 1190)1.200 (df = 1184)
F Statistic130.000*** (df = 7; 1190)89.000*** (df = 7; 1186)201.000*** (df = 7; 1190)167.000*** (df = 7; 1184)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 6. Ingroup/outgroup empathy/hostility

Empathic concern

Display code
## Empathic concern

ds$empathic_concern_01 <- as.numeric(ds$empathy_1)

ds$empathic_concern_02a <- as.numeric(ds$empathy_2)
ds$empathic_concern_02 <- as.numeric(8 - ds$empathic_concern_02a)

ds$empathic_concern_03 <- as.numeric(ds$empathy_3)

ds$empathic_concern_04a <- as.numeric(ds$empathy_4)
ds$empathic_concern_04 <- as.numeric(8 - ds$empathic_concern_04a)

ds$empathic_concern_05a <- as.numeric(ds$empathy_5)
ds$empathic_concern_05 <- as.numeric(8 - ds$empathic_concern_05a)

ds$empathic_concern_06 <- as.numeric(ds$empathy_6)

ds$empathic_concern_07 <- as.numeric(ds$empathy_7)

ds$empathic_concern <- (ds$empathic_concern_01+ds$empathic_concern_02+
                        ds$empathic_concern_03+ds$empathic_concern_04+
                        ds$empathic_concern_05+ds$empathic_concern_06+
                        ds$empathic_concern_07)/7

summary(ds$empathic_concern)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      2       4       4       4       4       6     148 
Display code
ds %>% drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern (full sample)")+
  theme_bw()

Display code
ds %>% 
   drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Perspective taking

Display code
ds$perspective_taking_01a <- as.numeric(ds$empathy_8)
ds$perspective_taking_01 <- (8 - ds$perspective_taking_01a)

ds$perspective_taking_02 <- as.numeric(ds$empathy_9)

ds$perspective_taking_03 <- as.numeric(ds$empathy_10)

ds$perspective_taking_04a <- as.numeric(ds$empathy_11)
ds$perspective_taking_04 <- (8 - ds$perspective_taking_04a)

ds$perspective_taking_05 <- as.numeric(ds$empathy_12)

ds$perspective_taking_06 <- as.numeric(ds$empathy_13)

ds$perspective_taking_07 <- as.numeric(ds$empathy_14)

ds$perspective_taking <- (ds$perspective_taking_01+ds$perspective_taking_02+
                          ds$perspective_taking_03+ds$perspective_taking_04+
                          ds$perspective_taking_05+ds$perspective_taking_06+
                          ds$perspective_taking_07)/7

summary(ds$perspective_taking)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      2       4       4       4       5       6     635 
Display code
ds %>% drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking (full sample)")+
  theme_bw()

Display code
ds %>% 
   drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Outgroup cooperation; hostility; perceived discrimination; willingness to fight outgroup

Not available for this dataset

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Display code
ds3 <- cbind.data.frame(ds$empathic_concern, ds$perspective_taking)
ds3 <- na.omit(ds3)

names(ds3) <- sub('ds\\$', '', names(ds3))

mtx <- cor(ds3[, c(1:2)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 8. Regression with new variables

This set of regression models have additional predictor variables compared to the last set. It adds empathic concern and perspective taking as additional predictors.

Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+Age+Female+Married+Wealth_level, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion0.0410.140**0.0170.110*
(0.049)(0.050)(0.047)(0.046)
IG_Identification0.440***0.310***0.580***0.450***
(0.047)(0.048)(0.045)(0.045)
OG_Bonds-0.0230.078***-0.043*0.027
(0.020)(0.021)(0.020)(0.019)
empathic_concern0.150*0.210**0.150*0.069
(0.074)(0.075)(0.071)(0.070)
perspective_taking0.390***0.1100.330***0.210***
(0.059)(0.060)(0.057)(0.056)
Age-0.002-0.005-0.007-0.003
(0.005)(0.005)(0.005)(0.005)
Female0.0990.0240.0740.010
(0.079)(0.080)(0.076)(0.074)
Married0.0060.0670.0390.060
(0.098)(0.099)(0.094)(0.092)
Wealth_level0.0020.0030.0010.001
(0.002)(0.002)(0.002)(0.002)
Constant0.1400.5900.2300.630
(0.380)(0.380)(0.360)(0.360)
Observations1,1661,1611,1671,159
R20.4600.3600.5600.500
Adjusted R20.4600.3500.5600.500
Residual Std. Error1.300 (df = 1156)1.300 (df = 1151)1.200 (df = 1157)1.200 (df = 1149)
F Statistic110.000*** (df = 9; 1156)71.000*** (df = 9; 1151)163.000*** (df = 9; 1157)130.000*** (df = 9; 1149)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 9. Alternative regression models with different outcomes

This set of regression models have different outcome variables which are: outgroup cooperation, outgroup hostility, and willingness to fight outgroup. None of these DVs are available for this dataset.

Section 10. Religious freedom measures

Social perception of religious freedom

Display code
ds$religious_freedom_per_01 <- as.numeric(ds$FORB1)

ds$religious_freedom_per_02a <- as.numeric(ds$FORB2)
ds$religious_freedom_per_02 <- (8 - ds$religious_freedom_per_02a)

ds$religious_freedom_per_03 <- as.numeric(ds$FORB3)
ds$religious_freedom_per_04 <- as.numeric(ds$FORB4)
ds$religious_freedom_per_05 <- as.numeric(ds$FORB5)
ds$religious_freedom_per_06 <- as.numeric(ds$FORB6)
ds$religious_freedom_per_07 <- as.numeric(ds$FORB7)
ds$religious_freedom_per_08 <- as.numeric(ds$FORB8)
ds$religious_freedom_per_09 <- as.numeric(ds$FORB9)

ds$sprf <- (ds$religious_freedom_per_01+ds$religious_freedom_per_02+
            ds$religious_freedom_per_03+ds$religious_freedom_per_04+
            ds$religious_freedom_per_05+ds$religious_freedom_per_06+
            ds$religious_freedom_per_07+ds$religious_freedom_per_08+
            ds$religious_freedom_per_09)/9

summary(ds$sprf)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       7       6       8      10     144 
Display code
ds %>% drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 6.00", 
                 xintercept = 6.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom (full sample)")+
  theme_bw()

Display code
ds %>% 
   drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Experience of religious freedom

Display code
ds$religious_freedom_exp_01a <- as.numeric(ds$REL.PERS_1)
ds$religious_freedom_exp_01 <- (8 - ds$religious_freedom_exp_01a)

ds$religious_freedom_exp_02a <- as.numeric(ds$REL.PERS_2)
ds$religious_freedom_exp_02 <- (8 - ds$religious_freedom_exp_02a)

ds$religious_freedom_exp_03a <- as.numeric(ds$REL.PERS_3)
ds$religious_freedom_exp_03 <- (8 - ds$religious_freedom_exp_03a)

ds$religious_freedom_exp_04a <- as.numeric(ds$REL.PERS_4)
ds$religious_freedom_exp_04 <- (8 - ds$religious_freedom_exp_04a)

ds$religious_freedom_exp_05a <- as.numeric(ds$REL.PERS_5)
ds$religious_freedom_exp_05 <- (8 - ds$religious_freedom_exp_05a)

ds$religious_freedom_exp_06a <- as.numeric(ds$REL.PERS_6)
ds$religious_freedom_exp_06 <- (8 - ds$religious_freedom_exp_06a)

ds$religious_freedom_exp_07a <- as.numeric(ds$REL.PERS7)
ds$religious_freedom_exp_07 <- (8 - ds$religious_freedom_exp_07a)

ds$religious_freedom_exp_08a <- as.numeric(ds$REL.PERS8)
ds$religious_freedom_exp_08 <- (8 - ds$religious_freedom_exp_08a)

ds$religious_freedom_exp_09a <- as.numeric(ds$REL.PERS9)
ds$religious_freedom_exp_09 <- (9 - ds$religious_freedom_exp_09a)

ds$religious_freedom_exp_10a <- as.numeric(ds$REL.PERS10)
ds$religious_freedom_exp_10 <- (10 - ds$religious_freedom_exp_10a)

ds$religious_freedom_exp_11a <- as.numeric(ds$REL.PERS11)
ds$religious_freedom_exp_11 <- (11 - ds$religious_freedom_exp_11a)

ds$exp_religious_freedom <- (ds$religious_freedom_exp_01+ds$religious_freedom_exp_02+
                             ds$religious_freedom_exp_03+ds$religious_freedom_exp_04+
                             ds$religious_freedom_exp_05+ds$religious_freedom_exp_06+
                             ds$religious_freedom_exp_07+ds$religious_freedom_exp_08+
                             ds$religious_freedom_exp_09+ds$religious_freedom_exp_10+
                             ds$religious_freedom_exp_11)/11

summary(ds$exp_religious_freedom)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      4       5       6       6       7       8     635 
Display code
ds %>% drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 6.00", 
                 xintercept = 6.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom (full sample)")+
  theme_bw()

Display code
ds %>% 
   drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Experience of religious freedom based on religious affiliation

Display code
ds$Religion <- ds$religion2

ds2 <- ds %>% 
drop_na(Religion, exp_religious_freedom)%>%
 group_by(Religion) %>% 
 summarise(Exp_religious_freedom=mean(exp_religious_freedom),.groups = 'drop') %>%
  as.data.frame()

ds2
                  Religion Exp_religious_freedom
1                 Agnostic                   5.5
2                  Atheist                   4.7
3      Christian: Catholic                   5.9
4         Christian: Other                   6.0
5    Christian: Protestant                   6.1
6                    Hindu                   5.7
7                   Jewish                   6.3
8             Muslim: Shia                   5.7
9            Muslim: Sunni                   5.8
10                    None                   6.4
11                   Other                   5.7
12 Spiritual not Religious                   5.8
Display code
ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = Religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom (full sample)")+
  coord_flip()+
  theme_bw()

Display code
ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = Religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom by country and religion")+
  coord_flip()+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Section 11. Positive/Negative contact with outgroup

Not available for this dataset

Section 12. Inter-group marriage

Not available for this dataset

Section 13. Outgroup affect

Not available for this dataset

Section 14. Updated regression models

This set of regression models have additional predictors: frequency of positive and negative contact, and interaction between perspective taking and perceived history of discrimination. None of these variables were available for this dataset.

Section 15. Updated regression models (alternate DVs)

Same predictors as section 14 above, but with different outcomes. This set of regression models has an additional outcome variable: affect towards outgroup. None of these variables were available for this dataset.

Section 16. Imagistic Experience: Factor analysis

Display code
ds$event_negative_affect <- as.numeric(ds$NEG.AFF)
table(ds$event_negative_affect)

  1   2   3   4   5   6   7 
554 199 179 184 195 188 298 
Display code
ds$event_positive_affect_01 <- (8 - ds$event_negative_affect)
table(ds$event_positive_affect_01)

  1   2   3   4   5   6   7 
298 188 195 184 179 199 554 
Display code
ds$event_positive_affect_02 <- as.numeric(ds$POS.AFF)
table(ds$event_positive_affect_02)

  1   2   3   4   5   6   7 
326 150 134 142 209 271 573 
Display code
ds$event_positive_affect <- ds$event_positive_affect_02

ds$event_episodic_recall_01 <- as.numeric(ds$VIV.MEM)
table(ds$event_episodic_recall_01)

  1   2   3   4   5   6   7 
216 141 137 197 261 304 547 
Display code
ds$event_episodic_recall_02 <- as.numeric(ds$REL.MEM)
table(ds$event_episodic_recall_02)

  1   2   3   4   5   6   7 
253 133 134 188 264 266 560 
Display code
ds$event_shared_perception_01 <- as.numeric(ds$SHRD.EXP)
table(ds$event_shared_perception_01)

  1   2   3   4   5   6   7 
229 146 154 217 296 286 475 
Display code
ds$event_shared_perception_02 <- as.numeric(ds$SHRD.MEM)
table(ds$event_shared_perception_02)

  1   2   3   4   5   6   7 
250 146 159 215 308 315 411 
Display code
ds$event_event_reflection_01 <- as.numeric(ds$REFL1)
table(ds$event_event_reflection_01)

  1   2   3   4   5   6   7 
211 156 166 228 302 305 445 
Display code
ds$event_event_reflection_02 <- as.numeric(ds$REFL2)
table(ds$event_event_reflection_02)

  1   2   3   4   5   6   7 
204 152 191 289 338 284 343 
Display code
ds$event_transformative_indiv_01 <- as.numeric(ds$PERSIG)
table(ds$event_transformative_indiv_01)

  1   2   3   4   5   6   7 
233 129 130 160 235 279 637 
Display code
ds$event_transformative_indiv_02 <- as.numeric(ds$TRNSFM)
table(ds$event_transformative_indiv_02)

  1   2   3   4   5   6   7 
246 157 176 258 262 274 431 
Display code
ds$event_transformative_group_01 <- as.numeric(ds$IMPTGRP1)
table(ds$event_transformative_group_01)

  1   2   3   4   5   6   7 
261 128 115 173 266 290 577 
Display code
ds$event_transformative_group_02 <- as.numeric(ds$IMPTGRP2)
table(ds$event_transformative_group_02)

  1   2   3   4   5   6   7 
297 149 195 257 281 282 338 
Display code
imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)

imagistic <- na.omit(imagistic)

names(imagistic) <- sub('ds\\$event\\_', '', names(imagistic))

mtx <- cor(imagistic[, c(1:12)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Imagistic experience: Factor analysis

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(imagistic))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(imagistic))
Overall MSA =  0.95
MSA for each item = 
        negative_affect         positive_affect      episodic_recall_01 
                   0.40                    0.91                    0.95 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.96                    0.96                    0.96 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.97                    0.96                    0.96 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.96                    0.95                    0.95 
Display code
## Bartlett's test of sphericity
cortest.bartlett(imagistic)
$chisq
[1] 12202

$p.value
[1] 0

$df
[1] 66
Display code
## Parallel test
# parallel <- fa.parallel(imagistic)

Scree plot suggests there needs to be two factors, but theoretically we want six factors. Here is how the three factors break down:

Display code
fit02 <- factanal(imagistic, 2, rotation="promax")
fit02

Call:
factanal(x = imagistic, factors = 2, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                   0.51                    0.36                    0.33 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.35                    0.46                    0.41 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.40                    0.42                    0.34 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.53                    0.30                    0.63 

Loadings:
                        Factor1 Factor2
negative_affect                  0.708 
positive_affect          0.600  -0.460 
episodic_recall_01       0.815         
episodic_recall_02       0.801         
shared_perception_01     0.740   0.124 
shared_perception_02     0.769         
event_reflection_01      0.782         
event_reflection_02      0.765         
transformative_indiv_01  0.800         
transformative_indiv_02  0.690         
transformative_group_01  0.813  -0.113 
transformative_group_02  0.614         

               Factor1 Factor2
SS loadings       6.16   0.765
Proportion Var    0.51   0.064
Cumulative Var    0.51   0.577

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.117
Factor2   0.117   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 405 on 43 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000000000000000000000022 

The factor structure above makes sense but is a little incoherent. Here is what happens if we “force” six factors:

Display code
fit06 <- factanal(imagistic, 6, rotation="promax")
fit06

Call:
factanal(x = imagistic, factors = 6, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                  0.610                   0.291                   0.304 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                  0.046                   0.393                   0.344 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                  0.402                   0.005                   0.295 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                  0.404                   0.286                   0.429 

Loadings:
                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
negative_affect                                  0.654                 
positive_affect          0.153   0.147          -0.585   0.176         
episodic_recall_01       0.571   0.312                  -0.127         
episodic_recall_02       0.245                                   0.794 
shared_perception_01     0.800                   0.118                 
shared_perception_02     0.774  -0.168                   0.125         
event_reflection_01      0.411   0.332                                 
event_reflection_02                      0.938                         
transformative_indiv_01  0.466   0.524                  -0.111         
transformative_indiv_02          0.588                   0.350         
transformative_group_01  0.607                  -0.163   0.147         
transformative_group_02  0.139                           0.650         

               Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
SS loadings        2.4   0.894   0.893   0.819   0.660   0.649
Proportion Var     0.2   0.075   0.074   0.068   0.055   0.054
Cumulative Var     0.2   0.278   0.352   0.421   0.476   0.530

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
Factor1   1.000   0.519   0.654   0.208  -0.553  -0.708
Factor2   0.519   1.000   0.678   0.294  -0.398  -0.671
Factor3   0.654   0.678   1.000   0.321  -0.677  -0.757
Factor4   0.208   0.294   0.321   1.000  -0.149  -0.346
Factor5  -0.553  -0.398  -0.677  -0.149   1.000   0.580
Factor6  -0.708  -0.671  -0.757  -0.346   0.580   1.000

Test of the hypothesis that 6 factors are sufficient.
The chi square statistic is 13 on 9 degrees of freedom.
The p-value is 0.16 

FA solution still seems incoherent.

Imagistic experience: Reliability

Display code
imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)


## Reliability:
alph01 <- psych::alpha(imagistic)
Some items ( ds$event_negative_affect ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option
Display code
alph01

Reliability analysis   
Call: psych::alpha(x = imagistic)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.91      0.91    0.92      0.46  10 0.003  4.5 1.5     0.53

    95% confidence boundaries 
         lower alpha upper
Feldt      0.9  0.91  0.91
Duhachek   0.9  0.91  0.91

 Reliability if an item is dropped:
                                 raw_alpha std.alpha G6(smc) average_r  S/N
ds$event_negative_affect              0.93      0.93    0.93      0.55 13.4
ds$event_positive_affect              0.90      0.91    0.91      0.46  9.6
ds$event_episodic_recall_01           0.89      0.90    0.91      0.44  8.6
ds$event_episodic_recall_02           0.89      0.90    0.91      0.44  8.7
ds$event_shared_perception_01         0.90      0.90    0.91      0.45  9.0
ds$event_shared_perception_02         0.90      0.90    0.91      0.44  8.8
ds$event_event_reflection_01          0.89      0.90    0.91      0.44  8.8
ds$event_event_reflection_02          0.90      0.90    0.91      0.45  8.8
ds$event_transformative_indiv_01      0.89      0.90    0.91      0.44  8.7
ds$event_transformative_indiv_02      0.90      0.90    0.91      0.45  9.1
ds$event_transformative_group_01      0.89      0.90    0.91      0.44  8.6
ds$event_transformative_group_02      0.90      0.90    0.92      0.46  9.5
                                 alpha se  var.r med.r
ds$event_negative_affect           0.0024 0.0058  0.56
ds$event_positive_affect           0.0032 0.0488  0.55
ds$event_episodic_recall_01        0.0035 0.0514  0.52
ds$event_episodic_recall_02        0.0035 0.0519  0.53
ds$event_shared_perception_01      0.0034 0.0559  0.53
ds$event_shared_perception_02      0.0034 0.0541  0.53
ds$event_event_reflection_01       0.0035 0.0544  0.52
ds$event_event_reflection_02       0.0034 0.0549  0.53
ds$event_transformative_indiv_01   0.0035 0.0514  0.52
ds$event_transformative_indiv_02   0.0033 0.0565  0.55
ds$event_transformative_group_01   0.0035 0.0507  0.52
ds$event_transformative_group_02   0.0032 0.0572  0.56

 Item statistics 
                                    n raw.r std.r r.cor r.drop mean  sd
ds$event_negative_affect         1797  0.12  0.11 0.015 -0.013  3.6 2.3
ds$event_positive_affect         1805  0.66  0.65 0.620  0.573  4.6 2.3
ds$event_episodic_recall_01      1803  0.81  0.81 0.803  0.767  4.8 2.1
ds$event_episodic_recall_02      1798  0.81  0.80 0.791  0.756  4.7 2.2
ds$event_shared_perception_01    1803  0.76  0.75 0.723  0.692  4.6 2.1
ds$event_shared_perception_02    1804  0.78  0.78 0.761  0.728  4.5 2.1
ds$event_event_reflection_01     1813  0.79  0.79 0.769  0.738  4.6 2.0
ds$event_event_reflection_02     1801  0.78  0.78 0.755  0.725  4.5 1.9
ds$event_transformative_indiv_01 1803  0.81  0.81 0.794  0.759  4.9 2.2
ds$event_transformative_indiv_02 1804  0.73  0.73 0.693  0.664  4.5 2.1
ds$event_transformative_group_01 1810  0.82  0.82 0.813  0.777  4.8 2.2
ds$event_transformative_group_02 1799  0.66  0.66 0.616  0.588  4.3 2.1

Non missing response frequency for each item
                                    1    2    3    4    5    6    7 miss
ds$event_negative_affect         0.31 0.11 0.10 0.10 0.11 0.10 0.17 0.05
ds$event_positive_affect         0.18 0.08 0.07 0.08 0.12 0.15 0.32 0.04
ds$event_episodic_recall_01      0.12 0.08 0.08 0.11 0.14 0.17 0.30 0.05
ds$event_episodic_recall_02      0.14 0.07 0.07 0.10 0.15 0.15 0.31 0.05
ds$event_shared_perception_01    0.13 0.08 0.09 0.12 0.16 0.16 0.26 0.05
ds$event_shared_perception_02    0.14 0.08 0.09 0.12 0.17 0.17 0.23 0.05
ds$event_event_reflection_01     0.12 0.09 0.09 0.13 0.17 0.17 0.25 0.04
ds$event_event_reflection_02     0.11 0.08 0.11 0.16 0.19 0.16 0.19 0.05
ds$event_transformative_indiv_01 0.13 0.07 0.07 0.09 0.13 0.15 0.35 0.05
ds$event_transformative_indiv_02 0.14 0.09 0.10 0.14 0.15 0.15 0.24 0.05
ds$event_transformative_group_01 0.14 0.07 0.06 0.10 0.15 0.16 0.32 0.04
ds$event_transformative_group_02 0.17 0.08 0.11 0.14 0.16 0.16 0.19 0.05

Imagistic experience: Visualization

If we conceptualize “imagistic experience” as a single measure that is the sum of the six scales, here is the visualization of the measure:

Display code
ds$imagistic <- (((ds$event_positive_affect)+
                 (ds$event_negative_affect)+ 
                ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)+
                ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)+
                ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)+
                ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)+
                ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)))

summary(ds$imagistic)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      7      26      34      31      39      49     157 
Display code
ds %>% drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 32.00", 
                 xintercept = 32.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Imagistic experience score", 
       y = "Frequency", 
       title = "Imagistic experience (full sample)")+
  theme_bw()

Display code
ds %>% 
  drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  labs(x = "Imagistic experience score", 
       y = "Frequency", 
       title = "Imagistic experience by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Section 17. Imagistic experience: sub-scales

Below are the individual Cronbach’s alpha for each of the seven subscales, followed by their individual visualizations, followed by the correlation between the seven sub-scales

Negative affect about event

Display code
## Positive affect:
negative_affect <- cbind.data.frame(ds$event_negative_affect)
negative_affect <- na.omit(negative_affect)

## Visualization:
ds$event_negative_affect <- (ds$event_negative_affect)
summary(ds$event_negative_affect)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       1       3       4       6       7      93 
Display code
ds %>% drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event (full sample)")+
  theme_bw()

Display code
ds %>% 
  drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Positive affect about event

Display code
## Positive affect:
positive_affect <- cbind.data.frame(ds$event_positive_affect)
positive_affect <- na.omit(positive_affect)

## Visualization:
ds$event_positive_affect <- (ds$event_positive_affect)
summary(ds$event_positive_affect)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       2       5       5       7       7      85 
Display code
ds %>% drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event: Gambia")+
  theme_bw()

Display code
ds %>% 
  drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Episodic recall of event

Display code
## Episodic recall:
episodic_recall <- cbind.data.frame(ds$event_episodic_recall_01, ds$event_episodic_recall_02)
episodic_recall <- na.omit(episodic_recall)

names(episodic_recall) <- sub('ds\\$event\\_', '', names(episodic_recall))

mtx <- cor(episodic_recall[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(episodic_recall)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd median_r
      0.83      0.83    0.71      0.71 4.8 0.0081  4.8  2     0.71
Display code
## Visualization:
ds$event_episodic_recall <- ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)
summary(ds$event_episodic_recall)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     111 
Display code
ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Shared perception of event

Display code
## Shared perception:
shared_perception <- cbind.data.frame(ds$event_shared_perception_01, ds$event_shared_perception_02)
shared_perception <- na.omit(shared_perception)

names(shared_perception) <- sub('ds\\$event\\_', '', names(shared_perception))

mtx <- cor(shared_perception[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(shared_perception)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.77      0.77    0.62      0.62 3.3 0.011  4.6 1.9     0.62
Display code
## Visualization:
ds$event_shared_perception <- ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)
summary(ds$event_shared_perception)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     106 
Display code
ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Reflection of event

Display code
## Reflection:
event_reflection <- cbind.data.frame(ds$event_event_reflection_01, ds$event_event_reflection_02)
event_reflection <- na.omit(event_reflection)

names(event_reflection) <- sub('ds\\$event\\_', '', names(event_reflection))

mtx <- cor(event_reflection[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(event_reflection)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.76      0.76    0.61      0.61 3.2 0.011  4.6 1.8     0.61
Display code
## Visualization:
ds$event_event_reflection <- ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)
summary(ds$event_event_reflection)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     103 
Display code
ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Transformative event for individual

Display code
## Reflection:
event_transformative_indiv <- cbind.data.frame(ds$event_transformative_indiv_01, ds$event_transformative_indiv_02)

event_transformative_indiv <- na.omit(event_transformative_indiv)

names(event_transformative_indiv) <- sub('ds\\$event\\_', '', names(event_transformative_indiv))

mtx <- cor(event_transformative_indiv[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
psych::alpha(event_transformative_indiv)

Reliability analysis   
Call: psych::alpha(x = event_transformative_indiv)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.73      0.73    0.57      0.57 2.7 0.013  4.7 1.9     0.57

    95% confidence boundaries 
         lower alpha upper
Feldt      0.7  0.73  0.75
Duhachek   0.7  0.73  0.75

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
transformative_indiv_01      0.59      0.57    0.33      0.57 1.3       NA
transformative_indiv_02      0.55      0.57    0.33      0.57 1.3       NA
                        var.r med.r
transformative_indiv_01     0  0.57
transformative_indiv_02     0  0.57

 Item statistics 
                           n raw.r std.r r.cor r.drop mean  sd
transformative_indiv_01 1779  0.89  0.89  0.67   0.57  4.9 2.1
transformative_indiv_02 1779  0.88  0.89  0.67   0.57  4.5 2.1

Non missing response frequency for each item
                           1    2    3    4    5    6    7 miss
transformative_indiv_01 0.13 0.07 0.07 0.09 0.13 0.16 0.36    0
transformative_indiv_02 0.13 0.09 0.10 0.14 0.15 0.15 0.24    0
Display code
## Visualization:
ds$event_transformative_indiv <- ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)
summary(ds$event_transformative_indiv)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     111 
Display code
ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Transformative event for group

Display code
event_transformative_group <- cbind.data.frame(ds$event_transformative_group_01, ds$event_transformative_group_02)
event_transformative_group <- na.omit(event_transformative_group)

names(event_transformative_group) <- sub('ds\\$event\\_', '', names(event_transformative_group))

mtx <- cor(event_transformative_group[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(event_transformative_group)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.68      0.68    0.52      0.52 2.2 0.015  4.5 1.8     0.52
Display code
## Visualization:
ds$event_transformative_group <- ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)
summary(ds$event_transformative_group)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7     105 
Display code
ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group (full sample)")+
  theme_bw()

Display code
ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 10)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group by country")+
    facet_wrap(vars(Country), nrow = 2)+
  theme_bw()

Correlation between seven subscales

Display code
imagistic_subscales <- cbind.data.frame(ds$event_positive_affect, ds$event_negative_affect,
                                        ds$event_episodic_recall, 
                                        ds$event_shared_perception, ds$event_event_reflection, 
                                        ds$event_transformative_indiv, ds$event_transformative_group)


imagistic_subscales <- na.omit(imagistic_subscales)

names(imagistic_subscales) <- sub('ds\\$event\\_', '', names(imagistic_subscales))

mtx <- cor(imagistic_subscales[, c(1:6)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Display code
## Four regression models predicting IG/OG Fusion/Identification:

lm01 <- lm(IG_Fusion~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)

lm02 <- lm(IG_Identification~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)

lm03 <- lm(OG_Bonds~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+Wealth_level,
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
IG_FusionIG_IdentificationOG_Bonds
(1)(2)(3)
event_positive_affect0.096***0.065***0.130***
(0.017)(0.017)(0.032)
event_negative_affect0.004-0.041**0.220***
(0.014)(0.014)(0.026)
event_episodic_recall0.210***0.300***-0.250***
(0.025)(0.026)(0.049)
event_shared_perception0.200***0.240***-0.032
(0.025)(0.025)(0.046)
event_event_reflection0.180***0.140***0.060
(0.027)(0.028)(0.052)
event_transformative_indiv0.180***0.200***0.037
(0.026)(0.026)(0.049)
event_transformative_group0.072**0.0400.100*
(0.025)(0.025)(0.048)
Age-0.002-0.0050.005
(0.003)(0.003)(0.007)
Female-0.0250.040-0.330**
(0.058)(0.058)(0.110)
Married0.057-0.0670.140
(0.073)(0.073)(0.140)
Wealth_level-0.00030.00030.007**
(0.001)(0.001)(0.003)
Constant0.460**0.520***2.000***
(0.150)(0.150)(0.300)
Observations1,6971,6961,202
R20.6400.6700.099
Adjusted R20.6300.6700.091
Residual Std. Error1.100 (df = 1685)1.200 (df = 1684)1.800 (df = 1190)
F Statistic266.000*** (df = 11; 1685)312.000*** (df = 11; 1684)12.000*** (df = 11; 1190)
Note:*p<0.05; **p<0.01; ***p<0.001
Display code
## Save the latest version of data:
# fwrite(ds, file = "~/Desktop/oxford/data/BCL/BCL01.csv")