Data Analysis and Visualization
Pakistan Community Data

Author

Gagan Atreya

Published

July 28, 2023

Section 1. Demographics

Display code

rm(list=ls())

## Install "pacman" package if not installed
# (remove the # symbol from the line below):
# install.packages("pacman")

## Load R packages:
pacman::p_load(data.table, tidyverse, haven, labelled, vtable, 
               psych, scales, weights, clipr, forcats,
               stargazer, ggthemes, ggcharts, geomtextpath,
               corrplot, tm)

## Import dataset:
ds <- read_sav("~/Desktop/oxford/data/pakistan/Pakistan Data.sav")

options(digits = 2)

Variable: Age

Display code

ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))

ds$age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))
summary(ds$age)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     18      25      31      34      40      90

Display code

ds %>% drop_na(age)%>%
ggplot(aes(x = age))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 34.00", 
                 xintercept = 34.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Age", 
       y = "Frequency", 
       title = "Age distribution: Pakistan")+
  theme_bw()

Variable: Gender

Display code

ds$gender <- ifelse(ds$Gender == 1, "Male",
             ifelse(ds$Gender == 2, "Female", NA))

lp02 <- ds %>% drop_na(gender) %>%
lollipop_chart(x = gender,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gender distribution: Pakistan")+
  theme_bw()

lp02

Display code

bp01 <- ds %>% drop_na(gender) %>% 
  ggplot(aes(y = age, 
             x = gender))+
geom_boxplot(fill = "grey")+
  labs(y = "Age",
       x = "",
       title = "Age distribution by gender: Pakistan")+
  coord_flip()+
  theme_bw()

bp01

Variable: Province

Display code

table(ds$Province)


  1   2   3   4   5   6 
275  25 110  65  10  20

Display code

ds$province <- haven::as_factor(ds$Province)
table(ds$province)


             Punjab         Baluchistan               Sindh Khyber_Pakhtunkhuwa 
                275                  25                 110                  65 
   Gilgit-Baltistan  Azad_Jammu_Kashmir 
                 10                  20

Display code

lp03 <- ds %>% drop_na(province) %>%
lollipop_chart(x = province,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Province distribution: Pakistan")+
  theme_bw()

lp03

Variable: Household wealth

Display code

ds$hh_wealth <- haven::as_factor(ds$Household_level_of_wealth)

ds$hh_wealth <- factor(ds$hh_wealth, 
                       levels=c("Much lower", "Slightly lower", "Average",
                                "Slightly higher", "Much higher"))

lp04 <- ds %>% drop_na(hh_wealth) %>%
lollipop_chart(x = hh_wealth,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Household wealth distribution: Pakistan")+
  theme_bw()

lp04

Variable: Socioeconomic status

Display code

ds$ses1 <- haven::as_factor(ds$Socioeconomic_status)

ds$ses <- ifelse(ds$ses1 == "Lower middle", "Lower middle", 
          ifelse(ds$ses1 == "Middle", "Middle", 
          ifelse(ds$ses1 == "Uper middle", "Upper middle",
          ifelse(ds$ses1 == "Uper", "Upper", NA))))

ds$ses <- factor(ds$ses, 
                 levels=c("Lower middle", "Middle", "Upper middle", "Upper"))

lp05 <- ds %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Socioeconomic status: Pakistan")+
  theme_bw()

lp05

Variable: Annual income

Display code

ds$income <- as.numeric(ds$Annual_income)

summary(ds$income)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
   80000   430000   720000  1280405  1200000 30000000

Display code

## Remove outliers (greater than 10 million):
ds <- ds[ds$income < 10000000, ]

# ds$income <- ifelse(ds$income < 10000000, ds$income, NA)

summary(ds$income)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  80000  427500  710000 1223422 1200000 9000000

Display code

ds %>% drop_na(income)%>%
ggplot(aes(x = income))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 80)+
  geom_textvline(label = "Mean = 1.2 M", 
                 xintercept = 1223422, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  scale_x_continuous(labels = scales::unit_format(unit = "M", 
                                                  scale = 1e-6))+
  labs(x = "Annual income", 
       y = "Frequency", 
       title = "Annual income distribution: Pakistan")+
  theme_bw()

Correlation between income, wealth, and socioeconomic status

Display code

ds$fses <- as.numeric(ds$ses)

ds$fhhw <- as.numeric(ds$hh_wealth)

ds3 <- cbind.data.frame(ds$fses, ds$fhhw, ds$income)
ds3 <- na.omit(ds3)

ds3$Annual_income <- ds3$`ds$income`
ds3$Household_wealth <- ds3$`ds$fhhw`
ds3$SES <- ds3$`ds$fses`

mtx <- cor(ds3[, c(4:6)])

corrplot(mtx, method = "number", number.cex = 1.0,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Variable: Nature of employment

Display code

ds$jobnature <- haven::as_factor(ds$Job_Nature)

lp06 <- ds %>% drop_na(jobnature) %>%
lollipop_chart(x = jobnature,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Nature of employment: Pakistan")+
  theme_bw()

lp06

Variable: Religious affiliation

Display code

table(haven::as_factor(ds$Religious_Affiliation))


 Christian (Catholic) Christian (Protestor)        Muslim (Sunni) 
                   77                    50                   189 
        Muslim (Shia)              Buddhist                 Hindu 
                  142                     0                    37 
                 Sikh 
                    9

Display code

## Change label for protestant (it has typo):
val_label(ds$Religious_Affiliation, 2) <- "Christian (Protestant)"
ds$religion <- haven::as_factor(ds$Religious_Affiliation)

lp07 <- ds %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Religious affiliation: Pakistan")+
  theme_bw()

lp07

Variable: Marital status

Display code

ds$married <- ifelse(ds$Marital_status == 1, "Unmarried",
              ifelse(ds$Marital_status == 2, "Married", "Other"))

table(ds$married)


  Married     Other Unmarried 
      305        32       167

Display code

bp01 <- ds %>% drop_na(married) %>%
  ggplot(aes(x = fct_rev(fct_infreq(married))))+
  geom_bar(fill = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Marital status")+
  coord_flip()+
  theme_bw()

# bp01

lp08 <- ds %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Marital status: Pakistan")+
  theme_bw()

lp08

Variable: Number of children

Display code

ds$children <- as.numeric(gsub("\\D", "", ds$Number_of_Children))

summary(ds$children)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0       0       2       2       3      10      34

Display code

ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_bar(color = "black",
                 fill = "gray",
                 width = 0.75)+
  geom_textvline(label = "Mean = 2.00", 
                 xintercept = 2.00, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children: Pakistan")+
  theme_bw()

Variable: Ethnicity

Note: I did my best to clean up the variable, but the categories still need more work. Some of the categories (like “Christian” or “Hindu” or “Urdu speaking”) do not seem like ethnic groups.

Display code

eth <- as.data.frame(table(ds$Ethnicity))
eth$Var1 <- as.character(eth$Var1)
eth$ethnicity <- ifelse(eth$Freq < 5, "Other", eth$Var1)
l1 <- as.list(eth$ethnicity)

ds2 <- ds[, c("Ethnicity")]

ds2$ethnicity <- ifelse(ds2$Ethnicity %in% l1, ds2$Ethnicity, "Other")
ds2$ethnicity <- ifelse(ds2$ethnicity == "Pujnabi", "Punjabi", 
                 ifelse(ds2$ethnicity == "Christain", "Chrisitian", 
                 ifelse(ds2$ethnicity == "Urduspeaking", "Urdu speaking",
                        ds2$ethnicity)))

lp08 <- ds2 %>% drop_na(ethnicity) %>%
lollipop_chart(x = ethnicity,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Ethnic distribution: Pakistan")+
  theme_bw()

lp08

Variable: Education

Note: Maybe find a different label for “illiterate”??

Display code

ds$education <- as.character(haven::as_factor(ds$Level_of_education))

ds$education <- ifelse(ds$education=="Illetriate", "Illiterate",
                ifelse(ds$education=="Matric", "Matriculate",
                       ds$education))

ds$education <- factor(ds$education, 
                       levels = c("Illiterate", "Middle", "Matriculate", 
                                  "Intermediate", "Bachelor", "Master",
                                  "MPhil", "PhD"))

table(ds$education)


  Illiterate       Middle  Matriculate Intermediate     Bachelor       Master 
          10           53           34           85          102          161 
       MPhil          PhD 
          56            3

Display code

lp08 <- ds %>% drop_na(education) %>%
lollipop_chart(x = education,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Education distribution: Pakistan")+
  theme_bw()

lp08

Section 2. Factor Analysis: Group fusion/identification

Display code

## Four ingroup fusion items:

# I have a deep emotional bond with the [ingroup].
ds$IGF01 <- as.numeric(ds$Q11_1)

# I am strong because of the [ingroup].
ds$IGF02 <- as.numeric(ds$Q11_2)

# I make the [ingroup] strong.  
ds$IGF03 <- as.numeric(ds$Q11_3)

# I am one with the [ingroup]
## TWO IDENTICAL ITEMS!!! 
ds$IGF04a <- as.numeric(ds$Q11_4)
ds$IGF04b <- as.numeric(ds$Q11_5)

table(ds$IGF04a)


  1   2   3   4   5   6   7 
  4   6   8  31  80 172 203

Display code

table(ds$IGF04b)


  1   2   3   4   5   6   7 
 35  60  46 101 115 101  46

Display code

corr.test(ds$IGF04a, ds$IGF04b)

Call:corr.test(x = ds$IGF04a, y = ds$IGF04b)
Correlation matrix 
[1] 0.08
Sample Size 
[1] 504
These are the unadjusted probability values.
  The probability values  adjusted for multiple tests are in the p.adj object. 
[1] 0.07

 To see confidence intervals of the correlations, print with the short=FALSE option

Display code

## Four outgroup fusion items:

# I have a deep emotional bond with the [outgroup].
# DOES NOT EXIST!!!

## Assuming Q11_5 is first outgroup fusion item 
## (instead of duplicate of 4th ingroup fusion item):
ds$IGF04 <- as.numeric(ds$Q11_4)
ds$OGF01 <- as.numeric(ds$Q11_5)

# I am strong because of the [outgroup].
ds$OGF02 <- as.numeric(ds$Q11_6)

# I make the [outgroup] strong. 
ds$OGF03 <- as.numeric(ds$Q11_7)

# I am one with the [outgroup].
ds$OGF04 <- as.numeric(ds$Q11_8)


## Four ingroup identification items:

# I identify with the [ingroup].
ds$IGI01 <- as.numeric(ds$Q11_9)

# I have a lot in common with the [ingroup].
ds$IGI02 <- as.numeric(ds$Q11_10)

# I connect with the values of the [ingroup].
ds$IGI03 <- as.numeric(ds$Q11_11)

# I feel a sense of belonging with the [ingroup].
ds$IGI04 <- as.numeric(ds$Q11_12)


## Four outgroup identification items:

# I identify with the [outgroup].   
ds$OGI01 <- as.numeric(ds$Q11_13)

# I have a lot in common with the [outgroup].   
ds$OGI02 <- as.numeric(ds$Q11_14)

# I connect with the values of the [outgroup].  
ds$OGI03 <- as.numeric(ds$Q11_15)

# I feel a sense of belonging with the [outgroup].  
ds$OGI04 <- as.numeric(ds$Q11_16)

Display code

## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                          ds$IGI01, ds$IGI02, ds$IGI03, ds$IGI04,
                          ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                          ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:16)])

Shorthand for item names:
IG = Ingroup, OG = Outgroup
F = Fusion, I = Identification
So for example:
IGF01 = “ingroup fusion: item 1”
OGI04 = “outgroup identification: item 4”, and so on

Display code

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the sixteen different items (eight for ingroup fusion/identification and eight for outgroup fusion/identification).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code

KMO(r=cor(bonds))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.9
MSA for each item = 
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.94  0.87  0.86  0.93  0.94  0.87  0.86  0.91  0.92  0.93  0.88  0.91  0.93 
OGI02 OGI03 OGI04 
 0.91  0.87  0.89

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.9), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code

cortest.bartlett(bonds)

$chisq
[1] 4939

$p.value
[1] 0

$df
[1] 120

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code

parallel <- fa.parallel(bonds)

Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Based on the scree plot, factor analysis with two factors is the most appropriate. We will proceed with promax rotation, which assumes that the items are inter-correlated (that is, not independent from each other).

Factor analysis: Two factor model with promax rotation

Display code

fit01 <- factanal(bonds, 2, rotation="promax")
fit01


Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.46  0.39  0.42  0.40  0.61  0.37  0.31  0.61  0.67  0.49  0.36  0.38  0.63 
OGI02 OGI03 OGI04 
 0.40  0.28  0.23 

Loadings:
      Factor1 Factor2
IGF01          0.727 
IGF02          0.779 
IGF03          0.756 
IGF04          0.774 
IGI01          0.618 
IGI02          0.794 
IGI03          0.834 
IGI04          0.629 
OGF01  0.579         
OGF02  0.711         
OGF03  0.801         
OGF04  0.782         
OGI01  0.592  -0.197 
OGI02  0.774         
OGI03  0.846         
OGI04  0.873         

               Factor1 Factor2
SS loadings       4.53    4.46
Proportion Var    0.28    0.28
Cumulative Var    0.28    0.56

Factor Correlations:
        Factor1 Factor2
Factor1   1.000  -0.083
Factor2  -0.083   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 602 on 89 degrees of freedom.
The p-value is 0.000000000000000000000000000000000000000000000000000000000000000000000000000043

The promax rotated FA output suggests that there are two factors:

Factor 1 consisting of the eight outgroup fusion/identification items
Factor 2 consisting of the eight ingroup fusion/identification items
The two factors are negatively correlated (coefficient = -0.082)
The two factors cumulatively explain 56% of the variance in the data. Factor 1 explains 28% of the variance, and factor 2 explains 28% of the variance.

Four factor solution:

Display code

fit02 <- factanal(bonds, 4, rotation="promax")
fit02


Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.47  0.25  0.25  0.38  0.58  0.27  0.18  0.50  0.67  0.47  0.22  0.28  0.62 
OGI02 OGI03 OGI04 
 0.31  0.17  0.24 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.621   0.179         
IGF02          0.912                 
IGF03          0.931  -0.134         
IGF04          0.693   0.123         
IGI01          0.386   0.265   0.202 
IGI02          0.327   0.632         
IGI03          0.368   0.648         
IGI04          0.274   0.520  -0.175 
OGF01  0.570                         
OGF02  0.690                   0.162 
OGF03  0.781          -0.108   0.305 
OGF04  0.764                   0.280 
OGI01  0.592          -0.144  -0.123 
OGI02  0.817           0.103  -0.276 
OGI03  0.899                  -0.301 
OGI04  0.873                         

               Factor1 Factor2 Factor3 Factor4
SS loadings       4.59    3.04    1.28   0.469
Proportion Var    0.29    0.19    0.08   0.029
Cumulative Var    0.29    0.48    0.56   0.586

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1  1.0000   0.101  0.0151  0.1378
Factor2  0.1007   1.000  0.5346  0.2157
Factor3  0.0151   0.535  1.0000  0.0623
Factor4  0.1378   0.216  0.0623  1.0000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 135 on 62 degrees of freedom.
The p-value is 0.00000025

Display code

## Remove 4th item from all subscales and retry:

## New dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03,
                              ds$IGI01, ds$IGI02, ds$IGI03,
                              ds$OGF01, ds$OGF02, ds$OGF03,
                              ds$OGI01, ds$OGI02, ds$OGI03)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:12)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Parallel test
parallel <- fa.parallel(bonds)

Parallel analysis suggests that the number of factors =  3  and the number of components =  2

Display code

## factor loadings: 4 factor model:
fit04 <- factanal(bonds, 4, rotation="promax")
print(fit04$loadings)


Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.593   0.208         
IGF02          0.973  -0.131         
IGF03          0.885                 
IGI01          0.329   0.298   0.172 
IGI02          0.143   0.820         
IGI03          0.303   0.636         
OGF01  0.562                   0.123 
OGF02  0.628                   0.394 
OGF03  0.688                   0.216 
OGI01  0.619          -0.164         
OGI02  0.901           0.101  -0.292 
OGI03  0.918                  -0.223 

               Factor1 Factor2 Factor3 Factor4
SS loadings       3.23    2.31    1.28   0.391
Proportion Var    0.27    0.19    0.11   0.033
Cumulative Var    0.27    0.46    0.57   0.601

Three factor solution:

Display code

## factor loadings: 3 factor model:
fit05 <- factanal(bonds, 3, rotation="promax")
print(fit05$loadings)


Loadings:
      Factor1 Factor2 Factor3
IGF01          0.664   0.159 
IGF02          0.924  -0.123 
IGF03          0.862         
IGI01          0.502   0.204 
IGI02          0.483   0.649 
IGI03          0.570   0.493 
OGF01  0.600                 
OGF02  0.694                 
OGF03  0.728                 
OGI01  0.613  -0.118  -0.122 
OGI02  0.821           0.107 
OGI03  0.859                 

               Factor1 Factor2 Factor3
SS loadings       3.17    2.87   0.791
Proportion Var    0.26    0.24   0.066
Cumulative Var    0.26    0.50   0.569

Three factor solution looks reasonable. We will split the factor solution into: two factors for ingroup fusion and identification, and one factor for outgroup fusion/identification (called outgroup bonds).

Reliability analysis
Scale 1: Ingroup fusion

Display code

igfusion <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03)

names(igfusion) <- sub('ds\\$', '', names(igfusion))

igfusion <- na.omit(igfusion)
mtx2 <- cor(igfusion[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

All items are decently correlated with each other.

Display code

## Reliability:
alpha02 <- psych::alpha(igfusion)
alpha02


Reliability analysis   
Call: psych::alpha(x = igfusion)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.85      0.85     0.8      0.66 5.8 0.011  5.9 1.1     0.63

    95% confidence boundaries 
         lower alpha upper
Feldt     0.83  0.85  0.87
Duhachek  0.83  0.85  0.87

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGF01      0.85      0.85    0.75      0.75 5.9    0.013    NA  0.75
IGF02      0.74      0.75    0.60      0.60 3.0    0.022    NA  0.60
IGF03      0.77      0.77    0.63      0.63 3.4    0.020    NA  0.63

 Item statistics 
        n raw.r std.r r.cor r.drop mean  sd
IGF01 503  0.83  0.85  0.70   0.66  6.1 1.2
IGF02 503  0.90  0.90  0.84   0.77  5.9 1.3
IGF03 503  0.90  0.89  0.82   0.75  5.8 1.4

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
IGF01 0.01 0.02 0.02 0.07 0.09 0.36 0.45    0
IGF02 0.01 0.02 0.02 0.09 0.14 0.31 0.41    0
IGF03 0.02 0.02 0.03 0.07 0.16 0.32 0.38    0

The scale is quite reliable.

Reliability analysis
Scale 2: Ingroup identification

Display code

igidentification <- cbind.data.frame(ds$IGI01, ds$IGI02, ds$IGI03)

names(igidentification) <- sub('ds\\$', '', names(igidentification))

igidentification <- na.omit(igidentification)
mtx3 <- cor(igidentification[, c(1:3)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha03 <- psych::alpha(igidentification)
alpha03


Reliability analysis   
Call: psych::alpha(x = igidentification)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.81      0.82    0.78       0.6 4.5 0.015    6 1.1     0.53

    95% confidence boundaries 
         lower alpha upper
Feldt     0.78  0.81  0.84
Duhachek  0.78  0.81  0.84

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGI01      0.88      0.88    0.78      0.78 7.1    0.011    NA  0.78
IGI02      0.68      0.69    0.53      0.53 2.2    0.028    NA  0.53
IGI03      0.66      0.66    0.50      0.50 2.0    0.030    NA  0.50

 Item statistics 
        n raw.r std.r r.cor r.drop mean  sd
IGI01 503  0.81  0.79  0.58   0.54  5.9 1.4
IGI02 503  0.87  0.89  0.84   0.72  6.0 1.2
IGI03 503  0.88  0.90  0.86   0.74  6.0 1.2

Non missing response frequency for each item
         1    2    3    4    5    6    7 9 miss
IGI01 0.02 0.02 0.02 0.09 0.10 0.34 0.41 0    0
IGI02 0.01 0.01 0.03 0.06 0.13 0.36 0.41 0    0
IGI03 0.01 0.02 0.01 0.05 0.13 0.35 0.42 0    0

All items have decent correlations with each other, and the scale is quite reliable.

Reliability analysis
Scale 3: Outgroup fusion/identification

Display code

ogbonds <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03,
                            ds$OGI01, ds$OGI02, ds$OGI03)

names(ogbonds) <- sub('ds\\$', '', names(ogbonds))

ogbonds <- na.omit(ogbonds)
mtx3 <- cor(ogbonds[, c(1:6)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alpha04 <- psych::alpha(ogbonds)
alpha04


Reliability analysis   
Call: psych::alpha(x = ogbonds)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.87      0.87    0.86      0.52 6.4 0.0093  3.6 1.4     0.49

    95% confidence boundaries 
         lower alpha upper
Feldt     0.85  0.87  0.88
Duhachek  0.85  0.87  0.88

 Reliability if an item is dropped:
      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
OGF01      0.86      0.86    0.84      0.55 6.0   0.0101 0.0109  0.53
OGF02      0.84      0.84    0.82      0.51 5.3   0.0112 0.0106  0.46
OGF03      0.84      0.84    0.82      0.51 5.1   0.0115 0.0091  0.48
OGI01      0.86      0.86    0.85      0.55 6.1   0.0099 0.0099  0.53
OGI02      0.83      0.83    0.81      0.50 5.0   0.0118 0.0068  0.47
OGI03      0.83      0.83    0.80      0.49 4.7   0.0123 0.0050  0.46

 Item statistics 
        n raw.r std.r r.cor r.drop mean  sd
OGF01 502  0.70  0.71  0.61   0.57  4.4 1.7
OGF02 502  0.78  0.78  0.73   0.67  3.5 1.7
OGF03 502  0.80  0.80  0.75   0.69  3.6 1.8
OGI01 502  0.70  0.70  0.60   0.57  2.9 1.8
OGI02 502  0.81  0.81  0.78   0.71  3.8 1.8
OGI03 502  0.84  0.84  0.82   0.76  3.5 1.8

Non missing response frequency for each item
         1    2    3    4    5    6    7 miss
OGF01 0.07 0.12 0.09 0.20 0.23 0.20 0.09    0
OGF02 0.11 0.26 0.13 0.20 0.14 0.12 0.04    0
OGI01 0.29 0.26 0.11 0.13 0.10 0.08 0.03    0
OGI02 0.13 0.19 0.12 0.15 0.18 0.18 0.05    0
OGI03 0.15 0.23 0.14 0.15 0.17 0.11 0.04    0

All items have decent correlations with each other, and the scale is quite reliable.

Section 3. Factor Analysis: BCL/BBL endorsement/experience

Display code

## BCL and BBL items:
# BCL_01:
# Seek out opportunities to bridge social divisions with their opponents, enemies, opposition groups, or other outgroups.   
# Variables: Q6.1, Not found

# BCL_02:
# Demonstrate willingness to compromise with their opponents, enemies, opposition groups, or other outgroups. 
# Variables: Q6.2, Q7.1

# BCL_03:
# Try to understand and empathize with their opponents, enemies, opposition groups, or other outgroups.  
# Variables: Q6.3, Q7.2

# BBL_01:
# Represent the interests of the communities and groups that they belong to even at the cost of other groups.
# Variables: Q6.4, Q7.3

# BBL_02:
# Focus on building stronger connections/relationships within the communities and groups they belong to rather than building stronger relationships with other groups across boundaries.
# Variables: Q6.5, (Q7.4 and 7.5)

# BBL_03:
# Try to gain benefits for the communities and groups they belong to even at the expense of other groups.
# Variables: Q6.6, Q7.6

### PROBLEM ###

## there is no item for EXP_BCL_01 (experience of BCL, item 01): 
# (Seek out opportunities to bridge social...)

## BUt, EXP_BBL_02 (endorsement of BBL, item 2) has two nearly identically worded items: Q7_4 and Q7_5:
# (Focus on building stronger relationships /connections.... )

## Going over this with research partners in Pakistan confirmed this. 
## So this scale for Pakistan will have a missing item (EXP_BCL_01)
## while one item (EXP_BBL_02) was asked twice, and only one will be retained

ds$ENDBCL01 <- as.numeric(ds$Q6_1)
ds$ENDBCL02 <- as.numeric(ds$Q6_2)
ds$ENDBCL03 <- as.numeric(ds$Q6_3)
ds$ENDBBL01 <- as.numeric(ds$Q6_4)
ds$ENDBBL02 <- as.numeric(ds$Q6_5)
ds$ENDBBL03 <- as.numeric(ds$Q6_6)

ds$EXPBCL01 <- NA
ds$EXPBCL02 <- as.numeric(ds$Q7_1)
ds$EXPBCL03 <- as.numeric(ds$Q7_2)
ds$EXPBBL01 <- as.numeric(ds$Q7_3)
## For EXP_BBL_02, 
# Q7.4: Focus on building stronger relationships...
# Q7.5: Focus on building stronger connections...
## Q7.4 was dropped and Q7.5 was used:
ds$EXPBBL02 <- as.numeric(ds$Q7_5)
ds$EXPBBL03 <- as.numeric(ds$Q7_6)

leadership <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03, 
                               ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03,
                               ds$EXPBCL02, ds$EXPBCL03,
                               ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(leadership) <- sub('ds\\$', '', names(leadership))

leadership <- na.omit(leadership)
mtx1 <- cor(leadership[, c(1:11)])

Shorthand for item names:
END = Endorse, EXP = Experience
BCL = BCL, BBL = BBL
So for example:
ENDBCL01 = “Endorsement of BCL, item 1”
EXPBBL02 = “Experience of BBL, item 2”, and so on

Display code

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the eleven different items.

KMO and Bartlett’s test:

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(leadership))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(leadership))
Overall MSA =  0.72
MSA for each item = 
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL02 EXPBCL03 
    0.75     0.80     0.73     0.74     0.59     0.75     0.68     0.70 
EXPBBL01 EXPBBL02 EXPBBL03 
    0.68     0.69     0.74

Display code

## Bartlett's test of sphericity
cortest.bartlett(leadership)

$chisq
[1] 1749

$p.value
[1] 0

$df
[1] 55

Parallel test

Display code

parallel <- fa.parallel(leadership)

Parallel analysis suggests that the number of factors =  3  and the number of components =  3

Factor analysis: Four factor model with promax rotation

Display code

fit02 <- factanal(leadership, 4, rotation="promax")
fit02


Call:
factanal(x = leadership, factors = 4, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL02 EXPBCL03 
   0.374    0.413    0.281    0.711    0.005    0.647    0.005    0.524 
EXPBBL01 EXPBBL02 EXPBBL03 
   0.516    0.646    0.482 

Loadings:
         Factor1 Factor2 Factor3 Factor4
ENDBCL01  0.817                         
ENDBCL02  0.650           0.176         
ENDBCL03  0.918          -0.140         
ENDBBL01          0.447   0.122   0.161 
ENDBBL02         -0.129           1.056 
ENDBBL03 -0.104   0.403           0.266 
EXPBCL02                  1.053         
EXPBCL03         -0.188   0.589         
EXPBBL01          0.779          -0.227 
EXPBBL02          0.504           0.165 
EXPBBL03          0.743          -0.126 

               Factor1 Factor2 Factor3 Factor4
SS loadings       1.97    1.84    1.53    1.31
Proportion Var    0.18    0.17    0.14    0.12
Cumulative Var    0.18    0.35    0.49    0.60

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1   1.000  -0.025  0.5548  0.1970
Factor2  -0.025   1.000 -0.1248 -0.4759
Factor3   0.555  -0.125  1.0000  0.0873
Factor4   0.197  -0.476  0.0873  1.0000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 70 on 17 degrees of freedom.
The p-value is 0.000000024

Factor analysis suggests four factor structure for Endorsement and Experience of BCL / BBL.

Display code

end_bcl <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03)
end_bbl <- cbind.data.frame(ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03)
exp_bcl <- cbind.data.frame(ds$EXPBCL02, ds$EXPBCL03)
exp_bbl <- cbind.data.frame(ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(end_bcl) <- sub('ds\\$', '', names(end_bcl))
names(end_bbl) <- sub('ds\\$', '', names(end_bbl))
names(exp_bcl) <- sub('ds\\$', '', names(exp_bcl))
names(exp_bbl) <- sub('ds\\$', '', names(exp_bbl))

end_bcl <- na.omit(end_bcl)
end_bbl <- na.omit(end_bbl)
exp_bcl <- na.omit(exp_bcl)
exp_bbl <- na.omit(exp_bbl)

mtx1 <- cor(end_bcl[, c(1:3)])
mtx2 <- cor(end_bbl[, c(1:3)])
mtx3 <- cor(exp_bcl[, c(1:2)])
mtx4 <- cor(exp_bbl[, c(1:3)])

Reliability analysis
Scale 1: Endorsement of BCL

Display code

## Reliability:
alph01 <- psych::alpha(end_bcl)
alph01


Reliability analysis   
Call: psych::alpha(x = end_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.83      0.83    0.77      0.62 4.8 0.013  5.6 1.3      0.6

    95% confidence boundaries 
         lower alpha upper
Feldt      0.8  0.83  0.85
Duhachek   0.8  0.83  0.85

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBCL01      0.75      0.75    0.60      0.60 3.0    0.022    NA  0.60
ENDBCL02      0.80      0.80    0.66      0.66 3.9    0.018    NA  0.66
ENDBCL03      0.74      0.74    0.59      0.59 2.9    0.023    NA  0.59

 Item statistics 
           n raw.r std.r r.cor r.drop mean  sd
ENDBCL01 504  0.87  0.87  0.77   0.70  5.8 1.5
ENDBCL02 504  0.85  0.85  0.71   0.65  5.4 1.6
ENDBCL03 504  0.87  0.87  0.78   0.71  5.6 1.4

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBCL01 0.04 0.03 0.02 0.06 0.08 0.42 0.35    0
ENDBCL02 0.03 0.06 0.04 0.07 0.17 0.40 0.23    0
ENDBCL03 0.02 0.02 0.04 0.09 0.16 0.38 0.29    0

Reliability analysis
Scale 2: Endorsement of BBL

Display code

## Reliability:
alph01 <- psych::alpha(end_bbl)
alph01


Reliability analysis   
Call: psych::alpha(x = end_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.63      0.64    0.54      0.37 1.7 0.029  3.3 1.3     0.36

    95% confidence boundaries 
         lower alpha upper
Feldt     0.57  0.63  0.68
Duhachek  0.57  0.63  0.68

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r med.r
ENDBBL01      0.59      0.59    0.42      0.42 1.46    0.036    NA  0.42
ENDBBL02      0.52      0.53    0.36      0.36 1.11    0.042    NA  0.36
ENDBBL03      0.49      0.49    0.32      0.32 0.96    0.046    NA  0.32

 Item statistics 
           n raw.r std.r r.cor r.drop mean  sd
ENDBBL01 504  0.76  0.74  0.50   0.40  3.4 1.9
ENDBBL02 504  0.77  0.77  0.57   0.45  3.7 1.8
ENDBBL03 504  0.75  0.78  0.60   0.48  2.7 1.5

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
ENDBBL01 0.16 0.26 0.17 0.12 0.11 0.12 0.07    0
ENDBBL02 0.09 0.27 0.15 0.15 0.14 0.13 0.08    0
ENDBBL03 0.20 0.36 0.20 0.09 0.07 0.06 0.02    0

Reliability analysis
Scale 3: Experience of BCL

Display code

## Reliability:
alph01 <- psych::alpha(exp_bcl)
alph01


Reliability analysis   
Call: psych::alpha(x = exp_bcl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.78      0.79    0.65      0.65 3.7 0.019  5.2 1.4     0.65

    95% confidence boundaries 
         lower alpha upper
Feldt     0.74  0.78  0.82
Duhachek  0.75  0.78  0.82

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBCL02      0.72      0.65    0.42      0.65 1.8       NA     0  0.65
EXPBCL03      0.58      0.65    0.42      0.65 1.8       NA     0  0.65

 Item statistics 
           n raw.r std.r r.cor r.drop mean  sd
EXPBCL02 504  0.92  0.91  0.73   0.65  5.1 1.7
EXPBCL03 504  0.90  0.91  0.73   0.65  5.3 1.5

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBCL02 0.05 0.04 0.07 0.13 0.19 0.31 0.21    0
EXPBCL03 0.03 0.03 0.05 0.15 0.17 0.35 0.22    0

Reliability analysis
Scale 4: Experience of BBL

Display code

## Reliability:
alph01 <- psych::alpha(exp_bbl)
alph01


Reliability analysis   
Call: psych::alpha(x = exp_bbl)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.68      0.68    0.59      0.41 2.1 0.025  3.5 1.4     0.43

    95% confidence boundaries 
         lower alpha upper
Feldt     0.63  0.68  0.72
Duhachek  0.63  0.68  0.73

 Reliability if an item is dropped:
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBBL01      0.63      0.63    0.46      0.46 1.7    0.033    NA  0.46
EXPBBL02      0.60      0.60    0.43      0.43 1.5    0.036    NA  0.43
EXPBBL03      0.53      0.53    0.36      0.36 1.1    0.042    NA  0.36

 Item statistics 
           n raw.r std.r r.cor r.drop mean  sd
EXPBBL01 503  0.78  0.76  0.56   0.46  3.4 1.9
EXPBBL02 503  0.76  0.78  0.59   0.48  3.9 1.7
EXPBBL03 503  0.80  0.80  0.65   0.53  3.2 1.7

Non missing response frequency for each item
            1    2    3    4    5    6    7 miss
EXPBBL01 0.21 0.19 0.13 0.17 0.14 0.10 0.06    0
EXPBBL02 0.07 0.16 0.20 0.19 0.17 0.14 0.07    0
EXPBBL03 0.18 0.25 0.16 0.17 0.10 0.10 0.04    0

The four subscales have varying reliability scores.

Section 4. Preliminary regression analysis

Research question:
Do group fusion / identification predict BCL vs BBL?

Variables in the analysis:

Ingroup fusion
Ingroup identification
Outgroup fusion + identification (termed outgroup bonds)
Endorsement of BCL
Endorsement of BBL
Experience of BCL
Experience of BBL

Demographics:

Age
Gender
Marital status
Socio-economic status

Regression models

Display code

## Proper (usable) variables in the model:

ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))
ds$Age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$`SES-` <- ds$ses

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL02+ds$EXPBCL03)/2)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03)/3)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Bonds <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+
                             ds$OGI01+ds$OGI02+ds$OGI03)/6)

Display code

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+`SES-`,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+Age+Female+Married+`SES-`, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	Endorse_BCL	Experience_BCL	Endorse_BBL	Experience_BBL
	(1)	(2)	(3)	(4)

IG_Fusion	0.170^*	0.270^***	0.120	0.023
	(0.069)	(0.076)	(0.072)	(0.077)

IG_Identification	0.180^*	0.140	-0.260^***	-0.077
	(0.071)	(0.078)	(0.075)	(0.080)

OG_Bonds	0.033	0.027	0.010	-0.070
	(0.041)	(0.045)	(0.043)	(0.046)

Age	0.011^*	-0.0005	-0.011	0.001
	(0.006)	(0.006)	(0.006)	(0.006)

Female	-0.010	-0.160	-0.260^*	-0.017
	(0.110)	(0.120)	(0.120)	(0.120)

Married	0.088	0.190	-0.035	-0.099
	(0.130)	(0.150)	(0.140)	(0.150)

`SES-`Middle	0.510^*	0.450^*	0.360	-0.001
	(0.200)	(0.220)	(0.210)	(0.230)

`SES-`Upper middle	0.440	0.030	0.480	0.530^*
	(0.230)	(0.260)	(0.240)	(0.260)

`SES-`Upper	1.000^**	1.200^**	0.840^*	0.088
	(0.390)	(0.430)	(0.410)	(0.440)

Constant	2.500^***	2.300^***	4.200^***	4.000^***
	(0.430)	(0.480)	(0.450)	(0.480)


Observations	500	500	500	499
R²	0.110	0.130	0.067	0.035
Adjusted R²	0.096	0.110	0.050	0.017
Residual Std. Error	1.200 (df = 490)	1.300 (df = 490)	1.300 (df = 490)	1.400 (df = 489)
F Statistic	6.900^*** (df = 9; 490)	7.800^*** (df = 9; 490)	3.900^*** (df = 9; 490)	2.000^* (df = 9; 489)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 6. Ingroup/outgroup empathy/hostility items

Empathic concern

Display code

## Empathic concern
ds$empathic_concern_01 <- as.numeric(ds$Q18_1)
ds$empathic_concern_01 <- ifelse(ds$empathic_concern_01 == 8, NA, ds$empathic_concern_01)

ds$empathic_concern_02a <- as.numeric(ds$Q18_2)
ds$empathic_concern_02 <- as.numeric(8 - ds$empathic_concern_02a)

ds$empathic_concern_03 <- as.numeric(ds$Q18_3)

ds$empathic_concern <- (ds$empathic_concern_01+ds$empathic_concern_02+
                        ds$empathic_concern_03)/3

summary(ds$empathic_concern)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    2.0     5.0     6.0     5.7     6.7     7.0       1

Display code

ds %>% drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.70", 
                 xintercept = 5.70, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern")+
  theme_bw()

Perspective taking

Display code

ds$perspective_taking_01 <- as.numeric(ds$Q18_4)
ds$perspective_taking_02 <- as.numeric(ds$Q18_5)
ds$perspective_taking_03 <- as.numeric(ds$Q18_6)
ds$perspective_taking_04 <- as.numeric(ds$Q18_7)
ds$perspective_taking_04 <- ifelse(ds$perspective_taking_04 == 37, NA,
                                   ds$perspective_taking_04)

ds$perspective_taking <- (ds$perspective_taking_01+ds$perspective_taking_02+
                          ds$perspective_taking_03+ds$perspective_taking_04)/4

summary(ds$perspective_taking)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.8     5.5     5.3     6.0     7.0       3

Display code

ds %>% drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.30", 
                 xintercept = 5.30, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking")+
  theme_bw()

Outgroup cooperation

Display code

ds$og_coop_01 <- as.numeric(ds$Q12_1)
ds$og_coop_02 <- as.numeric(ds$Q12_2)

ds$og_cooperation <- (ds$og_coop_01+ds$og_coop_02)/2

summary(ds$og_cooperation)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     5.0     6.0     5.5     6.5     7.0

Display code

ds %>% drop_na(og_cooperation)%>%
ggplot(aes(x = og_cooperation))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.50", 
                 xintercept = 5.50, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup cooperation score", 
       y = "Frequency", 
       title = "Outgroup cooperation")+
  theme_bw()

Perceived history of discrimination

Display code

ds$history_discrimination <- as.numeric(ds$Q12_3)

summary(ds$history_discrimination)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     3.0     4.0     4.4     6.0     7.0

Display code

ds %>% drop_na(history_discrimination)%>%
ggplot(aes(x = history_discrimination))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 20)+
  geom_textvline(label = "Mean = 4.40", 
                 xintercept = 4.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perceived history of discrimination score", 
       y = "Frequency", 
       title = "Perceived history of discrimination")+
  theme_bw()

Outgroup hostility

Display code

ds$og_host_01 <- as.numeric(ds$Q12_4)
ds$og_host_02 <- as.numeric(ds$Q12_5)

ds$og_hostility <- (ds$og_host_01+ds$og_host_02)/2

summary(ds$og_hostility)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     2.0     3.5     3.6     4.5     7.0

Display code

ds %>% drop_na(og_hostility)%>%
ggplot(aes(x = og_hostility))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 3.60", 
                 xintercept = 3.60, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup hostility score", 
       y = "Frequency", 
       title = "Outgroup hostility")+
  theme_bw()

Willingess to fight outgroup

Display code

ds$fight_outgroup <- as.numeric(ds$Q12_6)

summary(ds$fight_outgroup)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     1.0     2.0     2.2     3.0     7.0

Display code

ds %>% drop_na(fight_outgroup)%>%
ggplot(aes(x = fight_outgroup))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 2.20", 
                 xintercept = 2.20, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Willingness to fight outgroup score", 
       y = "Frequency", 
       title = "Willingness to fight outgroup")+
  theme_bw()

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Display code

ds3 <- cbind.data.frame(ds$empathic_concern, ds$perspective_taking,
                        ds$og_cooperation, ds$history_discrimination,
                        ds$og_hostility, ds$fight_outgroup)
ds3 <- na.omit(ds3)

names(ds3) <- sub('ds\\$', '', names(ds3))

mtx <- cor(ds3[, c(1:6)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 8. Regression with new variables

This set of regression models have additional predictor variables compared to the last set. It adds empathic concern, perspective taking, and perceived history of discrimination as additional predictors.

Display code

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	Endorse_BCL	Experience_BCL	Endorse_BBL	Experience_BBL
	(1)	(2)	(3)	(4)

IG_Fusion	0.200^**	0.220^**	0.110	0.023
	(0.071)	(0.079)	(0.074)	(0.080)

IG_Identification	0.120	0.150	-0.250^**	-0.035
	(0.074)	(0.082)	(0.076)	(0.083)

OG_Bonds	0.023	0.039	0.006	-0.072
	(0.041)	(0.045)	(0.042)	(0.046)

empathic_concern	0.045	0.066	-0.240^***	-0.200^**
	(0.063)	(0.069)	(0.065)	(0.070)

perspective_taking	0.100	0.032	0.210^**	0.048
	(0.063)	(0.070)	(0.065)	(0.071)

history_discrimination	-0.082^*	0.061	0.085^*	0.044
	(0.034)	(0.037)	(0.035)	(0.038)

Age	0.011	0.0004	-0.009	0.001
	(0.006)	(0.006)	(0.006)	(0.006)

Female	-0.019	-0.140	-0.220	0.002
	(0.110)	(0.120)	(0.120)	(0.120)

Married	0.097	0.200	-0.064	-0.120
	(0.130)	(0.150)	(0.140)	(0.150)

`SES-`Middle	0.480^*	0.350	0.220	0.025
	(0.210)	(0.230)	(0.220)	(0.240)

`SES-`Upper middle	0.420	-0.099	0.340	0.530
	(0.240)	(0.270)	(0.250)	(0.270)

`SES-`Upper	0.960^*	1.100^*	0.560	0.052
	(0.400)	(0.440)	(0.410)	(0.440)

Constant	2.300^***	1.700^**	4.200^***	4.500^***
	(0.500)	(0.550)	(0.520)	(0.560)


Observations	496	496	496	495
R²	0.120	0.140	0.110	0.051
Adjusted R²	0.100	0.120	0.092	0.028
Residual Std. Error	1.200 (df = 483)	1.300 (df = 483)	1.300 (df = 483)	1.400 (df = 482)
F Statistic	5.800^*** (df = 12; 483)	6.400^*** (df = 12; 483)	5.200^*** (df = 12; 483)	2.200^* (df = 12; 482)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 9. Alternative regression models with different outcomes

This set of regression models have different outcome variables which are: outgroup cooperation, outgroup hostility, and willingness to fight outgroup.

Display code

lm01 <- lm(og_cooperation~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(og_hostility~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(fight_outgroup~IG_Fusion+IG_Identification+OG_Bonds+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	og_cooperation	og_hostility	fight_outgroup
	(1)	(2)	(3)

IG_Fusion	-0.005	0.150	0.034
	(0.076)	(0.078)	(0.085)

IG_Identification	0.380^***	-0.120	-0.250^**
	(0.079)	(0.081)	(0.088)

OG_Bonds	0.270^***	-0.190^***	0.079
	(0.044)	(0.045)	(0.049)

empathic_concern	0.160^*	-0.060	-0.460^***
	(0.067)	(0.069)	(0.075)

perspective_taking	-0.170^*	-0.067	0.010
	(0.068)	(0.069)	(0.076)

history_discrimination	-0.180^***	0.520^***	0.130^**
	(0.036)	(0.037)	(0.040)

Age	0.00001	0.001	0.004
	(0.006)	(0.006)	(0.007)

Female	-0.350^**	0.055	0.040
	(0.120)	(0.120)	(0.130)

Married	0.011	-0.092	-0.210
	(0.140)	(0.140)	(0.160)

`SES-`Middle	-0.046	-0.110	-0.063
	(0.230)	(0.230)	(0.250)

`SES-`Upper middle	0.200	-0.270	-0.073
	(0.260)	(0.260)	(0.290)

`SES-`Upper	0.520	0.018	0.230
	(0.420)	(0.430)	(0.470)

Constant	3.200^***	2.700^***	5.300^***
	(0.530)	(0.550)	(0.600)


Observations	496	496	496
R²	0.220	0.350	0.160
Adjusted R²	0.200	0.330	0.140
Residual Std. Error (df = 483)	1.300	1.300	1.400
F Statistic (df = 12; 483)	11.000^***	22.000^***	7.600^***

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 10. Religious freedom measures

Social perception of religious freedom

Display code

ds$religious_freedom_per_01 <- as.numeric(ds$Q16_1)
ds$religious_freedom_per_02a <- as.numeric(ds$Q16_2)
ds$religious_freedom_per_02 <- (8 - ds$religious_freedom_per_02a)
ds$religious_freedom_per_04 <- as.numeric(ds$Q16_3)
ds$religious_freedom_per_03 <- as.numeric(ds$Q16_4)
ds$religious_freedom_per_05 <- as.numeric(ds$Q16_5)
ds$religious_freedom_per_06 <- as.numeric(ds$Q16_6)
ds$religious_freedom_per_07 <- as.numeric(ds$Q16_7)
ds$religious_freedom_per_08 <- as.numeric(ds$Q16_8)

ds$sprf <- (ds$religious_freedom_per_01+ds$religious_freedom_per_02+
            ds$religious_freedom_per_03+ds$religious_freedom_per_04+
            ds$religious_freedom_per_05+ds$religious_freedom_per_06+
            ds$religious_freedom_per_07+ds$religious_freedom_per_08)/8

summary(ds$sprf)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    2.6     4.8     5.2     5.2     5.9     7.0

Display code

ds %>% drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.20", 
                 xintercept = 5.20, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom: Pakistan")+
  theme_bw()

Experience of religious freedom

Display code

ds$religious_freedom_exp_01a <- as.numeric(ds$Q17_1)
ds$religious_freedom_exp_01 <- (8 - ds$religious_freedom_exp_01a)

ds$religious_freedom_exp_02a <- as.numeric(ds$Q17_2)
ds$religious_freedom_exp_02 <- (8 - ds$religious_freedom_exp_02a)

ds$religious_freedom_exp_03a <- as.numeric(ds$Q17_3)
ds$religious_freedom_exp_03 <- (8 - ds$religious_freedom_exp_03a)

ds$religious_freedom_exp_04a <- as.numeric(ds$Q17_4)
ds$religious_freedom_exp_04 <- (8 - ds$religious_freedom_exp_04a)
ds$religious_freedom_exp_04 <- ifelse(ds$religious_freedom_exp_04 > 0 , ds$religious_freedom_exp_04, NA)

ds$religious_freedom_exp_05a <- as.numeric(ds$Q17_5)
ds$religious_freedom_exp_05 <- (8 - ds$religious_freedom_exp_05a)

ds$religious_freedom_exp_06a <- as.numeric(ds$Q17_6)
ds$religious_freedom_exp_06 <- (8 - ds$religious_freedom_exp_06a)

ds$exp_religious_freedom <- (ds$religious_freedom_exp_01+ds$religious_freedom_exp_02+
                             ds$religious_freedom_exp_03+ds$religious_freedom_exp_04+
                              ds$religious_freedom_exp_05+ds$religious_freedom_exp_06)/6

summary(ds$exp_religious_freedom)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.7     5.7     5.4     6.5     7.0       2

Display code

ds %>% drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.40", 
                 xintercept = 5.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom: Pakistan")+
  theme_bw()

Experience of religious freedom based on religious affiliation

Display code

ds$Religion <- ds$religion
ds2 <- ds %>% 
drop_na(Religion, exp_religious_freedom)%>%
 group_by(Religion) %>% 
 summarise(Exp_religious_freedom=mean(exp_religious_freedom),.groups = 'drop') %>%
  as.data.frame()
ds2

                Religion Exp_religious_freedom
1   Christian (Catholic)                   5.3
2 Christian (Protestant)                   5.5
3         Muslim (Sunni)                   5.9
4          Muslim (Shia)                   4.8
5                  Hindu                   5.2
6                   Sikh                   5.6

Display code

ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom: Pakistan")+
  coord_flip()+
  theme_bw()

Section 11. Positive/Negative contact with outgroup

Positive contact with outgroup

Display code

ds$pc01 <- as.character(haven::as_factor(ds$Q15))

ds$pc01 <- factor(ds$pc01, 
                       levels = c("Never", "Very rarely", "Rarely",
                                  "Sometimes", "Often", "Very Often", "Always"))


table(ds$pc01)


      Never Very rarely      Rarely   Sometimes       Often  Very Often 
         15          14          32          97          86         113 
     Always 
        147

Display code

# never, very rarely, rarely
# sometimes, often, very often, always

ds$pc02 <- as.numeric(ds$pc01)

summary(ds$pc02)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     4.0     6.0     5.3     7.0     7.0

Display code

ds %>% drop_na(pc02)%>%
ggplot(aes(x = pc02))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.30", 
                 xintercept = 5.30, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive contact score", 
       y = "Frequency", 
       title = "Positive contact with outgroup: Pakistan")+
  theme_bw()

Negative contact with outgroup

Display code

ds$nc01 <- as.character(haven::as_factor(ds$Q14))

ds$nc01 <- factor(ds$nc01, 
                       levels = c("Never", "Very rarely", "Rarely",
                                  "Sometimes", "Often", "Very Often", "Always"))


table(ds$nc01)


      Never Very rarely      Rarely   Sometimes       Often  Very Often 
        115         116         108          90          33          33 
     Always 
          9

Display code

# never, very rarely, rarely
# sometimes, often, very often, always

ds$nc02 <- as.numeric(ds$nc01)

summary(ds$nc02)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     2.0     3.0     2.9     4.0     7.0

Display code

ds %>% drop_na(pc02)%>%
ggplot(aes(x = pc02))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 2.90", 
                 xintercept = 2.90, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative contact score", 
       y = "Frequency", 
       title = "Negative contact with outgroup: Pakistan")+
  theme_bw()

Section 12. Intergroup marriage

No data for Pakistan.

Section 13. Outgroup affect

Outgroup affect: Scale construction

Display code

## Feel outgroup: negative to positive:

ds$ogaf1 <- as.character(haven::as_factor(ds$Q13_1))

## convert first character only to upper case:
ds$ogaf1 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf1, perl = TRUE)

ds$og_aff_01 <- factor(ds$ogaf1, 
                       levels=c("Very negative", "Moderately negative",
                                 "A little negative", "Neutral", 
                                 "A little positive", "Moderately positive", 
                                 "Very positive"))
table(ds$og_aff_01)


      Very negative Moderately negative   A little negative             Neutral 
                  9                  22                  58                 121 
  A little positive Moderately positive       Very positive 
                 74                 125                  95

Display code

## Feel Outgroup: Hostile to friendly:

ds$ogaf2 <- as.character(haven::as_factor(ds$Q13_2))

## convert first character only to upper case:
ds$ogaf2 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf2, perl = TRUE)

ds$og_aff_02 <- factor(ds$ogaf2, 
                       levels = c("Very hostile", "Moderately hostile", 
                                  "A little hostile", "Neutral", 
                                  "A little friendly", "Moderately friendly",
                                  "Very friendly"))

table(ds$og_aff_02)


       Very hostile  Moderately hostile    A little hostile             Neutral 
                  6                  10                  43                  94 
  A little friendly Moderately friendly       Very friendly 
                 99                 152                 100

Display code

## Feel Outgroup: Suspicious to trusting:

ds$ogaf3 <- as.character(haven::as_factor(ds$Q13_3))

## convert first character only to upper case:
ds$ogaf3 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf3, perl = TRUE)

ds$og_aff_03 <- factor(ds$ogaf3, 
                       levels = c("Very suspicious", "Moderately suspicious", 
                                  "A little suspicious", "Neutral",
                                  "A little trusting", "Moderately trusting",
                                  "Very trusting"))
table(ds$og_aff_03)


      Very suspicious Moderately suspicious   A little suspicious 
                   15                    25                    67 
              Neutral     A little trusting   Moderately trusting 
                  102                    96                   134 
        Very trusting 
                   65

Display code

## Feel Outgroup: Contempt to respect:

ds$ogaf4 <- as.character(haven::as_factor(ds$Q13_4))

## convert first character only to upper case:
ds$ogaf4 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf4, perl = TRUE)

ds$og_aff_04 <- factor(ds$ogaf4, 
                       levels = c("A lot of contempt", "Moderate contempt",
                                  "A little contempt", "Neutral", 
                                  "A little respect", "Moderate respect",
                                  "A lot of respect"))

table(ds$og_aff_04)


A lot of contempt Moderate contempt A little contempt           Neutral 
                8                14                33                89 
 A little respect  Moderate respect  A lot of respect 
               74               163               123

Display code

## Feel Outgroup: Concerned to Unconcerned:

ds$ogaf5 <- as.character(haven::as_factor(ds$Q13_5))

## convert first character only to upper case:
ds$ogaf5 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf5, perl = TRUE)

ds$og_aff_05 <- factor(ds$ogaf5, 
                       levels = c("Very concerned", "Moderately concerned", 
                                  "A little concerned", "Neutral",
                                  "A little unconcerned", "Moderately unconcerned",
                                  "Very unconcerned"))
table(ds$og_aff_05)


        Very concerned   Moderately concerned     A little concerned 
                    33                    106                     96 
               Neutral   A little unconcerned Moderately unconcerned 
                   121                     62                     62 
      Very unconcerned 
                    24

Display code

## Feel Outgroup: Threatened to Relaxed:

ds$ogaf6 <- as.character(haven::as_factor(ds$Q13_6))

## convert first character only to upper case:
ds$ogaf6 <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf6, perl = TRUE)

ds$og_aff_06 <- factor(ds$ogaf6, 
                       levels = c("Very threatened", "Moderately threatened", 
                                  "A little threatened", "Neutral", 
                                  "A little relaxed", "Moderately relaxed", 
                                  "Very relaxed"))
table(ds$og_aff_06)


      Very threatened Moderately threatened   A little threatened 
                   14                    24                    83 
              Neutral      A little relaxed    Moderately relaxed 
                  124                    60                   113 
         Very relaxed 
                   85

Outgroup affect: Factor analysis

Display code

ds$NegativeToPositive <- as.numeric(ds$og_aff_01) 
ds$HostileToFriendly <- as.numeric(ds$og_aff_02) 
ds$SuspiciousToTrusting <- as.numeric(ds$og_aff_03) 
ds$ContemptToRespect <- as.numeric(ds$og_aff_04) 
ds$ConcernedToUnconcerned <- as.numeric(ds$og_aff_05) 
ds$ThreatenedToRelaxed <- as.numeric(ds$og_aff_06) 

og_affect <- cbind.data.frame(ds$NegativeToPositive, ds$HostileToFriendly,
                              ds$SuspiciousToTrusting, ds$ContemptToRespect,
                              ds$ConcernedToUnconcerned, ds$ThreatenedToRelaxed)

names(og_affect) <- sub('ds\\$', '', names(og_affect))

og_affect <- na.omit(og_affect)
mtx1 <- cor(og_affect[, c(1:6)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(og_affect))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(og_affect))
Overall MSA =  0.87
MSA for each item = 
    NegativeToPositive      HostileToFriendly   SuspiciousToTrusting 
                  0.85                   0.82                   0.89 
     ContemptToRespect ConcernedToUnconcerned    ThreatenedToRelaxed 
                  0.90                   0.74                   0.90

Display code

## Bartlett's test of sphericity
cortest.bartlett(og_affect)

$chisq
[1] 1441

$p.value
[1] 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000021

$df
[1] 15

Display code

## Parallel test
parallel <- fa.parallel(og_affect)

Parallel analysis suggests that the number of factors =  2  and the number of components =  1

Based on the scree plot, number of factors = 1.

Display code

fit01 <- factanal(og_affect, 1, rotation="promax")
fit01


Call:
factanal(x = og_affect, factors = 1, rotation = "promax")

Uniquenesses:
    NegativeToPositive      HostileToFriendly   SuspiciousToTrusting 
                  0.32                   0.22                   0.35 
     ContemptToRespect ConcernedToUnconcerned    ThreatenedToRelaxed 
                  0.53                   0.96                   0.47 

Loadings:
                       Factor1
NegativeToPositive      0.83  
HostileToFriendly       0.88  
SuspiciousToTrusting    0.81  
ContemptToRespect       0.69  
ConcernedToUnconcerned -0.20  
ThreatenedToRelaxed     0.73  

               Factor1
SS loadings       3.16
Proportion Var    0.53

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 47 on 9 degrees of freedom.
The p-value is 0.00000047

Display code

## Reliability analysis

psych::alpha(og_affect)

Some items ( ConcernedToUnconcerned ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option


Reliability analysis   
Call: psych::alpha(x = og_affect)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd median_r
      0.75      0.77    0.81      0.35 3.3 0.016  4.8  1     0.56

    95% confidence boundaries 
         lower alpha upper
Feldt     0.72  0.75  0.79
Duhachek  0.72  0.75  0.79

 Reliability if an item is dropped:
                       raw_alpha std.alpha G6(smc) average_r S/N alpha se
NegativeToPositive          0.65      0.67    0.74      0.29 2.0   0.0234
HostileToFriendly           0.64      0.65    0.71      0.27 1.9   0.0240
SuspiciousToTrusting        0.66      0.67    0.75      0.29 2.1   0.0233
ContemptToRespect           0.70      0.71    0.77      0.33 2.5   0.0205
ConcernedToUnconcerned      0.89      0.89    0.87      0.62 8.1   0.0079
ThreatenedToRelaxed         0.68      0.69    0.77      0.31 2.3   0.0220
                        var.r med.r
NegativeToPositive     0.1728  0.55
HostileToFriendly      0.1619  0.54
SuspiciousToTrusting   0.1687  0.54
ContemptToRespect      0.1773  0.58
ConcernedToUnconcerned 0.0054  0.59
ThreatenedToRelaxed    0.1851  0.57

 Item statistics 
                         n raw.r std.r r.cor r.drop mean  sd
NegativeToPositive     503 0.832 0.835  0.82   0.72  5.0 1.5
HostileToFriendly      503 0.870 0.877  0.89   0.79  5.2 1.4
SuspiciousToTrusting   503 0.824 0.825  0.80   0.71  4.8 1.5
ContemptToRespect      503 0.719 0.729  0.66   0.57  5.4 1.5
ConcernedToUnconcerned 503 0.042 0.026 -0.22  -0.21  3.7 1.6
ThreatenedToRelaxed    503 0.781 0.778  0.72   0.64  4.7 1.6

Non missing response frequency for each item
                          1    2    3    4    5    6    7 miss
NegativeToPositive     0.02 0.04 0.12 0.24 0.15 0.25 0.19    0
HostileToFriendly      0.01 0.02 0.09 0.19 0.20 0.30 0.20    0
SuspiciousToTrusting   0.03 0.05 0.13 0.20 0.19 0.26 0.13    0
ContemptToRespect      0.02 0.03 0.07 0.18 0.15 0.32 0.24    0
ConcernedToUnconcerned 0.07 0.21 0.19 0.24 0.12 0.12 0.05    0
ThreatenedToRelaxed    0.03 0.05 0.17 0.25 0.12 0.22 0.17    0

Based on the output of factor analysis as well as reliability, it seems like one item (“concerned to unconcerned”) is problematic. Here is the scale without that item:

Outgroup affect (minus problematic item)

Display code

og_affect <- cbind.data.frame(ds$NegativeToPositive, ds$HostileToFriendly,
                              ds$SuspiciousToTrusting, ds$ContemptToRespect,
                              ds$ThreatenedToRelaxed)

names(og_affect) <- sub('ds\\$', '', names(og_affect))

og_affect <- na.omit(og_affect)
mtx1 <- cor(og_affect[, c(1:5)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(og_affect))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(og_affect))
Overall MSA =  0.87
MSA for each item = 
  NegativeToPositive    HostileToFriendly SuspiciousToTrusting 
                0.85                 0.83                 0.89 
   ContemptToRespect  ThreatenedToRelaxed 
                0.92                 0.90

Display code

## Bartlett's test of sphericity
cortest.bartlett(og_affect)

$chisq
[1] 1397

$p.value
[1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055

$df
[1] 10

Display code

## Parallel test
parallel <- fa.parallel(og_affect)

Parallel analysis suggests that the number of factors =  1  and the number of components =  1

Display code

## Reliability analysis:
psych::alpha(og_affect)


Reliability analysis   
Call: psych::alpha(x = og_affect)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.89      0.89    0.87      0.62 8.1 0.0079    5 1.3     0.59

    95% confidence boundaries 
         lower alpha upper
Feldt     0.87  0.89   0.9
Duhachek  0.87  0.89   0.9

 Reliability if an item is dropped:
                     raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r
NegativeToPositive        0.86      0.86    0.83      0.61 6.2   0.0103 0.0036
HostileToFriendly         0.85      0.85    0.81      0.58 5.5   0.0112 0.0028
SuspiciousToTrusting      0.86      0.86    0.83      0.61 6.1   0.0105 0.0073
ContemptToRespect         0.88      0.88    0.86      0.66 7.6   0.0088 0.0049
ThreatenedToRelaxed       0.88      0.88    0.85      0.64 7.1   0.0090 0.0062
                     med.r
NegativeToPositive    0.59
HostileToFriendly     0.57
SuspiciousToTrusting  0.57
ContemptToRespect     0.67
ThreatenedToRelaxed   0.63

 Item statistics 
                       n raw.r std.r r.cor r.drop mean  sd
NegativeToPositive   503  0.85  0.85  0.81   0.75  5.0 1.5
HostileToFriendly    503  0.88  0.89  0.87   0.82  5.2 1.4
SuspiciousToTrusting 503  0.85  0.85  0.80   0.76  4.8 1.5
ContemptToRespect    503  0.77  0.78  0.69   0.65  5.4 1.5
ThreatenedToRelaxed  503  0.81  0.80  0.72   0.68  4.7 1.6

Non missing response frequency for each item
                        1    2    3    4    5    6    7 miss
NegativeToPositive   0.02 0.04 0.12 0.24 0.15 0.25 0.19    0
HostileToFriendly    0.01 0.02 0.09 0.19 0.20 0.30 0.20    0
SuspiciousToTrusting 0.03 0.05 0.13 0.20 0.19 0.26 0.13    0
ContemptToRespect    0.02 0.03 0.07 0.18 0.15 0.32 0.24    0
ThreatenedToRelaxed  0.03 0.05 0.17 0.25 0.12 0.22 0.17    0

Based on the above output for factor analysis and reliability analysis, the “outgroup affect” scale is highly reliable if we drop the problematic item (“concerned to unconcerned”). Here is the visualization for the scale after dropping the problematic item:

Display code

ds$og_affect <- (ds$NegativeToPositive+ds$HostileToFriendly+
                 ds$SuspiciousToTrusting+ds$ContemptToRespect+
                 ds$ThreatenedToRelaxed)/5

summary(ds$og_affect)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       4       5       5       6       7       1

Display code

ds %>% drop_na(og_affect)%>%
ggplot(aes(x = og_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup affect score", 
       y = "Frequency", 
       title = "Outgroup affect: Pakistan")+
  theme_bw()

Section 14. Updated regression models

This set of regression models have additional predictors: frequency of positive and negative contact, and interaction between perspective taking and perceived history of discrimination

Display code

ds$freq_positive_contact <- ds$pc02
ds$freq_negative_contact <- ds$nc02
ds$perspectiveXdiscrimination <- ds$perspective_taking*ds$history_discrimination

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	Endorse_BCL	Experience_BCL	Endorse_BBL	Experience_BBL
	(1)	(2)	(3)	(4)

IG_Fusion	0.210^**	0.230^**	0.089	0.014
	(0.071)	(0.078)	(0.073)	(0.080)

IG_Identification	0.088	0.100	-0.190^*	0.003
	(0.075)	(0.083)	(0.077)	(0.084)

OG_Bonds	0.008	0.015	0.037	-0.051
	(0.042)	(0.046)	(0.043)	(0.047)

freq_positive_contact	0.063	0.110^*	-0.130^**	-0.086
	(0.040)	(0.044)	(0.041)	(0.045)

freq_negative_contact	0.001	0.037	0.081^*	0.019
	(0.038)	(0.042)	(0.039)	(0.043)

empathic_concern	0.041	0.069	-0.210^**	-0.200^**
	(0.063)	(0.070)	(0.065)	(0.071)

perspective_taking	-0.200	-0.180	0.190	0.052
	(0.130)	(0.140)	(0.130)	(0.150)

history_discrimination	-0.470^**	-0.210	0.002	0.019
	(0.160)	(0.170)	(0.160)	(0.180)

perspectiveXdiscrimination	0.075^**	0.053	0.008	0.001
	(0.028)	(0.032)	(0.029)	(0.032)

Age	0.010	0.001	-0.008	0.001
	(0.006)	(0.006)	(0.006)	(0.006)

Female	-0.028	-0.130	-0.220	-0.002
	(0.110)	(0.120)	(0.110)	(0.120)

Married	0.085	0.160	-0.020	-0.090
	(0.130)	(0.150)	(0.140)	(0.150)

`SES-`Middle	0.570^**	0.480^*	0.200	-0.016
	(0.210)	(0.240)	(0.220)	(0.240)

`SES-`Upper middle	0.530^*	0.074	0.290	0.460
	(0.250)	(0.270)	(0.250)	(0.280)

`SES-`Upper	1.000^*	1.200^**	0.490	-0.003
	(0.400)	(0.440)	(0.410)	(0.450)

Constant	3.700^***	2.300^*	4.300^***	4.700^***
	(0.810)	(0.900)	(0.840)	(0.920)


Observations	496	496	496	495
R²	0.140	0.150	0.140	0.060
Adjusted R²	0.120	0.130	0.120	0.031
Residual Std. Error	1.200 (df = 480)	1.300 (df = 480)	1.200 (df = 480)	1.400 (df = 479)
F Statistic	5.300^*** (df = 15; 480)	5.700^*** (df = 15; 480)	5.400^*** (df = 15; 480)	2.000^* (df = 15; 479)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 15. Updated regression models (alternate DVs)

Same predictors as section 14 above, but with different outcomes. This set of regression models has an additional outcome variable: affect towards outgroup.

Display code

## Four regression models:

lm01 <- lm(og_cooperation~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(og_hostility~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(fight_outgroup~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(og_affect~IG_Fusion+IG_Identification+OG_Bonds+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	og_cooperation	og_hostility	fight_outgroup	og_affect
	(1)	(2)	(3)	(4)

IG_Fusion	0.018	0.130	0.010	-0.060
	(0.074)	(0.076)	(0.083)	(0.056)

IG_Identification	0.280^***	-0.087	-0.180^*	0.160^**
	(0.078)	(0.081)	(0.089)	(0.060)

OG_Bonds	0.210^***	-0.180^***	0.110^*	0.190^***
	(0.043)	(0.045)	(0.049)	(0.033)

freq_positive_contact	0.250^***	-0.033	-0.110^*	0.320^***
	(0.042)	(0.043)	(0.047)	(0.032)

freq_negative_contact	0.022	0.160^***	0.150^**	-0.130^***
	(0.039)	(0.041)	(0.045)	(0.030)

empathic_concern	0.150^*	-0.017	-0.420^***	-0.009
	(0.065)	(0.068)	(0.074)	(0.050)

perspective_taking	-0.390^**	0.230	0.260	-0.240^*
	(0.130)	(0.140)	(0.150)	(0.100)

history_discrimination	-0.410^*	0.870^***	0.400^*	-0.430^***
	(0.160)	(0.170)	(0.190)	(0.120)

perspectiveXdiscrimination	0.050	-0.071^*	-0.059	0.052^*
	(0.030)	(0.031)	(0.034)	(0.023)

Age	0.001	0.004	0.007	-0.003
	(0.006)	(0.006)	(0.007)	(0.004)

Female	-0.340^**	0.087	0.063	-0.038
	(0.120)	(0.120)	(0.130)	(0.088)

Married	-0.086	-0.110	-0.190	0.013
	(0.140)	(0.140)	(0.160)	(0.100)

`SES-`Middle	0.170	-0.072	-0.082	-0.015
	(0.220)	(0.230)	(0.250)	(0.170)

`SES-`Upper middle	0.500	-0.190	-0.090	-0.060
	(0.260)	(0.270)	(0.290)	(0.200)

`SES-`Upper	0.700	0.020	0.180	0.002
	(0.410)	(0.430)	(0.470)	(0.310)

Constant	3.300^***	0.470	3.700^***	4.500^***
	(0.850)	(0.880)	(0.960)	(0.650)


Observations	496	496	496	495
R²	0.280	0.380	0.200	0.440
Adjusted R²	0.260	0.360	0.180	0.420
Residual Std. Error	1.200 (df = 480)	1.300 (df = 480)	1.400 (df = 480)	0.950 (df = 479)
F Statistic	13.000^*** (df = 15; 480)	20.000^*** (df = 15; 480)	8.000^*** (df = 15; 480)	25.000^*** (df = 15; 479)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 16. Imagistic Experience: Factor analysis

Display code

ds$event_negative_affect <- as.numeric(ds$Q9_1)
table(ds$event_negative_affect)


  1   2   3   4   5   6   7 
 61  47  13  26  50 117 190

Display code

ds$event_positive_affect_01 <- (8 - ds$event_negative_affect)
table(ds$event_positive_affect_01)


  1   2   3   4   5   6   7 
190 117  50  26  13  47  61

Display code

ds$event_positive_affect_02 <- as.numeric(ds$Q9_2)
table(ds$event_positive_affect_02)


  1   2   3   4   5   6   7 
242  94  26  25  16  52  48

Display code

ds$event_positive_affect <- ds$event_positive_affect_02

ds$event_episodic_recall_01 <- as.numeric(ds$Q9_3)
table(ds$event_episodic_recall_01)


  1   2   3   4   5   6   7 
  7  17  10  38  83 202 146

Display code

ds$event_episodic_recall_02 <- as.numeric(ds$Q9_4)
table(ds$event_episodic_recall_02)


  1   2   3   4   5   6   7 
 11  19  11  56  71 187 149

Display code

ds$event_shared_perception_01 <- as.numeric(ds$Q9_5)
table(ds$event_shared_perception_01)


  1   2   3   4   5   6   7 
 14  21  14  42  50 197 166

Display code

ds$event_shared_perception_02 <- as.numeric(ds$Q9_6)
table(ds$event_shared_perception_02)


  1   2   3   4   5   6   7 
 15  21  19  53  81 177 138

Display code

ds$event_event_reflection_01 <- as.numeric(ds$Q9_9)
# table(ds$event_event_reflection_01)
ds$event_event_reflection_01 <- ifelse(ds$event_event_reflection_01 > 7, NA,
                                     ds$event_event_reflection_01)
table(ds$event_event_reflection_01)


  1   2   3   4   5   6   7 
 18  41  29  76 135 141  63

Display code

ds$event_event_reflection_02 <- as.numeric(ds$Q9_10)
#table(ds$event_event_reflection_02)
ds$event_event_reflection_02 <- ifelse(ds$event_event_reflection_02 > 7, NA,
                                     ds$event_event_reflection_02)
table(ds$event_event_reflection_02)


  1   2   3   4   5   6   7 
 18  37  37 115 119 126  51

Display code

ds$event_transformative_indiv_01 <- as.numeric(ds$Q9_7)
table(ds$event_transformative_indiv_01)


  1   2   3   4   5   6   7 
 12  24  11  63  84 181 129

Display code

ds$event_transformative_indiv_02 <- as.numeric(ds$Q9_8)
table(ds$event_transformative_indiv_02)


  1   2   3   4   5   6   7 
 40  60  27 110  87 111  69

Display code

ds$event_transformative_group_01 <- as.numeric(ds$Q9_11)
table(ds$event_transformative_group_01)


  1   2   3   4   5   6   7 
 23  24  18  60  89 184 106

Display code

ds$event_transformative_group_02 <- as.numeric(ds$Q9_12)
table(ds$event_transformative_group_02)


  1   2   3   4   5   6   7 
 50  60  26  86  98 117  67

Display code

imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)

imagistic <- na.omit(imagistic)

names(imagistic) <- sub('ds\\$event\\_', '', names(imagistic))

mtx <- cor(imagistic[, c(1:12)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("darkred", "red", "red",
               "darkgrey", "blue", "darkblue"))

Imagistic experience: Factor analysis

Display code

## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(imagistic))

Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(imagistic))
Overall MSA =  0.78
MSA for each item = 
        negative_affect         positive_affect      episodic_recall_01 
                   0.54                    0.50                    0.82 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.86                    0.89                    0.82 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.77                    0.75                    0.81 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.81                    0.85                    0.82

Display code

## Bartlett's test of sphericity
cortest.bartlett(imagistic)

$chisq
[1] 2825

$p.value
[1] 0

$df
[1] 66

Display code

## Parallel test
parallel <- fa.parallel(imagistic)

Parallel analysis suggests that the number of factors =  5  and the number of components =  2

Scree plot suggests there needs to be two factors, but theoretically we want six factors. Here is how the two factors break down:

Display code

fit01 <- factanal(imagistic, 2, rotation="promax")
fit01


Call:
factanal(x = imagistic, factors = 2, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                   0.21                    0.15                    0.71 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.68                    0.43                    0.48 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.61                    0.61                    0.70 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.56                    0.45                    0.58 

Loadings:
                        Factor1 Factor2
negative_affect          0.106  -0.876 
positive_affect                  0.925 
episodic_recall_01       0.540         
episodic_recall_02       0.565         
shared_perception_01     0.733  -0.143 
shared_perception_02     0.715         
event_reflection_01      0.626         
event_reflection_02      0.623         
transformative_indiv_01  0.540         
transformative_indiv_02  0.653   0.161 
transformative_group_01  0.741         
transformative_group_02  0.650         

               Factor1 Factor2
SS loadings       4.14    1.70
Proportion Var    0.34    0.14
Cumulative Var    0.34    0.49

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000  0.0585
Factor2  0.0585  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 569 on 43 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000064

The factor structure above makes sense but is a little incoherent. There’s a seperate factor for positive affect, and the remaining items all go together into a single factor. Here is what happens if we “force” six factors:

Display code

fit02 <- factanal(imagistic, 6, rotation="promax")
fit02


Call:
factanal(x = imagistic, factors = 6, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                  0.305                   0.005                   0.420 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                  0.408                   0.374                   0.194 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                  0.455                   0.005                   0.005 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                  0.492                   0.405                   0.005 

Loadings:
                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
negative_affect                         -0.810                         
positive_affect                          1.013                         
episodic_recall_01                              -0.105           0.741 
episodic_recall_02                                               0.745 
shared_perception_01     0.731                                         
shared_perception_02     1.052                  -0.111                 
event_reflection_01      0.182   0.589                   0.177         
event_reflection_02     -0.115   1.148                  -0.125         
transformative_indiv_01         -0.118                   1.093         
transformative_indiv_02          0.291   0.107   0.393           0.116 
transformative_group_01  0.577                   0.249                 
transformative_group_02                          1.109                 

               Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
SS loadings       2.04    1.78    1.71    1.49    1.26   1.151
Proportion Var    0.17    0.15    0.14    0.12    0.10   0.096
Cumulative Var    0.17    0.32    0.46    0.58    0.69   0.786

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
Factor1   1.000 -0.0500 -0.5348  0.5911   0.615  0.4698
Factor2  -0.050  1.0000 -0.0499  0.0651  -0.173 -0.0976
Factor3  -0.535 -0.0499  1.0000 -0.5012  -0.525 -0.5540
Factor4   0.591  0.0651 -0.5012  1.0000   0.615  0.4353
Factor5   0.615 -0.1725 -0.5252  0.6150   1.000  0.6051
Factor6   0.470 -0.0976 -0.5540  0.4353   0.605  1.0000

Test of the hypothesis that 6 factors are sufficient.
The chi square statistic is 26 on 9 degrees of freedom.
The p-value is 0.0022

The factor breakdown above is much more aligned with theory. There are six factors that represent the six different measures that we want. However, the item transformative_indiv_02 is quite weird.

Imagistic experience: Reliability

Display code

## Reliability:
alph01 <- psych::alpha(imagistic)

Some items ( positive_affect ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option

Display code

alph01


Reliability analysis   
Call: psych::alpha(x = imagistic)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.79      0.82    0.89      0.28 4.6 0.014    5 0.92     0.35

    95% confidence boundaries 
         lower alpha upper
Feldt     0.76  0.79  0.81
Duhachek  0.76  0.79  0.81

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
negative_affect              0.83      0.84    0.88      0.33 5.4    0.011
positive_affect              0.84      0.86    0.88      0.35 6.0    0.010
episodic_recall_01           0.77      0.80    0.87      0.27 4.1    0.015
episodic_recall_02           0.76      0.80    0.87      0.27 4.1    0.015
shared_perception_01         0.75      0.79    0.87      0.26 3.8    0.016
shared_perception_02         0.75      0.80    0.87      0.26 3.9    0.016
event_reflection_01          0.76      0.80    0.87      0.27 4.0    0.016
event_reflection_02          0.76      0.80    0.86      0.27 4.0    0.016
transformative_indiv_01      0.77      0.81    0.88      0.27 4.2    0.015
transformative_indiv_02      0.75      0.80    0.87      0.26 3.9    0.016
transformative_group_01      0.75      0.79    0.87      0.26 3.8    0.016
transformative_group_02      0.75      0.80    0.87      0.27 4.0    0.016
                        var.r med.r
negative_affect         0.038  0.38
positive_affect         0.025  0.38
episodic_recall_01      0.066  0.35
episodic_recall_02      0.065  0.34
shared_perception_01    0.059  0.32
shared_perception_02    0.061  0.33
event_reflection_01     0.063  0.34
event_reflection_02     0.062  0.35
transformative_indiv_01 0.066  0.34
transformative_indiv_02 0.063  0.33
transformative_group_01 0.060  0.32
transformative_group_02 0.063  0.34

 Item statistics 
                          n raw.r   std.r  r.cor r.drop mean  sd
negative_affect         500 0.172  0.1702  0.134 -0.024  5.1 2.2
positive_affect         500 0.011 -0.0033 -0.051 -0.181  2.7 2.2
episodic_recall_01      500 0.604  0.6281  0.579  0.520  5.7 1.3
episodic_recall_02      500 0.636  0.6516  0.605  0.548  5.6 1.4
shared_perception_01    500 0.730  0.7374  0.717  0.656  5.7 1.5
shared_perception_02    500 0.708  0.7163  0.697  0.628  5.5 1.5
event_reflection_01     500 0.682  0.6839  0.658  0.594  4.9 1.6
event_reflection_02     500 0.676  0.6765  0.653  0.590  4.7 1.5
transformative_indiv_01 500 0.608  0.6175  0.560  0.512  5.5 1.5
transformative_indiv_02 500 0.717  0.6980  0.668  0.621  4.5 1.8
transformative_group_01 500 0.739  0.7383  0.718  0.662  5.3 1.6
transformative_group_02 500 0.690  0.6689  0.634  0.584  4.5 1.9

Non missing response frequency for each item
                           1    2    3    4    5    6    7 miss
negative_affect         0.12 0.09 0.03 0.05 0.10 0.23 0.38    0
positive_affect         0.48 0.19 0.05 0.05 0.03 0.10 0.10    0
episodic_recall_01      0.01 0.03 0.02 0.07 0.17 0.40 0.29    0
episodic_recall_02      0.02 0.04 0.02 0.11 0.14 0.37 0.30    0
shared_perception_01    0.03 0.04 0.03 0.08 0.10 0.39 0.33    0
shared_perception_02    0.03 0.04 0.04 0.10 0.16 0.35 0.27    0
event_reflection_01     0.04 0.08 0.06 0.15 0.27 0.28 0.13    0
event_reflection_02     0.04 0.07 0.07 0.23 0.24 0.25 0.10    0
transformative_indiv_01 0.02 0.05 0.02 0.12 0.17 0.36 0.26    0
transformative_indiv_02 0.08 0.12 0.05 0.22 0.17 0.22 0.14    0
transformative_group_01 0.04 0.05 0.04 0.12 0.18 0.37 0.21    0
transformative_group_02 0.10 0.12 0.05 0.17 0.20 0.23 0.13    0

Imagistic experience: Visualization

The “imagistic experience” scale (which comprises of the six different subscales) is quite reliable. The Cronbach’s alpha value of 0.79 is close to 0.80.

If we conceptualize “imagistic experience” as a single measure that is the sum of the seven scales, here is the visualization of the measure:

Display code

ds$imagistic <- (((ds$event_positive_affect)+
                 (ds$event_negative_affect)+ 
                ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)+
                ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)+
                ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)+
                ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)+
                ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)))

summary(ds$imagistic)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      7      30      34      34      38      49       4

Display code

ds %>% drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 34.00", 
                 xintercept = 34.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Imagistic experience", 
       y = "Frequency", 
       title = "Imagistic experience: Pakistan")+
  theme_bw()

Section 17. Imagistic experience: sub-scales

Below are the individual Cronbach’s alpha for each of the seven subscales, followed by their individual visualizations, followed by the correlation between the six sub-scales

Negative affect about event

Display code

## Positive affect:
negative_affect <- cbind.data.frame(ds$event_negative_affect)
negative_affect <- na.omit(negative_affect)

## Visualization:
ds$event_negative_affect <- (ds$event_negative_affect)
summary(ds$event_negative_affect)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     4.0     6.0     5.1     7.0     7.0

Display code

ds %>% drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.10", 
                 xintercept = 5.10, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event: Pakistan")+
  theme_bw()

Positive affect about event

Display code

## Positive affect:
positive_affect <- cbind.data.frame(ds$event_positive_affect)
positive_affect <- na.omit(positive_affect)

## Visualization:
ds$event_positive_affect <- (ds$event_positive_affect)
summary(ds$event_positive_affect)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     1.0     2.0     2.7     4.0     7.0       1

Display code

ds %>% drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 2.70", 
                 xintercept = 2.70, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event: Pakistan")+
  theme_bw()

Episodic recall of event

Display code

## Episodic recall:
episodic_recall <- cbind.data.frame(ds$event_episodic_recall_01, ds$event_episodic_recall_02)
episodic_recall <- na.omit(episodic_recall)

names(episodic_recall) <- sub('ds\\$event\\_', '', names(episodic_recall))

mtx <- cor(episodic_recall[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(episodic_recall)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.72      0.73    0.57      0.57 2.6 0.024  5.7 1.2     0.57

Display code

## Visualization:
ds$event_episodic_recall <- ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)
summary(ds$event_episodic_recall)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     5.0     6.0     5.7     6.5     7.0       1

Display code

ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.70", 
                 xintercept = 5.70, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event: Pakistan")+
  theme_bw()

Shared perception of event

Display code

## Shared perception:
shared_perception <- cbind.data.frame(ds$event_shared_perception_01, ds$event_shared_perception_02)
shared_perception <- na.omit(shared_perception)

names(shared_perception) <- sub('ds\\$event\\_', '', names(shared_perception))

mtx <- cor(shared_perception[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(shared_perception)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.82      0.82    0.69      0.69 4.4 0.016  5.6 1.4     0.69

Display code

## Visualization:
ds$event_shared_perception <- ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)
summary(ds$event_shared_perception)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     5.0     6.0     5.6     6.5     7.0

Display code

ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.60", 
                 xintercept = 5.60, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event: Pakistan")+
  theme_bw()

Reflection of event

Display code

## Reflection:
event_reflection <- cbind.data.frame(ds$event_event_reflection_01, ds$event_event_reflection_02)
event_reflection <- na.omit(event_reflection)

names(event_reflection) <- sub('ds\\$event\\_', '', names(event_reflection))

mtx <- cor(event_reflection[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(event_reflection)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.81      0.81    0.67      0.67 4.1 0.017  4.8 1.4     0.67

Display code

## Visualization:
ds$event_event_reflection <- ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)
summary(ds$event_event_reflection)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.0     5.0     4.8     6.0     7.0       2

Display code

ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 4.80", 
                 xintercept = 4.80, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event: Pakistan")+
  theme_bw()

Transformative event for individual

Display code

## Reflection:
event_transformative_indiv <- cbind.data.frame(ds$event_transformative_indiv_01, ds$event_transformative_indiv_02)
event_transformative_indiv <- na.omit(event_transformative_indiv)

names(event_transformative_indiv) <- sub('ds\\$event\\_', '', names(event_transformative_indiv))

mtx <- cor(event_transformative_indiv[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(event_transformative_indiv)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.56      0.57     0.4       0.4 1.3 0.038    5 1.4      0.4

Display code

## Visualization:
ds$event_transformative_indiv <- ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)
summary(ds$event_transformative_indiv)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      1       4       5       5       6       7

Display code

ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.00", 
                 xintercept = 5.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual: Pakistan")+
  theme_bw()

Transformative event for group

Display code

event_transformative_group <- cbind.data.frame(ds$event_transformative_group_01, ds$event_transformative_group_02)
event_transformative_group <- na.omit(event_transformative_group)

names(event_transformative_group) <- sub('ds\\$event\\_', '', names(event_transformative_group))

mtx <- cor(event_transformative_group[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code

## Reliability:
alph01 <- psych::alpha(event_transformative_group)
summary(alph01)


Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.72      0.72    0.56      0.56 2.6 0.025  4.9 1.5     0.56

Display code

## Visualization:
ds$event_transformative_group <- ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)
summary(ds$event_transformative_group)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0     4.0     5.0     4.9     6.0     7.0

Display code

ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 4.90", 
                 xintercept = 4.90, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group: Pakistan")+
  theme_bw()

Correlation between six subscales

Display code

imagistic_subscales <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                                        ds$event_episodic_recall, 
                                        ds$event_shared_perception, ds$event_event_reflection, 
                                        ds$event_transformative_indiv, ds$event_transformative_group)


imagistic_subscales <- na.omit(imagistic_subscales)

names(imagistic_subscales) <- sub('ds\\$event\\_', '', names(imagistic_subscales))

mtx <- cor(imagistic_subscales[, c(1:6)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("darkred", "red", "red",
               "darkgrey", "blue", "darkblue"))

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Display code

## Four regression models predicting IG/OG Fusion/Identification:

lm01 <- lm(IG_Fusion~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

lm02 <- lm(IG_Identification~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

lm03 <- lm(OG_Bonds~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

Display code

## Tabulated results:

stargazer(lm01, lm02,
          lm03, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")

Display code

htmltools::includeHTML("table1.html")


	Dependent variable:

	IG_Fusion	IG_Identification	OG_Bonds
	(1)	(2)	(3)

event_positive_affect	0.045	0.003	0.075
	(0.038)	(0.035)	(0.049)

event_negative_affect	0.032	0.031	0.070
	(0.038)	(0.036)	(0.050)

event_episodic_recall	0.130^**	0.160^***	-0.150^*
	(0.047)	(0.045)	(0.061)

event_shared_perception	0.053	0.073	-0.059
	(0.046)	(0.043)	(0.060)

event_event_reflection	0.069	-0.017	-0.014
	(0.041)	(0.039)	(0.054)

event_transformative_indiv	0.048	0.120^**	0.099
	(0.047)	(0.044)	(0.062)

event_transformative_group	0.130^**	0.065	0.160^**
	(0.042)	(0.040)	(0.056)

Age	0.013^**	0.015^***	-0.009
	(0.005)	(0.004)	(0.006)

Female	0.120	0.082	-0.150
	(0.093)	(0.088)	(0.120)

Married	0.019	0.120	0.140
	(0.110)	(0.100)	(0.150)

`SES-`Middle	-0.720^***	-0.500^**	0.760^***
	(0.170)	(0.160)	(0.220)

`SES-`Upper middle	-0.810^***	-0.660^***	0.700^**
	(0.190)	(0.180)	(0.250)

`SES-`Upper	-1.100^***	-1.100^***	0.750
	(0.320)	(0.300)	(0.420)

Constant	3.600^***	3.600^***	2.600^***
	(0.390)	(0.370)	(0.520)


Observations	499	499	498
R²	0.230	0.240	0.078
Adjusted R²	0.210	0.220	0.054
Residual Std. Error	1.000 (df = 485)	0.960 (df = 485)	1.300 (df = 484)
F Statistic	11.000^*** (df = 13; 485)	12.000^*** (df = 13; 485)	3.200^*** (df = 13; 484)

Note:	^p<0.05; ^p<0.01; ^**p<0.001

Section 1. Demographics

Variable: Age

Variable: Gender

Variable: Province

Variable: Household wealth

Variable: Socioeconomic status

Variable: Annual income

Correlation between income, wealth, and socioeconomic status

Variable: Nature of employment

Variable: Religious affiliation

Variable: Marital status

Variable: Number of children

Variable: Ethnicity

Variable: Education

Section 2. Factor Analysis: Group fusion/identification

Kaiser-Meyer-Olkin (KMO) test of factorability

Bartlett’s test of sphericity

Parallel test

Factor analysis: Two factor model with promax rotation

Four factor solution:

Three factor solution:

Reliability analysis Scale 1: Ingroup fusion

Reliability analysis Scale 2: Ingroup identification

Reliability analysis Scale 3: Outgroup fusion/identification

Section 3. Factor Analysis: BCL/BBL endorsement/experience

KMO and Bartlett’s test:

Parallel test

Factor analysis: Four factor model with promax rotation

Reliability analysis Scale 1: Endorsement of BCL

Reliability analysis Scale 2: Endorsement of BBL

Reliability analysis Scale 3: Experience of BCL

Reliability analysis Scale 4: Experience of BBL

Section 4. Preliminary regression analysis

Research question: Do group fusion / identification predict BCL vs BBL?

Variables in the analysis:

Demographics:

Regression models

Section 6. Ingroup/outgroup empathy/hostility items

Empathic concern

Perspective taking

Outgroup cooperation

Perceived history of discrimination

Outgroup hostility

Willingess to fight outgroup

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Section 8. Regression with new variables

Section 9. Alternative regression models with different outcomes

Section 10. Religious freedom measures

Social perception of religious freedom

Experience of religious freedom

Experience of religious freedom based on religious affiliation

Section 11. Positive/Negative contact with outgroup

Positive contact with outgroup

Negative contact with outgroup

Section 12. Intergroup marriage

Section 13. Outgroup affect

Outgroup affect: Scale construction

Outgroup affect: Factor analysis

Outgroup affect (minus problematic item)

Section 14. Updated regression models

Section 15. Updated regression models (alternate DVs)

Section 16. Imagistic Experience: Factor analysis

Imagistic experience: Factor analysis

Imagistic experience: Reliability

Imagistic experience: Visualization

Section 17. Imagistic experience: sub-scales

Negative affect about event

Positive affect about event

Episodic recall of event

Shared perception of event

Reflection of event

Transformative event for individual

Transformative event for group

Correlation between six subscales

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Reliability analysis
Scale 1: Ingroup fusion

Reliability analysis
Scale 2: Ingroup identification

Reliability analysis
Scale 3: Outgroup fusion/identification

Reliability analysis
Scale 1: Endorsement of BCL

Reliability analysis
Scale 2: Endorsement of BBL

Reliability analysis
Scale 3: Experience of BCL

Reliability analysis
Scale 4: Experience of BBL

Research question:
Do group fusion / identification predict BCL vs BBL?