Data Analysis and Visualization
Uganda Community Data

Author

Gagan Atreya

Published

July 31, 2023

Section 1. Demographics

Display code
rm(list=ls())

## Install "pacman" package if not installed
# (remove the # symbol from the line below):
# install.packages("pacman")

## Load R packages:
pacman::p_load(data.table, tidyverse, haven, vtable, 
               psych, scales, weights, clipr, forcats,
               stargazer, ggthemes, ggcharts, geomtextpath,
               corrplot, tm, readxl, textdata, pdftools, 
               stringr, lubridate, tidytext, kableExtra, 
               patchwork, dotwhisker, vtable, 
               corpus, wordcloud, wordcloud2, RColorBrewer)

## Import dataset:
# ds <- fread("~/Desktop/oxford/data/uganda/Uganda Data Community.csv")
ds <- read_excel("~/Desktop/oxford/data/uganda/Uganda Data Entry.xlsx")

options(digits = 2)

Variable: Age

Display code
ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))

ds$age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))
summary(ds$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     18      26      36      40      50      92       2 
Display code
ds %>% drop_na(age)%>%
ggplot(aes(x = age))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 39.68", 
                 xintercept = 39.68, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Age", 
       y = "Frequency", 
       title = "Age distribution")+
  theme_bw()

Variable: Gender

Display code
ds$gender <- ifelse(ds$Gender == "female", "Female",
             ifelse(ds$Gender == "Female", "Female",
             ifelse(ds$Gender == "male", "Male",
             ifelse(ds$Gender == "Male", "Male", NA))))


lp02 <- ds %>% drop_na(gender) %>%
lollipop_chart(x = gender,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Gender distribution")+
  theme_bw()

lp02

Display code
bp01 <- ds %>% drop_na(gender) %>% 
  ggplot(aes(y = age, 
             x = gender))+
geom_boxplot(fill = "grey")+
  labs(y = "Age",
       x = "",
       title = "Age distribution by gender")+
  coord_flip()+
  theme_bw()

bp01

Variable: Province

Display code
ds$province <- ifelse(ds$Province == "Average", NA,
                      ds$Province)

lp03 <- ds %>% drop_na(province) %>%
lollipop_chart(x = province,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Province distribution")+
  theme_bw()

lp03

Variable: Household wealth

Display code
ds$hhw <- ds$Household_level_of_wealth

ds$hh_wealth <- ifelse(ds$hhw == "Average", "Average", 
                ifelse(ds$hhw == "much higher", "Much higher",
                ifelse(ds$hhw == "Much Higher", "Much higher",
                ifelse(ds$hhw == "much lower", "Much lower",
                ifelse(ds$hhw == "Much lower", "Much lower",
                ifelse(ds$hhw == "Much Lower", "Much lower",
                ifelse(ds$hhw == "slightly higher", "Slightly higher",
                ifelse(ds$hhw == "Slightly higher", "Slightly higher",
                ifelse(ds$hhw == "Slightly Higher", "Slightly higher",
                ifelse(ds$hhw == "Slightly lower", "Slightly lower",
                ifelse(ds$hhw == "Slightly Lower", "Slightly lower", NA)))))))))))

ds$hh_wealth <- factor(ds$hh_wealth, 
                       levels=c("Much lower", "Slightly lower", "Average",
                                "Slightly higher", "Much higher"))

lp04 <- ds %>% drop_na(hh_wealth) %>%
lollipop_chart(x = hh_wealth,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Household wealth distribution")+
  theme_bw()

lp04

Variable: Socioeconomic status

Display code
ds$ses1 <- ds$Socioeconomic_status

ds$ses <- ifelse(ds$ses1 == "lower middle", "Lower middle",
          ifelse(ds$ses1 == "lower Middle", "Lower middle",
          ifelse(ds$ses1 == "Lower middle", "Lower middle",
          ifelse(ds$ses1 == "middle", "Middle",
          ifelse(ds$ses1 == "Middle", "Middle",
          ifelse(ds$ses1 == "upper middle", "Upper middle",
          ifelse(ds$ses1 == "Upper middle", "Upper middle",
          ifelse(ds$ses1 == "Upper Middle", "Upper middle", NA))))))))

ds$ses <- factor(ds$ses, 
                 levels=c("Lower middle", "Middle", "Upper middle"))

lp05 <- ds %>% drop_na(ses) %>%
lollipop_chart(x = ses,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Socioeconomic status")+
  theme_bw()

lp05

Variable: Annual income

Display code
ds$income1 <- as.numeric(gsub("\\D", "", ds$Annual_income))
ds$income <- ifelse(ds$income1 == "400000500000", ((400000+500000)/2), ds$income1)

ds$income <- ifelse(ds$income < 10000000, ds$income, NA)

summary(ds$income)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  15000  300000  800000 1396262 1800000 9600000      65 
Display code
ds %>% drop_na(income)%>%
ggplot(aes(x = income))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 80)+
  geom_textvline(label = "Mean = 1.4 M", 
                 xintercept = 1663154, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  scale_x_continuous(labels = scales::unit_format(unit = "M", 
                                                  scale = 1e-6))+
  labs(x = "Annual income", 
       y = "Frequency", 
       title = "Annual income distribution")+
  theme_bw()

Correlation between income, wealth, and socioeconomic status

Display code
ds$fses <- as.numeric(ds$ses)
ds$fhhw <- as.numeric(ds$hh_wealth)

ds3 <- cbind.data.frame(ds$fses, ds$fhhw, ds$income)
ds3 <- na.omit(ds3)

ds3$Annual_income <- ds3$`ds$income`
ds3$Household_wealth <- ds3$`ds$fhhw`
ds3$SES <- ds3$`ds$fses`

mtx <- cor(ds3[, c(4:6)])
corrplot(mtx, method = "number")

Variable: Nature of employment

Display code
ds$jobnature <- ifelse(ds$Job_Nature == "government", "Government",
                ifelse(ds$Job_Nature == "Government", "Government",
                ifelse(ds$Job_Nature == "non-government", "Non-government",
                ifelse(ds$Job_Nature == "Non-government", "Non-government",
                ifelse(ds$Job_Nature == "Non-Government", "Non-government",
                ifelse(ds$Job_Nature == "Other", "Other",
                ifelse(ds$Job_Nature == "Others- Both government and NGO", "Other",
                ifelse(ds$Job_Nature == "retired", "Retired",
                ifelse(ds$Job_Nature == "Self-employed", "Self-employed",
                ifelse(ds$Job_Nature == "Self-Employed", "Self-employed", NA))))))))))

lp06 <- ds %>% drop_na(jobnature) %>%
lollipop_chart(x = jobnature,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Nature of employment")+
  theme_bw()

lp06

Variable: Religious affiliation

Display code
ds$rel <- ds$Religious_Affiliation

ds$rel01 <- ifelse(str_detect(ds$rel, "testant") == T, "Protestant", "")

ds$rel02 <- ifelse(str_detect(ds$rel, "atholic") == T, "Catholic", "")

ds$rel03 <- ifelse(str_detect(ds$rel, "unni") == T, "Sunni", "")

ds$rel04 <- ifelse(str_detect(ds$rel, "hia") == T, "Shia", "")

ds$religion <- ifelse(ds$rel01 == "Protestant", "Christian: Protestant",
               ifelse(ds$rel02 == "Catholic", "Christian: Catholic",
               ifelse(ds$rel03 == "Sunni", "Muslim: Sunni",
               ifelse(ds$rel04 == "Shia", "Muslim: Shia", "Other"))))

lp07 <- ds %>% drop_na(religion) %>%
lollipop_chart(x = religion,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Religious affiliation")+
  theme_bw()

lp07

Variable: Marital status

Display code
ds$mrg00 <- ds$`19A. Married?`
ds$married <- ifelse(ds$mrg00 %in% c("no", "No"), "No", "Yes")
ds$mlength <- ds$`19B. How long married`

ds$mrg01 <- as.numeric(gsub("\\D", "", ds$`19B. How long married`))
ds$mrg02 <- removeNumbers(ds$`19B. How long married`)
ds$mrg03 <- ifelse(str_detect(ds$mrg02, "ear") == T, "Years", "")
ds$mrg04 <- ifelse(str_detect(ds$mrg02, "onth") == T, "Months", "")

ds$mrg011 <- ifelse(ds$mrg01 == 3034, ((30+34)/2), 
             ifelse(ds$mrg01 == 112, 1.5, ds$mrg01))

ds$mrg033 <- ifelse(ds$mlength == 6, "Years",
             ifelse(ds$mlength == 30, "Years",
             ifelse(ds$mlength == 64, "Years", ds$mrg03)))       
                   
ds$mrg044 <- ifelse(ds$mrg033 == "Years", "Years",
             ifelse(ds$mrg04 == "Months", "Months", NA))

ds$mrg_length <- ifelse(ds$mrg044 == "Years", (ds$mrg011*12), 
                 ifelse(ds$mrg044 == "Months", ds$mrg011, NA))

ds$married_01 <- ifelse(ds$married == "Yes", "Yes",
                 ifelse(ds$married == "No" & ds$mrg_length >= 0, "Yes", "No"))

ds <- ds %>% mutate_at(c('married_01'), ~replace_na(.,"No"))

ds2 <- ds[, c("married", "married_01", 
              "mlength", "mrg01", "mrg011",
              "mrg02", "mrg03", "mrg033", "mrg04", 
              "mrg044", "mrg_length")]

ds$married <- ds$married_01
ds$married_months <- ds$mrg_length

ds <- as.data.table(ds)

ds <- subset(ds, select = -c(married_01, mlength, mrg01, 
                            mrg011, mrg02, mrg03, 
                            mrg033, mrg04, mrg044, 
                            mrg_length))

ds$married <- ifelse(ds$married == "Yes", "Married",
              ifelse(ds$married == "No", "Not married", ""))

bp01 <- ds %>% drop_na(married) %>%
  ggplot(aes(x = fct_rev(fct_infreq(married))))+
  geom_bar(fill = "black")+
  labs(x = "",
       y = "Frequency",
       title = "Marital status")+
  coord_flip()+
  theme_bw()

# bp01

lp08 <- ds %>% drop_na(married) %>%
lollipop_chart(x = married,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Marital status")+
  theme_bw()

lp08

Variable: Length of marriage

Display code
ds2$marriage_length <- (ds2$mrg_length/12)

summary(ds2$marriage_length)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0       6      15      17      26      80     136 
Display code
ds2 %>% drop_na(marriage_length)%>%
ggplot(aes(x = marriage_length))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 17 years", 
                 xintercept = 17.00, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Years", 
       y = "Frequency", 
       title = "Length of marriage")+
  theme_bw()

Variable: Spouse background

Display code
ds$spb1 <- ds$`19C. Same background?`

ds$spb <- ifelse(ds$spb1 == "Yes", "Same background",
          ifelse(ds$spb1 == "yes", "Same background",
          ifelse(ds$spb1 == "no", "Different background",
          ifelse(ds$spb1 == "No", "Different background",
          ifelse(ds$spb1 == "None", "Different background", NA)))))

lp09 <- ds %>% 
lollipop_chart(x = spb,
               line_color = "black",
               point_color = "black")+
  labs(y = "Frequency",
       x = "",
       title = "Spouse background")+
  theme_bw()

lp09

Variable: Number of children

Display code
ds$children <- as.numeric(gsub("\\D", "", ds$Number_of_Children))

summary(ds$children)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    0.0     1.0     3.0     3.6     6.0    16.0      11 
Display code
ds %>% drop_na(children)%>%
ggplot(aes(x = children))+
  geom_bar(color = "black",
                 fill = "gray",
                 width = 0.75)+
  geom_textvline(label = "Mean = 3.60", 
                 xintercept = 3.60, 
                 vjust = 1.3, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Number of children", 
       y = "Frequency", 
       title = "Number of children")+
  theme_bw()

Variable: Ethnicity

This variable needs lots of cleaning by someone familiar with ethnic distribution in Uganda. It is not possible to visualize it in its current form, which looks like this:

Display code
table(ds$Ethnicity)

      Acholi       Adhola         Alur     Amudaama     Amudagma     Amuganda 
           1           18            2           12            1            1 
       Ateso       Atesot        Bantu       Busigu        Eteso       Etesot 
           1           69           11            1            1           74 
       Gishu        Iteso       Itesot        Jaluo          Jap    Japadhola 
           8            2           32            1            2          107 
  japadollah   Japadollah    Japhadola  japhadollah  Japhadollah   Jupadollah 
           1            2            1           24           30            2 
       langi        Langi        Lango      muganda      mugishu      Mugishu 
           1            1            1            1            1            5 
      Mugisu      Mugwere   Munyankole      munyole      Munyole      Munyoli 
           4            1            1            2            7            1 
     Musamia      Musamic      Musaoga       musoga       Musoga Muteso-Kenya 
           1            1            1            1            8            1 
     Mutooro       Mwamba          N/A Nilo Hamites   Nilohamite  Nilohamites 
           1            1            5            1            5            4 
     nilotic      Nilotic     nilotics     Nilotics       Nubian        Sabin 
           1           27            2            1            1            1 
       Samia       Samiya 
           6            1 

Variable: Education

This variable needs lots of cleaning by someone familiar to Uganda education system. It is not possible to visualize it in its current form, which looks something like this:

Display code
head(table(ds$Level_of_education), n = 15)

                                     'A' Level 
                                             3 
                                     "A" Level 
                                             1 
                                       0 Level 
                                             1 
                                       A level 
                                             1 
                                      A' Level 
                                             1 
Advance Certificate in Electrical Installation 
                                             1 
                                     bachelors 
                                             2 
                              bachelors degree 
                                             1 
                                     Bed Maker 
                                             1 
                                   certificate 
                                             1 
                                   Certificate 
                                             5 
   Certificate in early chiildhood development 
                                             1 
          Certificate in Laboratory technology 
                                             1 
                       certificate IN PLUMBING 
                                             1 
     Certification in Interior/Exterior design 
                                             1 

Section 2. Factor Analysis: Group fusion/identification

Display code
## Four ingroup fusion items:
ds$IGF01 <- as.numeric(ds$`Q11.1 Group Bonds`)
ds$IGF02 <- as.numeric(ds$`Q11.2 Group Bonds`)
ds$IGF03 <- as.numeric(ds$`Q11.3 Group Bonds`)
ds$IGF04 <- as.numeric(ds$`Q11.4 Group Bonds`)

## Four ingroup identification items:
ds$IGI01 <- as.numeric(ds$`Q11.9 Group Bonds`)
ds$IGI02 <- as.numeric(ds$`Q11.10 Group Bonds`)
ds$IGI03 <- as.numeric(ds$`Q11.11 Group Bonds`)
ds$IGI04 <- as.numeric(ds$`Q11.12 Group Bonds`)

## Four outgroup fusion items:
ds$OGF01 <- as.numeric(ds$`Q11.5 Group Bonds`)
ds$OGF02 <- as.numeric(ds$`Q11.6 Group Bonds`)
ds$OGF03 <- as.numeric(ds$`Q11.7 Group Bonds`)
ds$OGF04 <- as.numeric(ds$`Q11.8 Group Bonds`)

## Four outgroup identification items:
ds$OGI01 <- as.numeric(ds$`Q11.13 Group Bonds`)
ds$OGI02 <- as.numeric(ds$`Q11.14 Group Bonds`)
ds$OGI03 <- as.numeric(ds$`Q11.15 Group Bonds`)
ds$OGI04 <- as.numeric(ds$`Q11.16 Group Bonds`)

## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                          ds$IGI01, ds$IGI02, ds$IGI03, ds$IGI04,
                          ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                          ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:16)])

Shorthand for item names:
IG = Ingroup, OG = Outgroup
F = Fusion, I = Identification
So for example:
IGF01 = “ingroup fusion: item 1”
OGI04 = “outgroup identification: item 4”, and so on

Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the sixteen different items (eight for ingroup fusion/identification and eight for outgroup fusion/identification).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code
KMO(r=cor(bonds))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.9
MSA for each item = 
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
 0.88  0.82  0.80  0.89  0.86  0.83  0.83  0.94  0.92  0.90  0.91  0.93  0.95 
OGI02 OGI03 OGI04 
 0.95  0.88  0.94 

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.9), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code
cortest.bartlett(bonds)
$chisq
[1] 6910

$p.value
[1] 0

$df
[1] 120

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code
parallel <- fa.parallel(bonds)

Parallel analysis suggests that the number of factors =  3  and the number of components =  2 

Based on the scree plot, factor analysis with two factors is the most appropriate. We will proceed with promax rotation, which assumes that the items are inter-correlated (that is, not independent from each other).

Factor analysis: Two factor model with promax rotation

Display code
fit01 <- factanal(bonds, 2, rotation="promax")
fit01

Call:
factanal(x = bonds, factors = 2, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 IGI04 OGF01 OGF02 OGF03 OGF04 OGI01 
0.529 0.311 0.336 0.428 0.697 0.467 0.492 0.858 0.344 0.248 0.319 0.385 0.151 
OGI02 OGI03 OGI04 
0.101 0.062 0.086 

Loadings:
      Factor1 Factor2
IGF01          0.689 
IGF02          0.834 
IGF03 -0.115   0.818 
IGF04          0.759 
IGI01          0.548 
IGI02          0.724 
IGI03          0.703 
IGI04  0.335   0.143 
OGF01  0.806         
OGF02  0.864         
OGF03  0.810         
OGF04  0.778         
OGI01  0.925         
OGI02  0.952         
OGI03  0.973         
OGI04  0.960         

               Factor1 Factor2
SS loadings        6.4    3.79
Proportion Var     0.4    0.24
Cumulative Var     0.4    0.64

Factor Correlations:
        Factor1 Factor2
Factor1  1.0000 -0.0954
Factor2 -0.0954  1.0000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 1049 on 89 degrees of freedom.
The p-value is 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011 

Factor analysis interpretation

The promax rotated FA output suggests that there are two factors:

  • Factor 1 consisting of the eight outgroup fusion/identification items

  • Factor 2 consisting of the eight ingroup fusion/identification items

  • The two factors are negatively correlated (coefficient = -0.0954)

  • The two factors cumulatively explain 64% of the variance in the data. Factor 1 explains 40% of the variance, and factor 2 explains 24% of the variance.

  • One item (out of the 16 total items) is “weird” - “IGI04” or “ingroup identification item 4.” It loads better with factor 1 instead of factor 2. The prompt for this item is “I feel a sense of belonging with the [ingroup]”

  • For our purposes, I am going to combine the ingroup fusion/identification items together to form one scale (based on Factor 2), and combine the outgroup fusion/identification items together to form a different scale (based on Factor 1).

Reliability analysis
Scale 1: Ingroup fusion/identification

Display code
igbonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                            ds$IGI01, ds$IGI02, ds$IGI03, ds$IGI04)

names(igbonds) <- sub('ds\\$', '', names(igbonds))

igbonds <- na.omit(igbonds)
mtx2 <- cor(igbonds[, c(1:8)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Again, the output shows that the item “IGI04” (ingroup identification - item 4) has the weakest correlation with the other items, while the other items have moderate to strong correlation with each other.

Display code
## Reliability:
alpha02 <- psych::alpha(igbonds)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.78      0.85    0.87      0.41 5.6 0.017  6.2 0.67     0.48

Cronbach’s alpha = 0.78. This is a decent score, although the cut-off is usually 0.80.

Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean   sd
IGF01 463  0.68  0.71  0.66   0.57  6.4 0.93
IGF02 463  0.77  0.80  0.79   0.69  6.3 0.86
IGF03 463  0.74  0.76  0.75   0.62  6.1 1.11
IGF04 463  0.73  0.76  0.73   0.64  6.3 0.91
IGI01 463  0.60  0.63  0.55   0.46  6.2 1.00
IGI02 463  0.76  0.80  0.79   0.69  6.3 0.77
IGI03 463  0.75  0.79  0.77   0.68  6.3 0.72
IGI04 463  0.47  0.33  0.17   0.15  5.4 1.84

In the “item statistics” output above, the output for “raw.r” for the last item “IGI04” is 0.47. This is the item-total correlation (a measure of how much this item correlates with the entire scale), and the value is substantially lower for this item compared to the other items.

Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGF01      0.74      0.83    0.85      0.41 4.8   0.0196 0.049  0.47
IGF02      0.73      0.81    0.83      0.39 4.4   0.0206 0.042  0.46
IGF03      0.73      0.82    0.83      0.39 4.6   0.0206 0.041  0.48
IGF04      0.74      0.82    0.85      0.39 4.6   0.0202 0.046  0.46
IGI01      0.76      0.84    0.86      0.43 5.3   0.0185 0.048  0.49
IGI02      0.74      0.81    0.83      0.38 4.4   0.0202 0.046  0.44
IGI03      0.74      0.82    0.84      0.39 4.4   0.0199 0.049  0.47
IGI04      0.87      0.88    0.89      0.51 7.3   0.0091 0.017  0.50

In the “alpha drop” output above, the output for “raw_alpha” for the last item “IGI04” is 0.87. This means that Cronbach’s alpha for the scale will increase to 0.87 (from the current value of 0.74) if that item is dropped.

All of this combined suggests that this item (IGI04) does not really go well with this scale, and the internal consistency of this scale will substantially improve if this item is dropped.

I will still keep this item for the analysis. But it looks like something weird is going on with this one item that is messing up the reliability of the scale.

Reliability analysis
Scale 2: Outgroup fusion/identification

Display code
ogbonds <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                            ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(ogbonds) <- sub('ds\\$', '', names(ogbonds))

ogbonds <- na.omit(ogbonds)
mtx3 <- cor(ogbonds[, c(1:8)])

corrplot(mtx3, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha03 <- psych::alpha(ogbonds)

## Cronbach's Alpha:
summary(alpha03)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.97      0.97    0.97      0.79  29 0.0024  3.1 1.4     0.76
Display code
## Item statistics:
alpha03$item.stats
        n raw.r std.r r.cor r.drop mean  sd
OGF01 467  0.87  0.87  0.85   0.83  3.2 1.6
OGF02 467  0.92  0.92  0.91   0.89  3.0 1.5
OGF03 467  0.88  0.88  0.87   0.85  3.0 1.5
OGF04 467  0.85  0.85  0.82   0.81  3.1 1.6
OGI01 467  0.89  0.89  0.88   0.85  3.1 1.6
OGI02 467  0.93  0.93  0.92   0.90  3.0 1.5
OGI03 467  0.93  0.93  0.93   0.91  3.0 1.5
OGI04 467  0.93  0.93  0.93   0.91  3.0 1.6
Display code
### Reliability if each item is dropped:
alpha03$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
OGF01      0.96      0.96    0.97      0.80  27   0.0026 0.0062  0.79
OGF02      0.96      0.96    0.97      0.78  25   0.0029 0.0068  0.74
OGF03      0.96      0.96    0.97      0.79  26   0.0027 0.0066  0.77
OGF04      0.97      0.97    0.97      0.80  28   0.0025 0.0056  0.79
OGI01      0.96      0.96    0.97      0.79  26   0.0027 0.0047  0.77
OGI02      0.96      0.96    0.97      0.78  24   0.0029 0.0051  0.75
OGI03      0.96      0.96    0.97      0.77  24   0.0029 0.0043  0.75
OGI04      0.96      0.96    0.97      0.77  24   0.0030 0.0051  0.74

Contrary to the “ingroup fusion/identification” scale previously, this scale has high reliability score, all the items are strongly correlated with each other, and there are no “weird” items in this scale.

Overall, both the “ingroup fusion/identification” scale as well as the “outgroup fusion/identification” scale are quite reliable and appropriate for measuring the concepts that they represent. The only exception is one item for the the “ingroup fusion/identification” scale (IGI04 or ingroup identification item 4). The results get a lot less noisier if this item is dropped.

Section 3a. Factor analysis of separate fusion/identification items (Version 1)

Display code
## Three ingroup fusion items:
ds$IGF01 <- as.numeric(ds$`Q11.1 Group Bonds`)
ds$IGF02 <- as.numeric(ds$`Q11.2 Group Bonds`)
ds$IGF03 <- as.numeric(ds$`Q11.3 Group Bonds`)

## Three outgroup fusion items:
ds$OGF01 <- as.numeric(ds$`Q11.5 Group Bonds`)
ds$OGF02 <- as.numeric(ds$`Q11.6 Group Bonds`)
ds$OGF03 <- as.numeric(ds$`Q11.7 Group Bonds`)

## Three ingroup identification items:
ds$IGI01 <- as.numeric(ds$`Q11.9 Group Bonds`)
ds$IGI02 <- as.numeric(ds$`Q11.10 Group Bonds`)
ds$IGI03 <- as.numeric(ds$`Q11.11 Group Bonds`)

## Three outgroup identification items:
ds$OGI01 <- as.numeric(ds$`Q11.13 Group Bonds`)
ds$OGI02 <- as.numeric(ds$`Q11.14 Group Bonds`)
ds$OGI03 <- as.numeric(ds$`Q11.15 Group Bonds`)

## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03,
                          ds$IGI01, ds$IGI02, ds$IGI03,
                          ds$OGF01, ds$OGF02, ds$OGF03,
                          ds$OGI01, ds$OGI02, ds$OGI03)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:9)])
Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the nine different items (three fusion / identification items each for ingroup and outgroup).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code
KMO(r=cor(bonds))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.86
MSA for each item = 
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
 0.87  0.76  0.78  0.85  0.81  0.83  0.90  0.85  0.91  0.90  0.89  0.85 

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.86), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code
cortest.bartlett(bonds)
$chisq
[1] 5049

$p.value
[1] 0

$df
[1] 66

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code
parallel <- fa.parallel(bonds)

Parallel analysis suggests that the number of factors =  3  and the number of components =  2 

Factor analysis: Four factor model with promax rotation

Display code
fit01 <- factanal(bonds, 4, rotation="promax")
fit01

Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGI01 OGI02 OGI03 
0.546 0.059 0.362 0.515 0.176 0.291 0.214 0.047 0.214 0.129 0.091 0.049 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.202   0.529         
IGF02                  1.023         
IGF03                  0.753         
IGI01          0.755                 
IGI02          0.928                 
IGI03          0.818                 
OGF01  0.817                  -0.248 
OGF02  0.888                  -0.294 
OGF03  0.823                  -0.206 
OGI01  0.937                   0.279 
OGI02  0.960                   0.243 
OGI03  0.974                   0.282 

               Factor1 Factor2 Factor3 Factor4
SS loadings       4.89    2.15    1.91   0.411
Proportion Var    0.41    0.18    0.16   0.034
Cumulative Var    0.41    0.59    0.74   0.780

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1  1.0000  0.0193  0.1518 -0.0960
Factor2  0.0193  1.0000 -0.1781  0.6551
Factor3  0.1518 -0.1781  1.0000 -0.0611
Factor4 -0.0960  0.6551 -0.0611  1.0000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 46 on 24 degrees of freedom.
The p-value is 0.0041 

Reliability analysis: Ingroup fusion

Display code
igfusion <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03)
names(igfusion) <- sub('ds\\$', '', names(igfusion))

igfusion <- na.omit(igfusion)
mtx2 <- cor(igbonds[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(igfusion)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.82      0.83    0.79      0.62   5 0.014  6.3 0.83     0.61
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean   sd
IGF01 491  0.81  0.82  0.65   0.59  6.4 0.91
IGF02 491  0.90  0.91  0.86   0.78  6.3 0.84
IGF03 491  0.89  0.87  0.79   0.70  6.1 1.11
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGF01      0.84      0.85    0.75      0.75 5.9    0.014    NA  0.75
IGF02      0.67      0.68    0.52      0.52 2.1    0.029    NA  0.52
IGF03      0.76      0.76    0.61      0.61 3.1    0.022    NA  0.61

Reliability analysis: Ingroup identification

Display code
igidnt <- cbind.data.frame(ds$IGI01, ds$IGI02, ds$IGI03)
names(igidnt) <- sub('ds\\$', '', names(igidnt))

igidnt <- na.omit(igidnt)
mtx2 <- cor(igidnt[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(igidnt)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.82      0.84    0.79      0.63 5.2 0.014  6.3 0.73     0.61
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean   sd
IGI01 483  0.86  0.82  0.66   0.62  6.2 1.01
IGI02 483  0.89  0.90  0.85   0.76  6.3 0.78
IGI03 483  0.85  0.88  0.80   0.70  6.3 0.73
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGI01      0.86      0.86    0.75      0.75 6.0    0.013    NA  0.75
IGI02      0.68      0.70    0.54      0.54 2.4    0.027    NA  0.54
IGI03      0.74      0.76    0.61      0.61 3.1    0.022    NA  0.61

Reliability analysis: Outgroup fusion

Display code
ogfusion <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03)
names(ogfusion) <- sub('ds\\$', '', names(ogfusion))

ogfusion <- na.omit(ogfusion)
mtx2 <- cor(ogfusion[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(ogfusion)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.93      0.93    0.91      0.82  14 0.0054  3.1 1.4     0.85
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean  sd
OGF01 480  0.93  0.93  0.87   0.84  3.2 1.6
OGF02 480  0.96  0.96  0.94   0.91  3.0 1.5
OGF03 480  0.93  0.93  0.88   0.84  3.0 1.5
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r med.r
OGF01      0.92      0.92    0.85      0.85 11.6   0.0073    NA  0.85
OGF02      0.87      0.87    0.77      0.77  6.6   0.0120    NA  0.77
OGF03      0.92      0.92    0.85      0.85 11.5   0.0073    NA  0.85

Reliability analysis: Outgroup identification

Display code
ogidnt <- cbind.data.frame(ds$OGI01, ds$OGI02, ds$OGI03)
names(ogidnt) <- sub('ds\\$', '', names(ogidnt))

ogidnt <- na.omit(ogidnt)
mtx2 <- cor(ogidnt[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(ogidnt)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.96      0.96    0.94      0.89  24 0.0032  3.1 1.5     0.88
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean  sd
OGI01 487  0.95  0.95  0.91   0.89  3.2 1.6
OGI02 487  0.96  0.97  0.94   0.92  3.0 1.5
OGI03 487  0.97  0.97  0.95   0.93  3.0 1.6
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
OGI01      0.96      0.96    0.92      0.92  23   0.0037    NA  0.92
OGI02      0.94      0.94    0.88      0.88  15   0.0057    NA  0.88
OGI03      0.93      0.93    0.87      0.87  13   0.0065    NA  0.87

Section 3b. Factor analysis of separate fusion/identification items
(Version 2)

Note: This version only drops one item - the problematic ingroup identification item, and retains all the other items from the ingroup fusion/identification scales.

Display code
## Four ingroup fusion items:
ds$IGF01 <- as.numeric(ds$`Q11.1 Group Bonds`)
ds$IGF02 <- as.numeric(ds$`Q11.2 Group Bonds`)
ds$IGF03 <- as.numeric(ds$`Q11.3 Group Bonds`)
ds$IGF04 <- as.numeric(ds$`Q11.4 Group Bonds`)

## Three ingroup identification items:
ds$IGI01 <- as.numeric(ds$`Q11.9 Group Bonds`)
ds$IGI02 <- as.numeric(ds$`Q11.10 Group Bonds`)
ds$IGI03 <- as.numeric(ds$`Q11.11 Group Bonds`)

## Four outgroup fusion items:
ds$OGF01 <- as.numeric(ds$`Q11.5 Group Bonds`)
ds$OGF02 <- as.numeric(ds$`Q11.6 Group Bonds`)
ds$OGF03 <- as.numeric(ds$`Q11.7 Group Bonds`)
ds$OGF04 <- as.numeric(ds$`Q11.8 Group Bonds`)

## Four outgroup identification items:
ds$OGI01 <- as.numeric(ds$`Q11.13 Group Bonds`)
ds$OGI02 <- as.numeric(ds$`Q11.14 Group Bonds`)
ds$OGI03 <- as.numeric(ds$`Q11.15 Group Bonds`)
ds$OGI04 <- as.numeric(ds$`Q11.16 Group Bonds`)


## Bonds dataframe:
bonds <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04,
                          ds$IGI01, ds$IGI02, ds$IGI03,
                          ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04,
                          ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)

names(bonds) <- sub('ds\\$', '', names(bonds))

bonds <- na.omit(bonds)
mtx1 <- cor(bonds[, c(1:15)])
Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the nine different items (three fusion / identification items each for ingroup and outgroup).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code
KMO(r=cor(bonds))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(bonds))
Overall MSA =  0.9
MSA for each item = 
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGF04 OGI01 OGI02 
 0.89  0.82  0.79  0.89  0.87  0.83  0.83  0.92  0.90  0.91  0.94  0.94  0.95 
OGI03 OGI04 
 0.88  0.93 

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.90), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code
cortest.bartlett(bonds)
$chisq
[1] 6898

$p.value
[1] 0

$df
[1] 105

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code
parallel <- fa.parallel(bonds)

Parallel analysis suggests that the number of factors =  3  and the number of components =  2 

Factor analysis: Four factor model with promax rotation

Display code
fit01 <- factanal(bonds, 4, rotation="promax")
fit01

Call:
factanal(x = bonds, factors = 4, rotation = "promax")

Uniquenesses:
IGF01 IGF02 IGF03 IGF04 IGI01 IGI02 IGI03 OGF01 OGF02 OGF03 OGF04 OGI01 OGI02 
0.536 0.220 0.219 0.431 0.506 0.198 0.260 0.202 0.097 0.178 0.273 0.143 0.102 
OGI03 OGI04 
0.028 0.090 

Loadings:
      Factor1 Factor2 Factor3 Factor4
IGF01          0.558   0.171         
IGF02          0.904                 
IGF03          0.935                 
IGF04          0.659   0.137         
IGI01                  0.752         
IGI02                  0.881         
IGI03                  0.838         
OGF01  0.805                  -0.218 
OGF02  0.880                  -0.182 
OGF03  0.818                  -0.200 
OGF04  0.775                  -0.169 
OGI01  0.977                   0.324 
OGI02  0.995                   0.285 
OGI03  1.034                   0.362 
OGI04  1.002                   0.252 

               Factor1 Factor2 Factor3 Factor4
SS loadings       6.71    2.45    2.11   0.535
Proportion Var    0.45    0.16    0.14   0.036
Cumulative Var    0.45    0.61    0.75   0.787

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1  1.0000  0.0137  0.3115  0.1129
Factor2  0.0137  1.0000 -0.1254 -0.6657
Factor3  0.3115 -0.1254  1.0000  0.0621
Factor4  0.1129 -0.6657  0.0621  1.0000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 185 on 51 degrees of freedom.
The p-value is 0.00000000000000005 

Reliability analysis: Ingroup fusion

Display code
igfusion <- cbind.data.frame(ds$IGF01, ds$IGF02, ds$IGF03, ds$IGF04)
names(igfusion) <- sub('ds\\$', '', names(igfusion))

igfusion <- na.omit(igfusion)
mtx2 <- cor(igbonds[, c(1:4)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(igfusion)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd median_r
      0.86      0.87    0.84      0.62 6.4 0.01  6.3 0.8     0.62
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean   sd
IGF01 480  0.78  0.79  0.67   0.62  6.4 0.92
IGF02 480  0.88  0.89  0.85   0.79  6.3 0.85
IGF03 480  0.89  0.87  0.83   0.76  6.1 1.10
IGF04 480  0.83  0.83  0.75   0.70  6.3 0.92
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
IGF01      0.86      0.87    0.82      0.68 6.4    0.011 0.0046  0.67
IGF02      0.80      0.80    0.74      0.57 4.0    0.016 0.0072  0.52
IGF03      0.81      0.81    0.74      0.59 4.2    0.015 0.0030  0.61
IGF04      0.83      0.84    0.79      0.63 5.1    0.014 0.0143  0.61

Reliability analysis: Ingroup identification

Display code
igidnt <- cbind.data.frame(ds$IGI01, ds$IGI02, ds$IGI03)
names(igidnt) <- sub('ds\\$', '', names(igidnt))

igidnt <- na.omit(igidnt)
mtx2 <- cor(igidnt[, c(1:3)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(igidnt)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.82      0.84    0.79      0.63 5.2 0.014  6.3 0.73     0.61
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean   sd
IGI01 483  0.86  0.82  0.66   0.62  6.2 1.01
IGI02 483  0.89  0.90  0.85   0.76  6.3 0.78
IGI03 483  0.85  0.88  0.80   0.70  6.3 0.73
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
IGI01      0.86      0.86    0.75      0.75 6.0    0.013    NA  0.75
IGI02      0.68      0.70    0.54      0.54 2.4    0.027    NA  0.54
IGI03      0.74      0.76    0.61      0.61 3.1    0.022    NA  0.61

Reliability analysis: Outgroup fusion

Display code
ogfusion <- cbind.data.frame(ds$OGF01, ds$OGF02, ds$OGF03, ds$OGF04)
names(ogfusion) <- sub('ds\\$', '', names(ogfusion))

ogfusion <- na.omit(ogfusion)
mtx2 <- cor(ogfusion[, c(1:4)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(ogfusion)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.94      0.94    0.93       0.8  16 0.0046  3.1 1.4     0.79
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean  sd
OGF01 476  0.91  0.91  0.87   0.84  3.2 1.6
OGF02 476  0.94  0.94  0.93   0.90  3.0 1.5
OGF03 476  0.93  0.93  0.91   0.88  3.0 1.5
OGF04 476  0.90  0.90  0.85   0.82  3.1 1.6
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
OGF01      0.93      0.93    0.90      0.81  13   0.0058 0.0016  0.82
OGF02      0.91      0.91    0.87      0.77  10   0.0072 0.0017  0.77
OGF03      0.92      0.92    0.89      0.79  11   0.0068 0.0036  0.77
OGF04      0.93      0.93    0.91      0.82  14   0.0054 0.0024  0.85

Reliability analysis: Outgroup identification

Display code
ogidnt <- cbind.data.frame(ds$OGI01, ds$OGI02, ds$OGI03, ds$OGI04)
names(ogidnt) <- sub('ds\\$', '', names(ogidnt))

names(ogidnt)
[1] "OGI01" "OGI02" "OGI03" "OGI04"
Display code
ogidnt <- na.omit(ogidnt)
mtx2 <- cor(ogidnt[, c(1:4)])

corrplot(mtx2, method = "number", number.cex = 0.7,
         col=c("darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alpha02 <- psych::alpha(ogidnt)

## Cronbach's Alpha:
summary(alpha02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.97      0.97    0.96      0.89  32 0.0023  3.1 1.5     0.89
Display code
## Item statistics:
alpha02$item.stats
        n raw.r std.r r.cor r.drop mean  sd
OGI01 487  0.94  0.94  0.90   0.89  3.2 1.6
OGI02 487  0.96  0.96  0.95   0.93  3.0 1.5
OGI03 487  0.97  0.97  0.97   0.95  3.0 1.6
OGI04 487  0.96  0.96  0.94   0.92  3.0 1.6
Display code
### Reliability if each item is dropped:
alpha02$alpha.drop
      raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r med.r
OGI01      0.97      0.97    0.96      0.91  31   0.0025 0.00025  0.92
OGI02      0.96      0.96    0.94      0.88  23   0.0034 0.00137  0.88
OGI03      0.95      0.95    0.93      0.87  20   0.0038 0.00054  0.87
OGI04      0.96      0.96    0.94      0.89  24   0.0032 0.00077  0.88

Section 4. Factor Analysis: BCL/BBL endorsement/experience

Display code
## BCL and BBL items:

# BCL_01:
# Seek out opportunities to bridge social divisions with their opponents, enemies, opposition groups, or other outgroups.   
# Variables: Q6.1, Q7.1

# BCL_02:
# Demonstrate willingness to compromise with their opponents, enemies, opposition groups, or other outgroups. 
# Variables: Q6.2, Q7.2

# BCL_03:
# Try to understand and empathize with their opponents, enemies, opposition groups, or other outgroups.  
# Variables: Q6.3, Q7.3

# BBL_01:
# Represent the interests of the communities and groups that they belong to even at the cost of other groups.
# Variables: Q6.4, Q7.4

# BBL_02:
# Focus on building stronger connections within the communities and groups they belong to rather than building stronger relationships with other groups across boundaries.
# Variables: Q6.5, Q7.5

# BBL_03:
# Try to gain benefits for the communities and groups they belong to even at the expense of other groups.
# Variables: Q6.6, Q7.6

ds$ENDBCL01 <- as.numeric(ds$`Q6.1 Leadership Quality`)
ds$ENDBCL02 <- as.numeric(ds$`Q6.2 Leadership Quality`)
ds$ENDBCL03 <- as.numeric(ds$`Q6.3 Leadership Quality`)
ds$ENDBBL01 <- as.numeric(ds$`Q6.4 Leadership Quality`)
ds$ENDBBL02 <- as.numeric(ds$`Q6.5 Leadership Quality`)
ds$ENDBBL03 <- as.numeric(ds$`Q6.6 Leadership Quality`)

ds$EXPBCL01 <- as.numeric(ds$`Q7.1 Leadership Experience`)
ds$EXPBCL02 <- as.numeric(ds$`Q7.2 Leadership Experience`)
ds$EXPBCL03 <- as.numeric(ds$`Q7.3 Leadership Experience`)
ds$EXPBBL01 <- as.numeric(ds$`Q7.4 Leadership Experience`)
ds$EXPBBL02 <- as.numeric(ds$`Q7.5 Leadership Experience`)
ds$EXPBBL03 <- as.numeric(ds$`Q7.6 Leadership Experience`)

leadership <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03, 
                               ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03,
                               ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03,
                               ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(leadership) <- sub('ds\\$', '', names(leadership))

leadership <- na.omit(leadership)
mtx1 <- cor(leadership[, c(1:12)])

Shorthand for item names:
END = Endorse, EXP = Experience
BCL = BCL, BBL = BBL
So for example:
ENDBCL01 = “Endorsement of BCL, item 1”
EXPBBL02 = “Experience of BBL, item 2”, and so on

Display code
corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

The correlation plot above shows the correlation coefficients between the twelve different items (six for endorsement of BCL/BBL, and six for experience of BCL/BBL).

Kaiser-Meyer-Olkin (KMO) test of factorability

Display code
KMO(r=cor(leadership))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(leadership))
Overall MSA =  0.76
MSA for each item = 
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.76     0.67     0.66     0.82     0.76     0.78     0.76     0.75 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.80     0.82     0.75     0.74 

The general criteria is that KMO needs to be greater than 0.60. Based on the results (overall MSA = 0.76), factor analysis is appropriate.

Bartlett’s test of sphericity

Display code
cortest.bartlett(leadership)
$chisq
[1] 3059

$p.value
[1] 0

$df
[1] 66

The test is significant, again suggesting that factor analysis is appropriate.

Parallel test

Display code
parallel <- fa.parallel(leadership)

Parallel analysis suggests that the number of factors =  4  and the number of components =  3 

Based on the scree plot, factor analysis with four factors is the most appropriate. We will proceed with promax rotation, which assumes that the items are inter-correlated (that is, not independent from each other).

Factor analysis: Four factor model with promax rotation

Display code
fit02 <- factanal(leadership, 4, rotation="promax")
fit02

Call:
factanal(x = leadership, factors = 4, rotation = "promax")

Uniquenesses:
ENDBCL01 ENDBCL02 ENDBCL03 ENDBBL01 ENDBBL02 ENDBBL03 EXPBCL01 EXPBCL02 
    0.54     0.44     0.42     0.27     0.17     0.28     0.29     0.26 
EXPBCL03 EXPBBL01 EXPBBL02 EXPBBL03 
    0.43     0.38     0.21     0.37 

Loadings:
         Factor1 Factor2 Factor3 Factor4
ENDBCL01 -0.142   0.324           0.384 
ENDBCL02                          0.746 
ENDBCL03         -0.124           0.826 
ENDBBL01  0.868                         
ENDBBL02  0.923                         
ENDBBL03  0.832                         
EXPBCL01          0.852                 
EXPBCL02          0.902                 
EXPBCL03          0.772                 
EXPBBL01                  0.737         
EXPBBL02                  0.924         
EXPBBL03                  0.799         

               Factor1 Factor2 Factor3 Factor4
SS loadings       2.33    2.28    2.05    1.40
Proportion Var    0.19    0.19    0.17    0.12
Cumulative Var    0.19    0.38    0.56    0.67

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4
Factor1   1.000  -0.188   0.427  -0.182
Factor2  -0.188   1.000   0.284   0.505
Factor3   0.427   0.284   1.000   0.155
Factor4  -0.182   0.505   0.155   1.000

Test of the hypothesis that 4 factors are sufficient.
The chi square statistic is 165 on 24 degrees of freedom.
The p-value is 0.000000000000000000000056 

Factor analysis interpretation

The promax rotated FA output suggests that there are four factors:

  • Factor 1 = Endorsement of BBL

  • Factor 2 = Experience of BCL

  • Factor 3 = Experience of BBL

  • Factor 4 = Endorsement of BCL

  • The four factors cumulatively explain 67% of the variance in the data. Factor 1 explains 19% of the variance, factor 2 explains 19%, factor 3 explains 17%, and factor 4 explains 12% .

  • The correlation between the four factors in the output above provide some interesting information. The output with existing factor names is a little hard to interpret, so I renamed the factor names to come up with a readable correlation matrix:

Display code
md <- c(1.000, -0.188, 0.427, -0.182,
        -0.188, 1.000, 0.284, 0.505,
        0.427, 0.284, 1.000, 0.155,
        -0.182, 0.505, 0.155, 1.000)

mat1 <- matrix(md, nrow=4, ncol=4,byrow=TRUE)

colnames(mat1) <- c("Endorsement_BBL", "Experience_BCL", 
                    "Experience_BBL", "Endorsement_BCL")

rownames(mat1) <- c("Endorsement_BBL", "Experience_BCL", 
                    "Experience_BBL", "Endorsement_BCL")

corrplot(mat1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
end_bcl <- cbind.data.frame(ds$ENDBCL01, ds$ENDBCL02, ds$ENDBCL03)
end_bbl <- cbind.data.frame(ds$ENDBBL01, ds$ENDBBL02, ds$ENDBBL03)
exp_bcl <- cbind.data.frame(ds$EXPBCL01, ds$EXPBCL02, ds$EXPBCL03)
exp_bbl <- cbind.data.frame(ds$EXPBBL01, ds$EXPBBL02, ds$EXPBBL03)

names(end_bcl) <- sub('ds\\$', '', names(end_bcl))
names(end_bbl) <- sub('ds\\$', '', names(end_bbl))
names(exp_bcl) <- sub('ds\\$', '', names(exp_bcl))
names(exp_bbl) <- sub('ds\\$', '', names(exp_bbl))

end_bcl <- na.omit(end_bcl)
end_bbl <- na.omit(end_bbl)
exp_bcl <- na.omit(exp_bcl)
exp_bbl <- na.omit(exp_bbl)

mtx1 <- cor(end_bcl[, c(1:3)])
mtx2 <- cor(end_bbl[, c(1:3)])
mtx3 <- cor(exp_bcl[, c(1:3)])
mtx4 <- cor(exp_bbl[, c(1:3)])

Reliability analysis
Scale 1: Endorsement of BCL

Display code
## Reliability:
alph01 <- psych::alpha(end_bcl)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
       0.7      0.72    0.64      0.46 2.6 0.023  5.8 0.99     0.44
Display code
## Item statistics:
alph01$item.stats
           n raw.r std.r r.cor r.drop mean  sd
ENDBCL01 495  0.82  0.77  0.56   0.48  5.8 1.5
ENDBCL02 495  0.81  0.83  0.70   0.57  5.8 1.2
ENDBCL03 495  0.76  0.81  0.66   0.55  5.8 1.0
Display code
### Reliability if each item is dropped:
alph01$alpha.drop
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBCL01      0.70      0.71    0.55      0.55 2.4    0.026    NA  0.55
ENDBCL02      0.54      0.57    0.40      0.40 1.3    0.038    NA  0.40
ENDBCL03      0.60      0.61    0.44      0.44 1.6    0.035    NA  0.44

Reliability analysis
Scale 2: Endorsement of BBL

Display code
## Reliability:
alph02 <- psych::alpha(end_bbl)
summary(alph02)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
       0.9       0.9    0.86      0.75 9.1 0.0078  4.9 1.6     0.77
Display code
## Item statistics:
alph02$item.stats
           n raw.r std.r r.cor r.drop mean  sd
ENDBBL01 492  0.91  0.91  0.83   0.79  5.0 1.8
ENDBBL02 492  0.93  0.93  0.88   0.83  5.0 1.8
ENDBBL03 492  0.91  0.91  0.83   0.79  4.7 1.8
Display code
### Reliability if each item is dropped:
alph02$alpha.drop
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
ENDBBL01      0.87      0.87    0.77      0.77 6.7    0.012    NA  0.77
ENDBBL02      0.83      0.83    0.71      0.71 5.0    0.015    NA  0.71
ENDBBL03      0.87      0.87    0.77      0.77 6.7    0.012    NA  0.77

Reliability analysis
Scale 3: Experience of BCL

Display code
## Reliability:
alph03 <- psych::alpha(exp_bcl)
summary(alph03)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.85      0.85     0.8      0.66 5.9 0.011  4.7 1.5     0.67
Display code
## Item statistics:
alph03$item.stats
           n raw.r std.r r.cor r.drop mean  sd
EXPBCL01 492  0.89  0.88  0.79   0.72  4.6 1.9
EXPBCL02 492  0.90  0.90  0.84   0.77  4.7 1.7
EXPBCL03 492  0.84  0.86  0.74   0.69  4.8 1.5
Display code
### Reliability if each item is dropped:
alph03$alpha.drop
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBCL01      0.80      0.80    0.67      0.67 4.0    0.018    NA  0.67
EXPBCL02      0.74      0.75    0.61      0.61 3.1    0.022    NA  0.61
EXPBCL03      0.83      0.83    0.71      0.71 4.9    0.015    NA  0.71

Reliability analysis
Scale 4: Experience of BBL

Display code
## Reliability:
alph04 <- psych::alpha(exp_bbl)
summary(alph04)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.86      0.86     0.8      0.66 5.9 0.011  4.6 1.4     0.69
Display code
## Item statistics:
alph04$item.stats
           n raw.r std.r r.cor r.drop mean  sd
EXPBBL01 494  0.87  0.87  0.77   0.71  4.7 1.5
EXPBBL02 494  0.90  0.90  0.84   0.77  4.7 1.6
EXPBBL03 494  0.87  0.87  0.76   0.70  4.5 1.5
Display code
### Reliability if each item is dropped:
alph04$alpha.drop
         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
EXPBBL01      0.82      0.82    0.69      0.69 4.4    0.017    NA  0.69
EXPBBL02      0.75      0.75    0.60      0.60 3.1    0.022    NA  0.60
EXPBBL03      0.82      0.82    0.70      0.70 4.6    0.016    NA  0.70

Reliability analysis: interpretation

Based on the results above, the four scales have relatively high alpha scores for the most part and there aren’t any “weird” items. However, some scales are more internally consistent than others. Here are the Cronbach’s alpha values for each scale:

  • Alpha for endorsement of BCL = 0.70. This is the lowest score of all four scales, and it falls below the conventional threshold of 0.80.

  • Alpha for endorsement of BBL = 0.90.

  • Alpha for experience of BCL = 0.85.

  • Alpha for experience of BBL = 0.86.

Correlation between fusion/identification items:

Display code
ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03+ds$IGF04)/4)

ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03+ds$IGI04)/4)

ds$OG_Fusion <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+ds$OGF04)/4)

ds$OG_Identification <- as.numeric((ds$OGI01+ds$OGI02+ds$OGI03+ds$OGI04)/4)

ds3 <- cbind.data.frame(ds$IG_Fusion, ds$IG_Identification,
                        ds$OG_Fusion, ds$OG_Identification)
ds3 <- na.omit(ds3)

names(ds3) <- sub('ds\\$', '', names(ds3))

mtx <- cor(ds3[, c(1:4)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 5. Preliminary regression analysis

Research question:
Do group fusion / identification predict BCL vs BBL?

Variables in the analysis:

Eight scales that were calculated based on factor analysis:

  • Ingroup fusion

  • Ingroup identification

  • Outgroup fusion

  • Outgroup identification

  • Endorsement of BCL

  • Endorsement of BBL

  • Experience of BCL

  • Experience of BBL

Demographics:

  • Age

  • Gender

  • Marital status

  • Socio-economic status

Regression models

This set of regression models uses the ingroup fusion/identification with only three items. Since the fourth item on “ingroup identification” scale was problematic, that item was removed. To maintain consistency, the fourth item from the other three scales (ingroup fusion, outgroup fusion, and outgroup identification) are also removed.

Display code
## Proper (usable) variables in the model:

ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))
ds$Age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$`SES-` <- ds$ses

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL01+ds$EXPBCL02+ds$EXPBCL03)/3)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03)/3)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Fusion <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03)/3)
ds$OG_Identification <- as.numeric((ds$OGI01+ds$OGI02+ds$OGI03)/3)
Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion-0.0530.0970.270*0.260**
(0.067)(0.100)(0.120)(0.097)
IG_Identification0.440***0.630***-0.490***-0.026
(0.076)(0.120)(0.130)(0.110)
OG_Fusion0.120*-0.072-0.120-0.140
(0.054)(0.084)(0.094)(0.078)
OG_Identification-0.0800.110-0.160-0.060
(0.052)(0.081)(0.091)(0.076)
Age0.0020.003-0.001-0.0003
(0.003)(0.004)(0.005)(0.004)
Female-0.0050.300*0.005-0.280*
(0.092)(0.140)(0.160)(0.130)
Married0.1600.1000.2100.130
(0.110)(0.160)(0.180)(0.150)
`SES-`Middle0.031-0.0530.150-0.260
(0.098)(0.150)(0.170)(0.140)
`SES-`Upper middle-0.4000.1400.240-0.015
(0.290)(0.470)(0.490)(0.430)
Constant3.100***-0.2907.000***3.900***
(0.460)(0.710)(0.780)(0.660)
Observations427423423425
R20.1200.1300.1100.083
Adjusted R20.1000.1100.0870.063
Residual Std. Error0.920 (df = 417)1.400 (df = 413)1.600 (df = 413)1.300 (df = 415)
F Statistic6.300*** (df = 9; 417)6.600*** (df = 9; 413)5.500*** (df = 9; 413)4.200*** (df = 9; 415)
Note:*p<0.05; **p<0.01; ***p<0.001

Preliminary OLS results:

  • Controlling for demographics, ingroup fusion positively predicts BBL endorsement and BBL experience.

  • Controlling for demographics, ingroup identification positively predicts BCL endorsement and BCL experience, and negatively predicts BBL endorsement.

  • Outgroup fusion positively predicts BCL endorsement and negatively predicts BBL experience.

  • Outgroup identification negatively predicts BBL endorsement.

Regression models (version 2)

This set of regression models use all four items for the the ingroup fusion/identification scales. It only leaves out the ONE problematic “ingroup identification” item. That is, ingroup fusion, outgroup fusion, and outgroup identification have four items, while ingroup identification has three items.

Display code
## Proper (usable) variables in the model:

ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))
ds$Age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$`SES-` <- ds$ses

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL01+ds$EXPBCL02+ds$EXPBCL03)/3)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03+ds$IGF04)/4)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Fusion <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+ds$OGF04)/4)
ds$OG_Identification <- as.numeric((ds$OGI01+ds$OGI02+ds$OGI03+ds$OGI04)/4)
Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`,
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion-0.0560.1200.1900.240*
(0.070)(0.110)(0.120)(0.100)
IG_Identification0.440***0.600***-0.430**-0.011
(0.076)(0.120)(0.140)(0.110)
OG_Fusion0.098-0.079-0.090-0.088
(0.056)(0.089)(0.100)(0.083)
OG_Identification-0.0640.130-0.200*-0.096
(0.055)(0.086)(0.098)(0.081)
Age-0.00010.0010.0010.003
(0.001)(0.002)(0.002)(0.002)
Female-0.0250.320*0.008-0.230
(0.090)(0.140)(0.160)(0.130)
Married0.1300.1300.1800.086
(0.100)(0.170)(0.190)(0.150)
`SES-`Middle0.058-0.0210.120-0.260
(0.098)(0.150)(0.170)(0.140)
`SES-`Upper middle-0.3700.1700.220-0.032
(0.280)(0.460)(0.500)(0.430)
Constant3.200***-0.2107.100***3.800***
(0.440)(0.700)(0.780)(0.650)
Observations415411411413
R20.1200.1300.1000.079
Adjusted R20.1000.1100.0800.059
Residual Std. Error0.900 (df = 405)1.400 (df = 401)1.600 (df = 401)1.300 (df = 403)
F Statistic6.200*** (df = 9; 405)6.400*** (df = 9; 401)5.000*** (df = 9; 401)3.800*** (df = 9; 403)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 6. Ingroup/outgroup empathy/hostility items

Empathic concern

Display code
## Empathic concern
ds$empathic_concern_01 <- as.numeric(ds$`Q18.1 Empathy`)
ds$empathic_concern_02a <- as.numeric(ds$`Q18.2 Empathy`)
ds$empathic_concern_02 <- as.numeric(8 - ds$empathic_concern_02a)
ds$empathic_concern_03 <- as.numeric(ds$`Q18.3 Empathy`)

ds$empathic_concern <- (ds$empathic_concern_01+ds$empathic_concern_02+
                        ds$empathic_concern_03)/3

summary(ds$empathic_concern)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.7     5.7     5.6     6.3     7.0       4 
Display code
ds %>% drop_na(empathic_concern)%>%
ggplot(aes(x = empathic_concern))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.60", 
                 xintercept = 5.6, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Empathic concern score", 
       y = "Frequency", 
       title = "Empathic concern")+
  theme_bw()

Perspective taking

Display code
ds$perspective_taking_01 <- as.numeric(ds$`Q18.4 Empathy`)
ds$perspective_taking_02 <- as.numeric(ds$`Q18.5 Empathy`)
ds$perspective_taking_03 <- as.numeric(ds$`Q18.6 Empathy`)
ds$perspective_taking_04 <- as.numeric(ds$`Q18.7 Empathy`)


ds$perspective_taking <- (ds$perspective_taking_01+ds$perspective_taking_02+
                          ds$perspective_taking_03+ds$perspective_taking_04)/4

summary(ds$perspective_taking)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    2.5     5.5     6.0     5.9     6.2     7.0       8 
Display code
ds %>% drop_na(perspective_taking)%>%
ggplot(aes(x = perspective_taking))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.9", 
                 xintercept = 5.9, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perspective taking score", 
       y = "Frequency", 
       title = "Perspective taking")+
  theme_bw()

Outgroup cooperation

Display code
ds$og_coop_01 <- as.numeric(ds$`Q12.1 Outgroup`)
ds$og_coop_02 <- as.numeric(ds$`Q12.2 Outgroup`)

ds$og_cooperation <- (ds$og_coop_01+ds$og_coop_02)/2

summary(ds$og_cooperation)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.0     5.5     4.9     6.0     7.0       5 
Display code
ds %>% drop_na(og_cooperation)%>%
ggplot(aes(x = og_cooperation))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.90", 
                 xintercept = 4.90, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup cooperation score", 
       y = "Frequency", 
       title = "Outgroup cooperation")+
  theme_bw()

Perceived history of discrimination

Display code
ds$history_discrimination <- as.numeric(ds$`Q12.3 Outgroup`)

summary(ds$history_discrimination)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     2.0     3.0     3.3     5.0     7.0       6 
Display code
ds %>% drop_na(history_discrimination)%>%
ggplot(aes(x = history_discrimination))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 20)+
  geom_textvline(label = "Mean = 3.30", 
                 xintercept = 3.30, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Perceived history of discrimination score", 
       y = "Frequency", 
       title = "Perceived history of discrimination")+
  theme_bw()

Outgroup hostility

Display code
ds$og_host_01 <- as.numeric(ds$`Q12.4 Outgroup`)
ds$og_host_02 <- as.numeric(ds$`Q12.5 Outgroup`)

ds$og_hostility <- (ds$og_host_01+ds$og_host_02)/2

summary(ds$og_hostility)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     1.0     2.0     2.7     4.0     7.0      11 
Display code
ds %>% drop_na(og_hostility)%>%
ggplot(aes(x = og_hostility))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 2.70", 
                 xintercept = 2.7, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup hostility score", 
       y = "Frequency", 
       title = "Outgroup hostility")+
  theme_bw()

Willingess to fight outgroup

Display code
ds$fight_outgroup <- as.numeric(ds$`Q12.6 Outgroup`)

summary(ds$fight_outgroup)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     1.0     1.0     2.1     2.0     7.0       5 
Display code
ds %>% drop_na(fight_outgroup)%>%
ggplot(aes(x = fight_outgroup))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 2.10", 
                 xintercept = 2.1, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Willingness to fight outgroup score", 
       y = "Frequency", 
       title = "Willingness to fight outgroup")+
  theme_bw()

Section 7. Correlation between ingroup/outgroup empathy/hostility items

Display code
ds3 <- cbind.data.frame(ds$empathic_concern, ds$perspective_taking,
                        ds$og_cooperation, ds$history_discrimination,
                        ds$og_hostility, ds$fight_outgroup)
ds3 <- na.omit(ds3)

names(ds3) <- sub('ds\\$', '', names(ds3))

mtx <- cor(ds3[, c(1:6)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Section 8. Regression with new variables

This set of regression models have additional predictor variables compared to the last set. It adds empathic concern, perspective taking, and perceived history of discrimination as additional predictors.

Display code
## Proper (usable) variables in the model:

ds$Age1 <- as.numeric(gsub("\\D", "", ds$Age))
ds$Age <- as.numeric(ifelse(ds$Age1 == "6465", 64.5,
                     ifelse(ds$Age1 == "806", 80.5, 
                     ifelse(ds$Age1 < 18, NA, ds$Age1))))

ds$Female <- ifelse(ds$gender == "Female", 1, 0)
ds$Married <- ifelse(ds$married == "Married", 1, 0)
ds$`SES-` <- ds$ses

ds$Endorse_BCL <- as.numeric((ds$ENDBCL01+ds$ENDBCL02+ds$ENDBCL03)/3)
ds$Endorse_BBL <- as.numeric((ds$ENDBBL01+ds$ENDBBL02+ds$ENDBBL03)/3)
ds$Experience_BCL <- as.numeric((ds$EXPBCL01+ds$EXPBCL02+ds$EXPBCL03)/3)
ds$Experience_BBL <- as.numeric((ds$EXPBBL01+ds$EXPBBL02+ds$EXPBBL03)/3)

ds$IG_Fusion <- as.numeric((ds$IGF01+ds$IGF02+ds$IGF03+ds$IGF04)/4)
ds$IG_Identification <- as.numeric((ds$IGI01+ds$IGI02+ds$IGI03)/3)
ds$OG_Fusion <- as.numeric((ds$OGF01+ds$OGF02+ds$OGF03+ds$OGF04)/4)
ds$OG_Identification <- as.numeric((ds$OGI01+ds$OGI02+ds$OGI03+ds$OGI04)/4)


## Empathic concern
ds$empathic_concern_01 <- as.numeric(ds$`Q18.1 Empathy`)
ds$empathic_concern_02a <- as.numeric(ds$`Q18.2 Empathy`)
ds$empathic_concern_02 <- as.numeric(8 - ds$empathic_concern_02a)
ds$empathic_concern_03 <- as.numeric(ds$`Q18.3 Empathy`)

ds$empathic_concern <- (ds$empathic_concern_01+ds$empathic_concern_02+
                        ds$empathic_concern_03)/3


ds$perspective_taking_01 <- as.numeric(ds$`Q18.4 Empathy`)
ds$perspective_taking_02 <- as.numeric(ds$`Q18.5 Empathy`)
ds$perspective_taking_03 <- as.numeric(ds$`Q18.6 Empathy`)
ds$perspective_taking_04 <- as.numeric(ds$`Q18.7 Empathy`)


ds$perspective_taking <- (ds$perspective_taking_01+ds$perspective_taking_02+
                          ds$perspective_taking_03+ds$perspective_taking_04)/4

ds$og_coop_01 <- as.numeric(ds$`Q12.1 Outgroup`)
ds$og_coop_02 <- as.numeric(ds$`Q12.2 Outgroup`)

ds$og_cooperation <- (ds$og_coop_01+ds$og_coop_02)/2

ds$history_discrimination <- as.numeric(ds$`Q12.3 Outgroup`)

ds$og_host_01 <- as.numeric(ds$`Q12.4 Outgroup`)
ds$og_host_02 <- as.numeric(ds$`Q12.5 Outgroup`)

ds$og_hostility <- (ds$og_host_01+ds$og_host_02)/2

ds$fight_outgroup <- as.numeric(ds$`Q12.6 Outgroup`)
Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion-0.0590.1700.300*0.310**
(0.065)(0.110)(0.130)(0.100)
IG_Identification0.380***0.450***-0.450**-0.095
(0.072)(0.120)(0.140)(0.120)
OG_Fusion0.062-0.140-0.076-0.120
(0.052)(0.084)(0.100)(0.083)
OG_Identification-0.0610.130-0.180-0.076
(0.050)(0.081)(0.097)(0.080)
empathic_concern0.160***0.450***0.0650.180*
(0.044)(0.071)(0.085)(0.070)
perspective_taking0.160**0.061-0.470***-0.130
(0.056)(0.092)(0.110)(0.091)
history_discrimination-0.043-0.130***0.010-0.084*
(0.023)(0.038)(0.045)(0.037)
Age0.0010.001-0.001-0.002
(0.003)(0.004)(0.005)(0.004)
Female-0.1100.1000.062-0.260
(0.084)(0.140)(0.160)(0.140)
Married0.1300.1400.1900.075
(0.095)(0.150)(0.180)(0.150)
`SES-`Middle0.0340.0160.120-0.180
(0.090)(0.150)(0.170)(0.140)
`SES-`Upper middle-0.3700.3800.3800.240
(0.260)(0.440)(0.500)(0.430)
Constant2.100***-1.800*8.700***4.200***
(0.520)(0.840)(1.000)(0.830)
Observations404400401402
R20.2000.2500.1500.110
Adjusted R20.1700.2300.1200.081
Residual Std. Error0.810 (df = 391)1.300 (df = 387)1.600 (df = 388)1.300 (df = 389)
F Statistic8.100*** (df = 12; 391)11.000*** (df = 12; 387)5.600*** (df = 12; 388)3.900*** (df = 12; 389)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 9. Alternative regression models with different outcomes

This set of regression models have different outcome variables which are: outgroup cooperation, outgroup hostility, and willingness to fight outgroup.

Display code
## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(og_cooperation~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(og_hostility~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(fight_outgroup~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+empathic_concern+perspective_taking+history_discrimination+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
og_cooperationog_hostilityfight_outgroup
(1)(2)(3)
IG_Fusion-0.310**0.0810.060
(0.120)(0.100)(0.110)
IG_Identification0.390**-0.005-0.066
(0.130)(0.110)(0.120)
OG_Fusion0.220*-0.250**-0.170*
(0.094)(0.081)(0.085)
OG_Identification0.330***-0.0240.016
(0.090)(0.078)(0.081)
empathic_concern0.400***-0.140*-0.260***
(0.079)(0.068)(0.071)
perspective_taking0.200*0.0740.490***
(0.100)(0.088)(0.092)
history_discrimination-0.095*0.510***0.370***
(0.042)(0.036)(0.037)
Age-0.001-0.001-0.0004
(0.005)(0.004)(0.004)
Female0.160-0.120-0.190
(0.150)(0.130)(0.140)
Married-0.2500.1000.190
(0.170)(0.150)(0.150)
`SES-`Middle-0.0750.2200.150
(0.160)(0.140)(0.150)
`SES-`Upper middle-0.4200.7801.000*
(0.470)(0.420)(0.420)
Constant-0.1601.700*-0.200
(0.940)(0.800)(0.840)
Observations407401405
R20.3700.4700.340
Adjusted R20.3500.4500.320
Residual Std. Error1.500 (df = 394)1.300 (df = 388)1.300 (df = 392)
F Statistic19.000*** (df = 12; 394)28.000*** (df = 12; 388)17.000*** (df = 12; 392)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 10. Religious freedom measures

Social perception of religious freedom

Display code
ds$religious_freedom_per_01 <- as.numeric(ds$`Q16.1 Religious Freedom`)
ds$religious_freedom_per_02a <- as.numeric(ds$`Q16.2 Religious Freedom`)
ds$religious_freedom_per_02 <- (8 - ds$religious_freedom_per_02a)
ds$religious_freedom_per_03 <- as.numeric(ds$`Q16.3 Religious Freedom`)
ds$religious_freedom_per_04 <- as.numeric(ds$`Q16.4 Religious Freedom`)
ds$religious_freedom_per_05 <- as.numeric(ds$`Q16.5 Religious Freedom`)
ds$religious_freedom_per_06 <- as.numeric(ds$`Q16.6 Religious Freedom`)
ds$religious_freedom_per_07 <- as.numeric(ds$`Q16.7 Religious Freedom`)
ds$religious_freedom_per_08 <- as.numeric(ds$`Q16.8 Religious Freedom`)

ds$sprf <- (ds$religious_freedom_per_01+ds$religious_freedom_per_02+
            ds$religious_freedom_per_03+ds$religious_freedom_per_04+
            ds$religious_freedom_per_05+ds$religious_freedom_per_06+
            ds$religious_freedom_per_07+ds$religious_freedom_per_08)/8

summary(ds$sprf)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    2.8     5.2     5.5     5.6     6.0     7.0      13 
Display code
ds %>% drop_na(sprf)%>%
ggplot(aes(x = sprf))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.60", 
                 xintercept = 5.60, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "SPRF score", 
       y = "Frequency", 
       title = "Social perception of religious freedom")+
  theme_bw()

Experience of religious freedom

Display code
ds$religious_freedom_exp_01a <- as.numeric(ds$`Q17.1 Life experience`)
ds$religious_freedom_exp_01 <- (8 - ds$religious_freedom_exp_01)

ds$religious_freedom_exp_02a <- as.numeric(ds$`Q17.2 Life experience`)
ds$religious_freedom_exp_02 <- (8 - ds$religious_freedom_exp_02)

ds$religious_freedom_exp_03a <- as.numeric(ds$`Q17.3 Life experience`)
ds$religious_freedom_exp_03 <- (8 - ds$religious_freedom_exp_03)

ds$religious_freedom_exp_04a <- as.numeric(ds$`Q17.4 Life experience`)
ds$religious_freedom_exp_04 <- (8 - ds$religious_freedom_exp_04)

ds$religious_freedom_exp_05a <- as.numeric(ds$`Q17.5 Life experience`)
ds$religious_freedom_exp_05 <- (8 - ds$religious_freedom_exp_05)

ds$religious_freedom_exp_06a <- as.numeric(ds$`Q17.6 Life experience`)
ds$religious_freedom_exp_06 <- (8 - ds$religious_freedom_exp_06)

ds$exp_religious_freedom <- (ds$religious_freedom_exp_01+ds$religious_freedom_exp_02+
                             ds$religious_freedom_exp_03+ds$religious_freedom_exp_04+
                              ds$religious_freedom_exp_05+ds$religious_freedom_exp_06)/6

summary(ds$exp_religious_freedom)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     5.5     6.5     6.2     7.0     7.0       8 
Display code
ds %>% drop_na(exp_religious_freedom)%>%
ggplot(aes(x = exp_religious_freedom))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.30", 
                 xintercept = 5.30, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Experience of religious freedom score", 
       y = "Frequency", 
       title = "Experience of religious freedom")+
  theme_bw()

Experience of religious freedom based on religious affiliation

Display code
ds$Religion <- ds$religion
ds2 <- ds %>% 
drop_na(Religion, exp_religious_freedom)%>%
 group_by(Religion) %>% 
 summarise(Exp_religious_freedom=mean(exp_religious_freedom),.groups = 'drop') %>%
  as.data.frame()
ds2
               Religion Exp_religious_freedom
1   Christian: Catholic                   6.3
2 Christian: Protestant                   6.3
3          Muslim: Shia                   5.9
4         Muslim: Sunni                   5.6
5                 Other                   5.8
Display code
ds %>% drop_na(religion, exp_religious_freedom)%>%
ggplot(aes(y = exp_religious_freedom, 
           x = religion))+
  geom_boxplot()+
  labs(x = "", 
       y = "Experience of religious freedom score", 
       title = "Experience of religious freedom")+
  coord_flip()+
  theme_bw()

Section 11. Positive/Negative contact with outgroup

Positive contact with outgroup

Display code
ds$pc <- ds$`Q15. Positve Contact`

# never, very rarely, rarely
# sometimes, often, very often, always

ds$pc01 <- ifelse(ds$pc == "never", "Never",
           ifelse(ds$pc == "Never", "Never",
           ifelse(ds$pc == "very rarely", "Very rarely",
           ifelse(ds$pc == "Very rarely", "Very rarely",
           ifelse(ds$pc == "Very Rarely", "Very rarely",
           ifelse(ds$pc == "rarely", "Rarely",
           ifelse(ds$pc == "Rarely", "Rarely",
           ifelse(ds$pc == "sometimes", "Sometimes",
           ifelse(ds$pc == "Sometimes", "Sometimes",
           ifelse(ds$pc == "often", "Often",
           ifelse(ds$pc == "Often", "Often",
           ifelse(ds$pc == "very often", "Very often",
           ifelse(ds$pc == "Very often", "Very often",
           ifelse(ds$pc == "Very Often", "Very often",
           ifelse(ds$pc == "Always", "Always", NA)))))))))))))))

ds$pc01 <- factor(ds$pc01, 
                  level = c("Never", "Very rarely", "Rarely",
                            "Sometimes", "Often", "Very often", "Always"))

table(ds$pc01)

      Never Very rarely      Rarely   Sometimes       Often  Very often 
         82          26          31         129          99          88 
     Always 
         42 
Display code
ds$pc02 <- as.numeric(ds$pc01)

summary(ds$pc02)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     3.0     4.0     4.1     6.0     7.0       3 
Display code
ds %>% drop_na(pc02)%>%
ggplot(aes(x = pc02))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 4.10", 
                 xintercept = 4.10, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive contact score", 
       y = "Frequency", 
       title = "Positive contact with outgroup")+
  theme_bw()

Negative contact with outgroup

Display code
ds$nc <- ds$`Q14. Negativ Outgroup`

ds$nc01 <- ifelse(ds$nc == "never", "Never",
           ifelse(ds$nc == "Never", "Never", 
           ifelse(ds$nc == "very rarely", "Very rarely", 
           ifelse(ds$nc == "Very rarely", "Very rarely",
           ifelse(ds$nc == "Very Rarely", "Very rarely", 
           ifelse(ds$nc == "Veyr rarely", "Very rarely",
           ifelse(ds$nc == "rarely", "Rarely", 
           ifelse(ds$nc == "Rarely", "Rarely",
           ifelse(ds$nc == "some times", "Sometimes", 
           ifelse(ds$nc == "sometimes", "Sometimes",
           ifelse(ds$nc == "Sometimes", "Sometimes", 
           ifelse(ds$nc == "often", "Often",
           ifelse(ds$nc == "verry often", "Very often", 
           ifelse(ds$nc == "very often", "Very often",
           ifelse(ds$nc == "Very often", "Very often", 
           ifelse(ds$nc == "Very Often", "Very often",
           ifelse(ds$nc == "always", "Always", 
           ifelse(ds$nc == "Always", "Always",
           ifelse(ds$nc == "Aways", "Always", NA)))))))))))))))))))
              
ds$nc01 <- factor(ds$nc01, 
                  level = c("Never", "Very rarely", "Rarely",
                            "Sometimes", "Often", "Very often", "Always"))

table(ds$nc01)

      Never Very rarely      Rarely   Sometimes       Often  Very often 
        194         102          50         103           5           9 
     Always 
         15 
Display code
ds$nc02 <- as.numeric(ds$nc01)

summary(ds$nc02)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     1.0     2.0     2.4     4.0     7.0      22 
Display code
ds %>% drop_na(nc02)%>%
ggplot(aes(x = nc02))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 2.40", 
                 xintercept = 2.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative contact score", 
       y = "Frequency", 
       title = "Negative contact with outgroup")+
  theme_bw()

Section 12. Intergroup marriage

Support for inter-group marriage: factor analysis

Display code
ds$intergroup_marriage_01 <- as.numeric(ds$`Q20.1 Marriage`)
ds$intergroup_marriage_02 <- as.numeric(ds$`Q20.2 Marriage`)
ds$intergroup_marriage_03 <- as.numeric(ds$`Q20.3 Marriage`)
ds$intergroup_marriage_04 <- as.numeric(ds$`Q20.4 Marriage`)

ig_marriage <- cbind.data.frame(ds$intergroup_marriage_01, ds$intergroup_marriage_02,
                                ds$intergroup_marriage_03, ds$intergroup_marriage_04)

names(ig_marriage) <- sub('ds\\$', '', names(ig_marriage))

ig_marriage <- na.omit(ig_marriage)
mtx1 <- cor(ig_marriage[, c(1:4)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(ig_marriage))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(ig_marriage))
Overall MSA =  0.81
MSA for each item = 
intergroup_marriage_01 intergroup_marriage_02 intergroup_marriage_03 
                  0.81                   0.79                   0.81 
intergroup_marriage_04 
                  0.83 
Display code
## Bartlett's test of sphericity
cortest.bartlett(ig_marriage)
$chisq
[1] 1807

$p.value
[1] 0

$df
[1] 6
Display code
## Parallel test
parallel <- fa.parallel(ig_marriage)

Parallel analysis suggests that the number of factors =  2  and the number of components =  1 

Based on the scree plot, number of factors = 1.

Display code
fit01 <- factanal(ig_marriage, 1, rotation="promax")
fit01

Call:
factanal(x = ig_marriage, factors = 1, rotation = "promax")

Uniquenesses:
intergroup_marriage_01 intergroup_marriage_02 intergroup_marriage_03 
                  0.25                   0.14                   0.20 
intergroup_marriage_04 
                  0.27 

Loadings:
                       Factor1
intergroup_marriage_01 0.87   
intergroup_marriage_02 0.93   
intergroup_marriage_03 0.89   
intergroup_marriage_04 0.85   

               Factor1
SS loadings       3.13
Proportion Var    0.78

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 117 on 2 degrees of freedom.
The p-value is 0.000000000000000000000000031 
Display code
## Reliability analysis
psych::alpha(ig_marriage)

Reliability analysis   
Call: psych::alpha(x = ig_marriage)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.93      0.93    0.93      0.78  14 0.0049  5.4 1.5     0.78

    95% confidence boundaries 
         lower alpha upper
Feldt     0.92  0.93  0.94
Duhachek  0.92  0.93  0.94

 Reliability if an item is dropped:
                       raw_alpha std.alpha G6(smc) average_r  S/N alpha se
intergroup_marriage_01      0.92      0.92    0.89      0.80 12.2   0.0061
intergroup_marriage_02      0.90      0.90    0.87      0.76  9.3   0.0077
intergroup_marriage_03      0.91      0.91    0.88      0.77 10.0   0.0072
intergroup_marriage_04      0.92      0.92    0.90      0.80 11.8   0.0060
                        var.r med.r
intergroup_marriage_01 0.0015  0.81
intergroup_marriage_02 0.0056  0.73
intergroup_marriage_03 0.0060  0.76
intergroup_marriage_04 0.0034  0.81

 Item statistics 
                         n raw.r std.r r.cor r.drop mean  sd
intergroup_marriage_01 498  0.90  0.90  0.85   0.82  5.5 1.6
intergroup_marriage_02 498  0.94  0.93  0.91   0.88  5.4 1.7
intergroup_marriage_03 498  0.92  0.92  0.90   0.86  5.4 1.5
intergroup_marriage_04 498  0.90  0.90  0.86   0.82  5.2 1.7

Non missing response frequency for each item
                          1    2    3    4    5    6    7 miss
intergroup_marriage_01 0.01 0.11 0.02 0.04 0.09 0.47 0.24    0
intergroup_marriage_02 0.05 0.08 0.03 0.06 0.10 0.44 0.24    0
intergroup_marriage_03 0.02 0.08 0.04 0.08 0.13 0.44 0.22    0
intergroup_marriage_04 0.02 0.10 0.06 0.09 0.14 0.38 0.20    0

Based on the above output for factor analysis and reliability analysis, the “support for intergroup marriage” scale is highly reliable. Here is the visualization for the scale:

Display code
ds$endorse_intergroup_marriage <- (ds$intergroup_marriage_01+ds$intergroup_marriage_02+
                                   ds$intergroup_marriage_03+ds$intergroup_marriage_04)/4

summary(ds$endorse_intergroup_marriage)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     5.0     6.0     5.4     6.2     7.0       2 
Display code
ds %>% drop_na(endorse_intergroup_marriage)%>%
ggplot(aes(x = endorse_intergroup_marriage))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 30)+
  geom_textvline(label = "Mean = 5.40", 
                 xintercept = 5.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Intergroup marriage endorsement score", 
       y = "Frequency", 
       title = "Support for intergroup marriage")+
  theme_bw()

Section 13. Outgroup affect

Outgroup affect: Scale construction

Display code
## Feel outgroup: negative to positive:

ds$ogaf1 <- ds$`Q13.1 Feel Outgroup- Negative to Positive`

ds$og_aff_01 <- ifelse(ds$ogaf1 == "very negative", "Very negative",
                ifelse(ds$ogaf1 == "Very negative", "Very negative",
                ifelse(ds$ogaf1 == "Very Negative", "Very negative",
                ifelse(ds$ogaf1 == "moderately negative", "Moderately negative",
                ifelse(ds$ogaf1 == "Moderately negative", "Moderately negative",
                ifelse(ds$ogaf1 == "Moderately Negative", "Moderately negative",
                ifelse(ds$ogaf1 == "a little negative", "A little negative",
                ifelse(ds$ogaf1 == "A little negative", "A little negative",
                ifelse(ds$ogaf1 == "neutral", "Neutral",
                ifelse(ds$ogaf1 == "Neutral", "Neutral",
                ifelse(ds$ogaf1 == "a little positive", "A little positive",
                ifelse(ds$ogaf1 == "A little positive", "A little positive",
                ifelse(ds$ogaf1 == "moderately positive", "Moderately positive",
                ifelse(ds$ogaf1 == "Moderately positive", "Moderately positive",
                ifelse(ds$ogaf1 == "Moderately Positive", "Moderately positive",
                ifelse(ds$ogaf1 == "very positive", "Very positive",
                ifelse(ds$ogaf1 == "Very positive", "Very positive",
                ifelse(ds$ogaf1 == "Very Positive", "Very positive", NA))))))))))))))))))

ds$og_aff_01 <- factor(ds$og_aff_01, 
                       levels=c("Very negative", "Moderately negative",
                                 "A little negative", "Neutral", 
                                 "A little positive", "Moderately positive", 
                                 "Very positive"))
table(ds$og_aff_01)

      Very negative Moderately negative   A little negative             Neutral 
                 99                  29                  37                  87 
  A little positive Moderately positive       Very positive 
                194                  16                  34 
Display code
## Feel Outgroup: Hostile to friendly:

ds$ogaf2 <- (ds$`Q13.2 Feel Outgroup - Hostile to Friendly`)

ds$og_aff_02 <- ifelse(ds$ogaf2 == "very hostile", "Very hostile", 
                ifelse(ds$ogaf2 == "Very hostile", "Very hostile", 
                ifelse(ds$ogaf2 == "Very Hostile", "Very hostile", 
                ifelse(ds$ogaf2 == "moderately Hostile", "Moderately hostile", 
                ifelse(ds$ogaf2 == "Moderately hostile", "Moderately hostile", 
                ifelse(ds$ogaf2 == "Moderately Hostile", "Moderately hostile", 
                ifelse(ds$ogaf2 == "a little hostile", "A little hostile", 
                ifelse(ds$ogaf2 == "A little hostile", "A little hostile", 
                ifelse(ds$ogaf2 == "neutral", "Neutral", 
                ifelse(ds$ogaf2 == "Neutral", "Neutral", 
                ifelse(ds$ogaf2 == "a little friendly", "A little friendly", 
                ifelse(ds$ogaf2 == "A little friendly", "A little friendly", 
                ifelse(ds$ogaf2 == "A little Friendly", "A little friendly", 
                ifelse(ds$ogaf2 == "moderately friendly", "Moderately friendly", 
                ifelse(ds$ogaf2 == "Moderately friendly", "Moderately friendly", 
                ifelse(ds$ogaf2 == "Moderately Friendly", "Moderately friendly", 
                ifelse(ds$ogaf2 == "very friendly", "Very friendly", 
                ifelse(ds$ogaf2 == "Very friendly", "Very friendly", ""))))))))))))))))))
                
ds$og_aff_02 <- factor(ds$og_aff_02, 
                       levels = c("Very hostile", "Moderately hostile", 
                                  "A little hostile", "Neutral", 
                                  "A little friendly", "Moderately friendly",
                                  "Very friendly"))

table(ds$og_aff_02)

       Very hostile  Moderately hostile    A little hostile             Neutral 
                 27                  23                  40                 111 
  A little friendly Moderately friendly       Very friendly 
                227                  29                  39 
Display code
## Feel Outgroup: Suspicious to trusting:

ds$ogaf3 <- ds$`Q13.3 Feel Outgroup - Suspicious to Trusting`

ds$og_aff_03 <- ifelse(ds$ogaf3 == "very suspicious", "Very suspicious", 
                ifelse(ds$ogaf3 == "moderately suspicious", "Moderately suspicious",
                ifelse(ds$ogaf3 == "Moderately suspicious", "Moderately suspicious",
                ifelse(ds$ogaf3 == "a little suspicious", "A little suspicious",
                ifelse(ds$ogaf3 == "A little suspicious", "A little suspicious",
                ifelse(ds$ogaf3 == "A  little suspicious", "A little suspicious", 
                ifelse(ds$ogaf3 == "neutral", "Neutral",
                ifelse(ds$ogaf3 == "Neutral", "Neutral",
                ifelse(ds$ogaf3 == "a little respect", "A little trusting",
                ifelse(ds$ogaf3 == "a little trusting", "A little trusting",
                ifelse(ds$ogaf3 == "A little trusting", "A little trusting", 
                ifelse(ds$ogaf3 == "A little Trusting", "A little trusting",
                ifelse(ds$ogaf3 == "moderately trusting", "Moderately trusting",
                ifelse(ds$ogaf3 == "Moderately trusting", "Moderately trusting",
                ifelse(ds$ogaf3 == "very trusting", "Very trusting",
                ifelse(ds$ogaf3 == "Very trusting", "Very trusting", NA))))))))))))))))
                       
ds$og_aff_03 <- factor(ds$og_aff_03, 
                       levels = c("Very suspicious", "Moderately suspicious", 
                                  "A little suspicious", "Neutral",
                                  "A little trusting", "Moderately trusting",
                                  "Very trusting"))
table(ds$og_aff_03)

      Very suspicious Moderately suspicious   A little suspicious 
                   41                    26                    50 
              Neutral     A little trusting   Moderately trusting 
                  125                   185                    36 
        Very trusting 
                   33 
Display code
## Feel Outgroup: Contempt to respect:

ds$ogaf4 <- ds$`Q13.4 Feel Outgroup - Contempt to Respect`

ds$og_aff_04 <- ifelse(ds$ogaf4 == "A lot of contempt", "A lot of contempt",
                ifelse(ds$ogaf4 == "A lot of Contempt", "A lot of contempt",
                ifelse(ds$ogaf4 == "Moderate contempt", "Moderate contempt",
                ifelse(ds$ogaf4 == "a little contempt", "A little contempt",
                ifelse(ds$ogaf4 == "A little contempt", "A little contempt",
                ifelse(ds$ogaf4 == "neutral", "Neutral",
                ifelse(ds$ogaf4 == "Neutral", "Neutral",
                ifelse(ds$ogaf4 == "a little respect", "A little respect",
                ifelse(ds$ogaf4 == "A little respect", "A little respect",
                ifelse(ds$ogaf4 == "Moderate respect", "Moderate respect",
                ifelse(ds$ogaf4 == "a lot of respect", "A lot of respect",
                ifelse(ds$ogaf4 == "A lot of respect", "A lot of respect", NA))))))))))))
                       
ds$og_aff_04 <- factor(ds$og_aff_04, 
                       levels = c("A lot of contempt", "Moderate contempt",
                                  "A little contempt", "Neutral", 
                                  "A little respect", "Moderate respect",
                                  "A lot of respect"))

table(ds$og_aff_04)

A lot of contempt Moderate contempt A little contempt           Neutral 
               35                19                45                82 
 A little respect  Moderate respect  A lot of respect 
              155                81                78 
Display code
## Feel Outgroup: Concerned to Unconcerned:

ds$ogaf5 <- ds$`Q13.5 Feel Outgroup - Concerned to Unconcerned`

table(ds$ogaf5)

    a little concerned     A little concerned    A little uconcerned 
                     2                    113                      1 
  a little unconcerned   A little unconcerned   moderately concerned 
                     4                     32                     20 
  Moderately concerned Moderately unconcerned                    N/A 
                    28                     13                      4 
               neutral                Neutral         very concerned 
                     9                    175                     38 
        Very concerned         Very Concerned       very unconcerned 
                    24                      1                     35 
      Very unconcerned 
                     1 
Display code
ds$og_aff_05 <- ifelse(ds$ogaf5 == "very concerned", "Very concerned",
                ifelse(ds$ogaf5 == "Very concerned", "Very concerned", 
                ifelse(ds$ogaf5 == "Very Concerned", "Very concerned", 
                ifelse(ds$ogaf5 == "moderately concerned", "Moderately concerned", 
                ifelse(ds$ogaf5 == "Moderately concerned", "Moderately concerned", 
                ifelse(ds$ogaf5 == "a little concerned", "A little concerned",
                ifelse(ds$ogaf5 == "A little concerned", "A little concerned", 
                ifelse(ds$ogaf5 == "neutral", "Neutral", 
                ifelse(ds$ogaf5 == "Neutral", "Neutral", 
                ifelse(ds$ogaf5 == "A little uconcerned", "A little unconcerned", 
                ifelse(ds$ogaf5 == "a little unconcerned", "A little unconcerned",
                ifelse(ds$ogaf5 == "A little unconcerned", "A little unconcerned", 
                ifelse(ds$ogaf5 == "Moderately unconcerned", "Moderately unconcerned", 
                ifelse(ds$ogaf5 == "very unconcerned", "Very unconcerned", 
                ifelse(ds$ogaf5 == "Very unconcerned", "Very unconcerned", NA)))))))))))))))

ds$og_aff_05 <- factor(ds$og_aff_05, 
                       levels = c("Very concerned", "Moderately concerned", 
                                  "A little concerned", "Neutral",
                                  "A little unconcerned", "Moderately unconcerned",
                                  "Very unconcerned"))
table(ds$og_aff_05)

        Very concerned   Moderately concerned     A little concerned 
                    63                     48                    115 
               Neutral   A little unconcerned Moderately unconcerned 
                   184                     37                     13 
      Very unconcerned 
                    36 
Display code
## Feel Outgroup: Threatened to Relaxed:

ds$ogaf6 <- ds$`Q13.6 Feel Outgroup - Threatened to Relaxed`

## convert first character only to upper case:
ds$ogaf6a <- gsub("(\\D)(\\D+)", "\\U\\1\\L\\2", ds$ogaf6, perl = TRUE)

ds$og_aff_06 <- ifelse(ds$ogaf6a == "Alittle relaxed", "A little relaxed", 
                ifelse(ds$ogaf6a == "N/a", NA, 
                ifelse(ds$ogaf6a == "Never", NA,ds$ogaf6a)))


ds$og_aff_06 <- factor(ds$og_aff_06, 
                       levels = c("Very threatened", "Moderately threatened", 
                                  "A little threatened", "Neutral", 
                                  "A little relaxed", "Moderately relaxed", 
                                  "Very relaxed"))
table(ds$og_aff_06)

      Very threatened Moderately threatened   A little threatened 
                   35                    16                    32 
              Neutral      A little relaxed    Moderately relaxed 
                  154                   174                    26 
         Very relaxed 
                   58 

Outgroup affect: Factor analysis

Display code
ds$NegativeToPositive <- as.numeric(ds$og_aff_01) 
ds$HostileToFriendly <- as.numeric(ds$og_aff_02) 
ds$SuspiciousToTrusting <- as.numeric(ds$og_aff_03) 
ds$ContemptToRespect <- as.numeric(ds$og_aff_04) 
ds$ConcernedToUnconcerned <- as.numeric(ds$og_aff_05) 
ds$ThreatenedToRelaxed <- as.numeric(ds$og_aff_06) 

og_affect <- cbind.data.frame(ds$NegativeToPositive, ds$HostileToFriendly,
                              ds$SuspiciousToTrusting, ds$ContemptToRespect,
                              ds$ConcernedToUnconcerned, ds$ThreatenedToRelaxed)

names(og_affect) <- sub('ds\\$', '', names(og_affect))

og_affect <- na.omit(og_affect)
mtx1 <- cor(og_affect[, c(1:6)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(og_affect))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(og_affect))
Overall MSA =  0.89
MSA for each item = 
    NegativeToPositive      HostileToFriendly   SuspiciousToTrusting 
                  0.89                   0.87                   0.88 
     ContemptToRespect ConcernedToUnconcerned    ThreatenedToRelaxed 
                  0.91                   0.22                   0.91 
Display code
## Bartlett's test of sphericity
cortest.bartlett(og_affect)
$chisq
[1] 1792

$p.value
[1] 0

$df
[1] 15
Display code
## Parallel test
parallel <- fa.parallel(og_affect)

Parallel analysis suggests that the number of factors =  1  and the number of components =  1 

Based on the scree plot, number of factors = 1.

Display code
fit01 <- factanal(og_affect, 1, rotation="promax")
fit01

Call:
factanal(x = og_affect, factors = 1, rotation = "promax")

Uniquenesses:
    NegativeToPositive      HostileToFriendly   SuspiciousToTrusting 
                  0.31                   0.20                   0.22 
     ContemptToRespect ConcernedToUnconcerned    ThreatenedToRelaxed 
                  0.30                   1.00                   0.45 

Loadings:
                       Factor1
NegativeToPositive      0.829 
HostileToFriendly       0.894 
SuspiciousToTrusting    0.882 
ContemptToRespect       0.834 
ConcernedToUnconcerned        
ThreatenedToRelaxed     0.741 

               Factor1
SS loadings       3.51
Proportion Var    0.58

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 32 on 9 degrees of freedom.
The p-value is 0.00018 
Display code
## Reliability analysis

psych::alpha(og_affect)
Some items ( ConcernedToUnconcerned ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option

Reliability analysis   
Call: psych::alpha(x = og_affect)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.84      0.84    0.86      0.46 5.2 0.011  4.2 1.2     0.66

    95% confidence boundaries 
         lower alpha upper
Feldt     0.81  0.84  0.86
Duhachek  0.81  0.84  0.86

 Reliability if an item is dropped:
                       raw_alpha std.alpha G6(smc) average_r  S/N alpha se
NegativeToPositive          0.78      0.78    0.82      0.42  3.6   0.0157
HostileToFriendly           0.77      0.77    0.80      0.40  3.4   0.0156
SuspiciousToTrusting        0.77      0.77    0.81      0.41  3.4   0.0158
ContemptToRespect           0.78      0.78    0.82      0.42  3.6   0.0153
ConcernedToUnconcerned      0.92      0.92    0.91      0.70 11.5   0.0059
ThreatenedToRelaxed         0.79      0.79    0.83      0.43  3.8   0.0143
                        var.r med.r
NegativeToPositive     0.1388  0.64
HostileToFriendly      0.1228  0.60
SuspiciousToTrusting   0.1249  0.60
ContemptToRespect      0.1311  0.62
ConcernedToUnconcerned 0.0044  0.71
ThreatenedToRelaxed    0.1575  0.71

 Item statistics 
                         n raw.r std.r  r.cor  r.drop mean  sd
NegativeToPositive     491  0.85  0.84 0.8209  0.7523  3.9 1.8
HostileToFriendly      491  0.87  0.88 0.8804  0.8124  4.5 1.4
SuspiciousToTrusting   491  0.87  0.87 0.8699  0.8018  4.3 1.5
ContemptToRespect      491  0.84  0.84 0.8139  0.7432  4.7 1.6
ConcernedToUnconcerned 491  0.22  0.22 0.0022 -0.0053  3.5 1.5
ThreatenedToRelaxed    491  0.80  0.81 0.7582  0.7039  4.5 1.5

Non missing response frequency for each item
                          1    2    3    4    5    6    7 miss
NegativeToPositive     0.20 0.06 0.08 0.17 0.39 0.03 0.07    0
HostileToFriendly      0.05 0.05 0.08 0.22 0.46 0.06 0.08    0
SuspiciousToTrusting   0.08 0.05 0.10 0.25 0.37 0.07 0.07    0
ContemptToRespect      0.07 0.04 0.09 0.17 0.32 0.16 0.16    0
ConcernedToUnconcerned 0.13 0.10 0.23 0.37 0.07 0.03 0.07    0
ThreatenedToRelaxed    0.07 0.03 0.07 0.31 0.35 0.05 0.12    0

Based on the output of factor analysis as well as reliability, it seems like one item (“concerned to unconcerned”) is problematic. Here is the scale without that item:

Outgroup affect (minus problematic item)

Display code
og_affect <- cbind.data.frame(ds$NegativeToPositive, ds$HostileToFriendly,
                              ds$SuspiciousToTrusting, ds$ContemptToRespect,
                              ds$ThreatenedToRelaxed)

names(og_affect) <- sub('ds\\$', '', names(og_affect))

og_affect <- na.omit(og_affect)
mtx1 <- cor(og_affect[, c(1:5)])

corrplot(mtx1, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(og_affect))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(og_affect))
Overall MSA =  0.89
MSA for each item = 
  NegativeToPositive    HostileToFriendly SuspiciousToTrusting 
                0.89                 0.87                 0.88 
   ContemptToRespect  ThreatenedToRelaxed 
                0.91                 0.92 
Display code
## Bartlett's test of sphericity
cortest.bartlett(og_affect)
$chisq
[1] 1778

$p.value
[1] 0

$df
[1] 10
Display code
## Parallel test
parallel <- fa.parallel(og_affect)

Parallel analysis suggests that the number of factors =  1  and the number of components =  1 
Display code
## Reliability analysis:
psych::alpha(og_affect)

Reliability analysis   
Call: psych::alpha(x = og_affect)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      0.92      0.92    0.91       0.7  12 0.0059  4.4 1.4     0.71

    95% confidence boundaries 
         lower alpha upper
Feldt     0.90  0.92  0.93
Duhachek  0.91  0.92  0.93

 Reliability if an item is dropped:
                     raw_alpha std.alpha G6(smc) average_r  S/N alpha se  var.r
NegativeToPositive        0.90      0.90    0.88      0.70  9.5   0.0071 0.0036
HostileToFriendly         0.89      0.89    0.86      0.67  8.1   0.0082 0.0047
SuspiciousToTrusting      0.89      0.89    0.87      0.68  8.3   0.0082 0.0050
ContemptToRespect         0.90      0.90    0.88      0.70  9.2   0.0075 0.0056
ThreatenedToRelaxed       0.91      0.92    0.90      0.74 11.4   0.0062 0.0011
                     med.r
NegativeToPositive    0.71
HostileToFriendly     0.67
SuspiciousToTrusting  0.68
ContemptToRespect     0.72
ThreatenedToRelaxed   0.75

 Item statistics 
                       n raw.r std.r r.cor r.drop mean  sd
NegativeToPositive   491  0.87  0.86  0.82   0.78  3.9 1.8
HostileToFriendly    491  0.90  0.91  0.89   0.85  4.5 1.4
SuspiciousToTrusting 491  0.90  0.90  0.88   0.84  4.3 1.5
ContemptToRespect    491  0.87  0.87  0.83   0.79  4.7 1.6
ThreatenedToRelaxed  491  0.80  0.81  0.74   0.70  4.5 1.5

Non missing response frequency for each item
                        1    2    3    4    5    6    7 miss
NegativeToPositive   0.20 0.06 0.08 0.17 0.39 0.03 0.07    0
HostileToFriendly    0.05 0.05 0.08 0.22 0.46 0.06 0.08    0
SuspiciousToTrusting 0.08 0.05 0.10 0.25 0.37 0.07 0.07    0
ContemptToRespect    0.07 0.04 0.09 0.17 0.32 0.16 0.16    0
ThreatenedToRelaxed  0.07 0.03 0.07 0.31 0.35 0.05 0.12    0

Based on the above output for factor analysis and reliability analysis, the “outgroup affect” scale is highly reliable if we drop the problematic item (“concerned to unconcerned”). Here is the visualization for the scale after dropping the problematic item:

Display code
ds$og_affect <- (ds$NegativeToPositive+ds$HostileToFriendly+
                 ds$SuspiciousToTrusting+ds$ContemptToRespect+
                 ds$ThreatenedToRelaxed)/5

summary(ds$og_affect)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     3.8     4.6     4.4     5.0     7.0       9 
Display code
ds %>% drop_na(og_affect)%>%
ggplot(aes(x = og_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 4.40", 
                 xintercept = 4.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Outgroup affect score", 
       y = "Frequency", 
       title = "Outgroup affect")+
  theme_bw()

Section 14. Updated regression models

This set of regression models have additional predictors: frequency of positive and negative contact, and interaction between perspective taking and perceived history of discrimination

Display code
ds$freq_positive_contact <- ds$pc02
ds$freq_negative_contact <- ds$nc02
ds$perspectiveXdiscrimination <- ds$perspective_taking*ds$history_discrimination

## Four regression models predicting endorsement and experience of BCL / BBL:

lm01 <- lm(Endorse_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(Experience_BCL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(Endorse_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(Experience_BBL~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
Endorse_BCLExperience_BCLEndorse_BBLExperience_BBL
(1)(2)(3)(4)
IG_Fusion-0.0540.210*0.370**0.330**
(0.067)(0.110)(0.130)(0.110)
IG_Identification0.360***0.360**-0.450**-0.097
(0.074)(0.120)(0.140)(0.120)
OG_Fusion0.064-0.180*-0.095-0.120
(0.053)(0.084)(0.100)(0.085)
OG_Identification-0.0910.078-0.130-0.072
(0.051)(0.081)(0.100)(0.083)
freq_positive_contact0.057*0.200***-0.050-0.029
(0.028)(0.045)(0.054)(0.046)
freq_negative_contact-0.064*0.0810.130*-0.007
(0.029)(0.046)(0.056)(0.047)
empathic_concern0.120*0.350***0.1600.210**
(0.048)(0.076)(0.092)(0.077)
perspective_taking0.280*0.250-1.000***-0.420*
(0.130)(0.210)(0.250)(0.210)
history_discrimination0.1800.250-0.830*-0.500
(0.170)(0.270)(0.330)(0.280)
perspectiveXdiscrimination-0.036-0.0650.140*0.067
(0.029)(0.046)(0.055)(0.046)
Age0.0020.001-0.002-0.004
(0.003)(0.004)(0.005)(0.004)
Female-0.1200.0440.067-0.240
(0.086)(0.140)(0.170)(0.140)
Married0.0890.1800.2700.073
(0.098)(0.150)(0.190)(0.160)
`SES-`Middle0.0800.0390.068-0.180
(0.093)(0.150)(0.180)(0.150)
`SES-`Upper middle-0.2900.4500.3800.330
(0.260)(0.430)(0.500)(0.440)
Constant1.600*-2.800*11.000***5.800***
(0.820)(1.300)(1.600)(1.300)
Observations390386386387
R20.2300.3100.1800.130
Adjusted R20.2000.2800.1500.090
Residual Std. Error0.810 (df = 374)1.300 (df = 370)1.500 (df = 370)1.300 (df = 371)
F Statistic7.300*** (df = 15; 374)11.000*** (df = 15; 370)5.600*** (df = 15; 370)3.600*** (df = 15; 371)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 15. Updated regression models (alternate DVs)

Same predictors as section 14 above, but with different outcomes. This set of regression models has an additional outcome variable: affect towards outgroup.

Display code
## Four regression models:

lm01 <- lm(og_cooperation~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm02 <- lm(og_hostility~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm03 <- lm(fight_outgroup~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)

lm04 <- lm(og_affect~IG_Fusion+IG_Identification+OG_Fusion+OG_Identification+freq_positive_contact+freq_negative_contact+empathic_concern+perspective_taking+history_discrimination+perspectiveXdiscrimination+Age+Female+Married+`SES-`, 
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04, 
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
og_cooperationog_hostilityfight_outgroupog_affect
(1)(2)(3)(4)
IG_Fusion-0.220*0.0670.080-0.075
(0.110)(0.100)(0.110)(0.077)
IG_Identification0.260*0.074-0.0370.012
(0.120)(0.110)(0.120)(0.089)
OG_Fusion0.150-0.230**-0.170*0.190**
(0.087)(0.079)(0.084)(0.062)
OG_Identification0.200*0.0420.0610.070
(0.084)(0.077)(0.081)(0.060)
freq_positive_contact0.370***-0.200***-0.110*0.330***
(0.047)(0.042)(0.045)(0.033)
freq_negative_contact-0.099*0.0310.130**-0.210***
(0.048)(0.043)(0.046)(0.034)
empathic_concern0.170*-0.007-0.1400.095
(0.078)(0.072)(0.075)(0.055)
perspective_taking-0.400-0.2000.280-0.280
(0.210)(0.190)(0.210)(0.150)
history_discrimination-0.830**0.030-0.011-0.360
(0.280)(0.260)(0.270)(0.200)
perspectiveXdiscrimination0.130**0.0770.0610.046
(0.047)(0.043)(0.045)(0.033)
Age0.002-0.004-0.002-0.0003
(0.004)(0.004)(0.004)(0.003)
Female0.031-0.073-0.150-0.033
(0.140)(0.130)(0.140)(0.099)
Married-0.2300.0920.230-0.110
(0.160)(0.150)(0.150)(0.110)
`SES-`Middle0.0420.1200.066-0.160
(0.150)(0.140)(0.150)(0.110)
`SES-`Upper middle-0.1700.7100.910*-0.600*
(0.430)(0.400)(0.410)(0.300)
Constant4.100**2.900*0.1304.700***
(1.300)(1.200)(1.300)(0.940)
Observations393386390386
R20.4800.5100.3800.580
Adjusted R20.4600.4900.3600.560
Residual Std. Error1.300 (df = 377)1.200 (df = 370)1.300 (df = 374)0.930 (df = 370)
F Statistic23.000*** (df = 15; 377)26.000*** (df = 15; 370)16.000*** (df = 15; 374)34.000*** (df = 15; 370)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 16. Imagistic Experience: Factor analysis

Display code
ds$event_negative_affect <- as.numeric(ds$`Q9.1 Feelings about Event`)
table(ds$event_negative_affect)

  1   2   3   4   5   6   7 
161  78   3   7   7  74 162 
Display code
ds$event_positive_affect_01 <- (8 - ds$event_negative_affect)
table(ds$event_positive_affect_01)

  1   2   3   4   5   6   7 
162  74   7   7   3  78 161 
Display code
ds$event_positive_affect_02 <- as.numeric(ds$`Q9.2 Feelings about Event`)
table(ds$event_positive_affect_02)

  1   2   3   4   5   6   7 
 66  89   8   5  16 117 190 
Display code
ds$event_positive_affect <- ds$event_positive_affect_02

ds$event_episodic_recall_01 <- as.numeric(ds$`Q9.3 Feelings about Event`)
table(ds$event_episodic_recall_01)

  1   2   3   4   5   6   7 
  1   1   1   6  16 185 284 
Display code
ds$event_episodic_recall_02 <- as.numeric(ds$`Q9.4 Feelings about Event`)
table(ds$event_episodic_recall_02)

  1   2   3   4   5   6   7 
  1   3   2   7  18 184 278 
Display code
ds$event_shared_perception_01 <- as.numeric(ds$`Q9.5 Feelings about Event`)
table(ds$event_shared_perception_01)

  1   2   3   4   5   6   7 
 23  32   2  18  33 185 201 
Display code
ds$event_shared_perception_02 <- as.numeric(ds$`Q9.6 Feelings about Event`)
table(ds$event_shared_perception_02)

  1   2   3   4   5   6   7 
 23  19   5  16  36 194 200 
Display code
ds$event_event_reflection_01 <- as.numeric(ds$`Q9.9 Feelings about Event`)
table(ds$event_event_reflection_01)

  1   2   3   4   5   6   7 
  3   7   2  10  34 208 230 
Display code
ds$event_event_reflection_02 <- as.numeric(ds$`Q9.10 Feelings about Event`)
table(ds$event_event_reflection_02)

  1   2   3   4   5   6   7 
  4  11   4  15  45 195 220 
Display code
ds$event_transformative_indiv_01 <- as.numeric(ds$`Q9.7 Feelings about Event`)
table(ds$event_transformative_indiv_01)

  1   2   3   4   5   6   7 
  8  10   4  13  25 169 264 
Display code
ds$event_transformative_indiv_02 <- as.numeric(ds$`Q9.8 Feelings about Event`)
table(ds$event_transformative_indiv_02)

  1   2   3   4   5   6   7 
 10  24  10  28  39 172 204 
Display code
ds$event_transformative_group_01 <- as.numeric(ds$`Q9.11 Feelings about Event`)
table(ds$event_transformative_group_01)

  1   2   3   4   5   6   7 
 24  18   7  42  45 218 139 
Display code
ds$event_transformative_group_02 <- as.numeric(ds$`Q9.12 Feelings about Event`)
table(ds$event_transformative_group_02)

  1   2   3   4   5   6   7 
 30  42   6  69  35 186 126 
Display code
imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)

imagistic <- na.omit(imagistic)

names(imagistic) <- sub('ds\\$event\\_', '', names(imagistic))

mtx <- cor(imagistic[, c(1:12)])

corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Imagistic experience: Factor analysis

Display code
## Kaiser-Meyer-Olkin (KMO) test of factorability
KMO(r=cor(imagistic))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(imagistic))
Overall MSA =  0.75
MSA for each item = 
        negative_affect         positive_affect      episodic_recall_01 
                   0.58                    0.60                    0.68 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.69                    0.78                    0.78 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.80                    0.77                    0.78 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.80                    0.78                    0.79 
Display code
## Bartlett's test of sphericity
cortest.bartlett(imagistic)
$chisq
[1] 2510

$p.value
[1] 0

$df
[1] 66
Display code
## Parallel test
parallel <- fa.parallel(imagistic)

Parallel analysis suggests that the number of factors =  4  and the number of components =  4 

Scree plot suggests there needs to be three factors, but theoretically we want six factors. Here is how the three factors break down:

Display code
fit03 <- factanal(imagistic, 3, rotation="promax")
fit03

Call:
factanal(x = imagistic, factors = 3, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                   0.37                    0.20                    0.79 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                   0.89                    0.48                    0.42 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                   0.50                    0.32                    0.76 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                   0.60                    0.16                    0.26 

Loadings:
                        Factor1 Factor2 Factor3
negative_affect                         -0.796 
positive_affect                          0.867 
episodic_recall_01      -0.123   0.476         
episodic_recall_02               0.258         
shared_perception_01     0.726          -0.165 
shared_perception_02     0.795                 
event_reflection_01              0.694         
event_reflection_02     -0.116   0.877         
transformative_indiv_01          0.459   0.122 
transformative_indiv_02  0.173   0.528         
transformative_group_01  0.940  -0.110         
transformative_group_02  0.797           0.128 

               Factor1 Factor2 Factor3
SS loadings       2.76    2.05    1.48
Proportion Var    0.23    0.17    0.12
Cumulative Var    0.23    0.40    0.52

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000   0.194   0.505
Factor2   0.194   1.000  -0.136
Factor3   0.505  -0.136   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 376 on 33 degrees of freedom.
The p-value is 0.0000000000000000000000000000000000000000000000000000000000078 

The factor structure above makes sense but is a little incoherent. Here is what happens if we “force” six factors:

Display code
fit06 <- factanal(imagistic, 6, rotation="promax")
fit06

Call:
factanal(x = imagistic, factors = 6, rotation = "promax")

Uniquenesses:
        negative_affect         positive_affect      episodic_recall_01 
                  0.486                   0.005                   0.147 
     episodic_recall_02    shared_perception_01    shared_perception_02 
                  0.662                   0.281                   0.187 
    event_reflection_01     event_reflection_02 transformative_indiv_01 
                  0.580                   0.005                   0.714 
transformative_indiv_02 transformative_group_01 transformative_group_02 
                  0.005                   0.005                   0.298 

Loadings:
                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
negative_affect                 -0.715                                 
positive_affect                  1.011                                 
episodic_recall_01                                               0.959 
episodic_recall_02       0.103                                   0.574 
shared_perception_01     0.875                                         
shared_perception_02     0.894                                         
event_reflection_01                              0.514                 
event_reflection_02                              1.080                 
transformative_indiv_01                  0.120           0.345   0.186 
transformative_indiv_02                                  1.094         
transformative_group_01                  1.024                         
transformative_group_02  0.166           0.631                         

               Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
SS loadings       1.62    1.55    1.49    1.46    1.36    1.30
Proportion Var    0.14    0.13    0.12    0.12    0.11    0.11
Cumulative Var    0.14    0.27    0.39    0.51    0.62    0.73

Factor Correlations:
        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
Factor1  1.0000 -0.0793  -0.547   0.593   0.338  0.3820
Factor2 -0.0793  1.0000   0.342   0.145   0.150 -0.0934
Factor3 -0.5475  0.3422   1.000  -0.371  -0.186 -0.6822
Factor4  0.5933  0.1445  -0.371   1.000   0.451  0.3806
Factor5  0.3378  0.1501  -0.186   0.451   1.000  0.2853
Factor6  0.3820 -0.0934  -0.682   0.381   0.285  1.0000

Test of the hypothesis that 6 factors are sufficient.
The chi square statistic is 13 on 9 degrees of freedom.
The p-value is 0.17 

FA solution still seems incoherent.

Imagistic experience: Reliability

Display code
imagistic <- cbind.data.frame(ds$event_negative_affect, ds$event_positive_affect, 
                              ds$event_episodic_recall_01, ds$event_episodic_recall_02,
                              ds$event_shared_perception_01, ds$event_shared_perception_02,
                              ds$event_event_reflection_01, ds$event_event_reflection_02,
                              ds$event_transformative_indiv_01, ds$event_transformative_indiv_02,
                              ds$event_transformative_group_01, ds$event_transformative_group_02)


## Reliability:
alph01 <- psych::alpha(imagistic)
Some items ( ds$event_negative_affect ) were negatively correlated with the total scale and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option
Display code
alph01

Reliability analysis   
Call: psych::alpha(x = imagistic)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.65      0.76    0.85      0.21 3.1 0.023  5.7 0.73     0.19

    95% confidence boundaries 
         lower alpha upper
Feldt      0.6  0.65  0.69
Duhachek   0.6  0.65  0.69

 Reliability if an item is dropped:
                                 raw_alpha std.alpha G6(smc) average_r S/N
ds$event_negative_affect              0.78      0.80    0.85      0.26 3.9
ds$event_positive_affect              0.72      0.78    0.84      0.25 3.7
ds$event_episodic_recall_01           0.64      0.75    0.84      0.21 2.9
ds$event_episodic_recall_02           0.63      0.75    0.84      0.21 3.0
ds$event_shared_perception_01         0.57      0.72    0.82      0.19 2.5
ds$event_shared_perception_02         0.56      0.71    0.81      0.19 2.5
ds$event_event_reflection_01          0.61      0.73    0.83      0.20 2.7
ds$event_event_reflection_02          0.61      0.72    0.82      0.19 2.6
ds$event_transformative_indiv_01      0.62      0.74    0.84      0.20 2.8
ds$event_transformative_indiv_02      0.59      0.72    0.83      0.19 2.6
ds$event_transformative_group_01      0.56      0.72    0.81      0.19 2.5
ds$event_transformative_group_02      0.54      0.71    0.81      0.18 2.5
                                 alpha se var.r med.r
ds$event_negative_affect            0.014 0.041  0.25
ds$event_positive_affect            0.019 0.045  0.23
ds$event_episodic_recall_01         0.024 0.066  0.22
ds$event_episodic_recall_02         0.025 0.069  0.24
ds$event_shared_perception_01       0.030 0.060  0.19
ds$event_shared_perception_02       0.030 0.058  0.17
ds$event_event_reflection_01        0.026 0.065  0.17
ds$event_event_reflection_02        0.026 0.064  0.17
ds$event_transformative_indiv_01    0.026 0.069  0.18
ds$event_transformative_indiv_02    0.027 0.065  0.18
ds$event_transformative_group_01    0.030 0.054  0.18
ds$event_transformative_group_02    0.031 0.055  0.17

 Item statistics 
                                   n raw.r std.r  r.cor r.drop mean   sd
ds$event_negative_affect         492 0.043 0.013 -0.071 -0.256  4.0 2.68
ds$event_positive_affect         491 0.188 0.132  0.069 -0.087  4.9 2.38
ds$event_episodic_recall_01      494 0.345 0.470  0.399  0.267  6.5 0.72
ds$event_episodic_recall_02      493 0.359 0.460  0.374  0.278  6.5 0.80
ds$event_shared_perception_01    494 0.714 0.675  0.661  0.595  5.8 1.68
ds$event_shared_perception_02    493 0.735 0.691  0.691  0.634  5.8 1.57
ds$event_event_reflection_01     494 0.538 0.603  0.555  0.447  6.3 0.98
ds$event_event_reflection_02     494 0.558 0.629  0.595  0.460  6.1 1.13
ds$event_transformative_indiv_01 493 0.486 0.535  0.456  0.369  6.2 1.20
ds$event_transformative_indiv_02 487 0.627 0.638  0.594  0.507  5.9 1.48
ds$event_transformative_group_01 493 0.754 0.689  0.701  0.660  5.6 1.57
ds$event_transformative_group_02 494 0.780 0.715  0.723  0.674  5.2 1.81

Non missing response frequency for each item
                                    1    2    3    4    5    6    7 miss
ds$event_negative_affect         0.33 0.16 0.01 0.01 0.01 0.15 0.33 0.02
ds$event_positive_affect         0.13 0.18 0.02 0.01 0.03 0.24 0.39 0.02
ds$event_episodic_recall_01      0.00 0.00 0.00 0.01 0.03 0.37 0.57 0.01
ds$event_episodic_recall_02      0.00 0.01 0.00 0.01 0.04 0.37 0.56 0.01
ds$event_shared_perception_01    0.05 0.06 0.00 0.04 0.07 0.37 0.41 0.01
ds$event_shared_perception_02    0.05 0.04 0.01 0.03 0.07 0.39 0.41 0.01
ds$event_event_reflection_01     0.01 0.01 0.00 0.02 0.07 0.42 0.47 0.01
ds$event_event_reflection_02     0.01 0.02 0.01 0.03 0.09 0.39 0.45 0.01
ds$event_transformative_indiv_01 0.02 0.02 0.01 0.03 0.05 0.34 0.54 0.01
ds$event_transformative_indiv_02 0.02 0.05 0.02 0.06 0.08 0.35 0.42 0.03
ds$event_transformative_group_01 0.05 0.04 0.01 0.09 0.09 0.44 0.28 0.01
ds$event_transformative_group_02 0.06 0.09 0.01 0.14 0.07 0.38 0.26 0.01

Imagistic experience: Visualization

If we conceptualize “imagistic experience” as a single measure that is the sum of the six scales, here is the visualization of the measure:

Display code
ds$imagistic <- (((ds$event_positive_affect)+
                 (ds$event_negative_affect)+ 
                ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)+
                ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)+
                ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)+
                ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)+
                ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)))

summary(ds$imagistic)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     21      36      40      39      42      49      27 
Display code
ds %>% drop_na(imagistic)%>%
ggplot(aes(x = imagistic))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 39.00", 
                 xintercept = 39.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Imagistic experience", 
       y = "Frequency", 
       title = "Imagistic experience: Uganda")+
  theme_bw()

Section 17. Imagistic experience: sub-scales

Below are the individual Cronbach’s alpha for each of the seven subscales, followed by their individual visualizations, followed by the correlation between the seven sub-scales

Negative affect about event

Display code
## Positive affect:
negative_affect <- cbind.data.frame(ds$event_negative_affect)
negative_affect <- na.omit(negative_affect)

## Visualization:
ds$event_negative_affect <- (ds$event_negative_affect)
summary(ds$event_negative_affect)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1       1       4       4       7       7       8 
Display code
ds %>% drop_na(event_negative_affect)%>%
ggplot(aes(x = event_negative_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 4.00", 
                 xintercept = 4.00, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Negative affect about event", 
       y = "Frequency", 
       title = "Negative affect about event: Uganda")+
  theme_bw()

Positive affect about event

Display code
## Positive affect:
positive_affect <- cbind.data.frame(ds$event_positive_affect)
positive_affect <- na.omit(positive_affect)

## Visualization:
ds$event_positive_affect <- (ds$event_positive_affect)
summary(ds$event_positive_affect)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     2.0     6.0     4.9     7.0     7.0       9 
Display code
ds %>% drop_na(event_positive_affect)%>%
ggplot(aes(x = event_positive_affect))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 4.90", 
                 xintercept = 4.90, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Positive affect about event", 
       y = "Frequency", 
       title = "Positive affect about event: Uganda")+
  theme_bw()

Episodic recall of event

Display code
## Episodic recall:
episodic_recall <- cbind.data.frame(ds$event_episodic_recall_01, ds$event_episodic_recall_02)
episodic_recall <- na.omit(episodic_recall)

names(episodic_recall) <- sub('ds\\$event\\_', '', names(episodic_recall))

mtx <- cor(episodic_recall[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(episodic_recall)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.66      0.66    0.49      0.49 1.9 0.031  6.5 0.66     0.49
Display code
## Visualization:
ds$event_episodic_recall <- ((ds$event_episodic_recall_01+ds$event_episodic_recall_02)/2)

summary(ds$event_episodic_recall)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    2.0     6.0     6.5     6.5     7.0     7.0       8 
Display code
ds %>% drop_na(event_episodic_recall)%>%
ggplot(aes(x = event_episodic_recall))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 6.50", 
                 xintercept = 6.50, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Episodic recall of event", 
       y = "Frequency", 
       title = "Episodic recall of event: Uganda")+
  theme_bw()

Shared perception of event

Display code
## Shared perception:
shared_perception <- cbind.data.frame(ds$event_shared_perception_01, ds$event_shared_perception_02)
shared_perception <- na.omit(shared_perception)

names(shared_perception) <- sub('ds\\$event\\_', '', names(shared_perception))

mtx <- cor(shared_perception[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(shared_perception)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.84      0.85    0.73      0.73 5.5 0.014  5.8 1.5     0.73
Display code
## Visualization:
ds$event_shared_perception <- ((ds$event_shared_perception_01+ds$event_shared_perception_02)/2)
summary(ds$event_shared_perception)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     5.5     6.0     5.8     7.0     7.0       8 
Display code
ds %>% drop_na(event_shared_perception)%>%
ggplot(aes(x = event_shared_perception))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.80", 
                 xintercept = 5.80, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Shared perception of event", 
       y = "Frequency", 
       title = "Shared perception of event: Uganda")+
  theme_bw()

Reflection of event

Display code
## Reflection:
event_reflection <- cbind.data.frame(ds$event_event_reflection_01, ds$event_event_reflection_02)
event_reflection <- na.omit(event_reflection)

names(event_reflection) <- sub('ds\\$event\\_', '', names(event_reflection))

mtx <- cor(event_reflection[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(event_reflection)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
      0.76      0.76    0.62      0.62 3.2 0.022  6.2 0.95     0.62
Display code
## Visualization:
ds$event_event_reflection <- ((ds$event_event_reflection_01+ds$event_event_reflection_02)/2)
summary(ds$event_event_reflection)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     6.0     6.5     6.2     7.0     7.0       7 
Display code
ds %>% drop_na(event_event_reflection)%>%
ggplot(aes(x = event_event_reflection))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 6.20", 
                 xintercept = 6.20, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Reflection of event", 
       y = "Frequency", 
       title = "Reflection of event: Uganda")+
  theme_bw()

Transformative event for individual

Display code
## Reflection:
event_transformative_indiv <- cbind.data.frame(ds$event_transformative_indiv_01, ds$event_transformative_indiv_02)

event_transformative_indiv <- na.omit(event_transformative_indiv)

names(event_transformative_indiv) <- sub('ds\\$event\\_', '', names(event_transformative_indiv))

mtx <- cor(event_transformative_indiv[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
psych::alpha(event_transformative_indiv)

Reliability analysis   
Call: psych::alpha(x = event_transformative_indiv)

  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
      0.61      0.62    0.45      0.45 1.6 0.034  6.1 1.1     0.45

    95% confidence boundaries 
         lower alpha upper
Feldt     0.54  0.61  0.68
Duhachek  0.55  0.61  0.68

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r  S/N alpha se
transformative_indiv_01      0.37      0.45     0.2      0.45 0.82       NA
transformative_indiv_02      0.55      0.45     0.2      0.45 0.82       NA
                        var.r med.r
transformative_indiv_01     0  0.45
transformative_indiv_02     0  0.45

 Item statistics 
                          n raw.r std.r r.cor r.drop mean  sd
transformative_indiv_01 485  0.82  0.85  0.57   0.45  6.2 1.2
transformative_indiv_02 485  0.88  0.85  0.57   0.45  5.9 1.5

Non missing response frequency for each item
                           1    2    3    4    5    6    7 miss
transformative_indiv_01 0.02 0.02 0.01 0.03 0.05 0.34 0.53    0
transformative_indiv_02 0.02 0.05 0.02 0.06 0.08 0.35 0.42    0
Display code
## Visualization:
ds$event_transformative_indiv <- ((ds$event_transformative_indiv_01+ds$event_transformative_indiv_02)/2)
summary(ds$event_transformative_indiv)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     6.0     6.5     6.1     7.0     7.0      15 
Display code
ds %>% drop_na(event_transformative_indiv)%>%
ggplot(aes(x = event_transformative_indiv))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 6.10", 
                 xintercept = 6.10, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for individual: Uganda")+
  theme_bw()

Transformative event for group

Display code
event_transformative_group <- cbind.data.frame(ds$event_transformative_group_01, ds$event_transformative_group_02)
event_transformative_group <- na.omit(event_transformative_group)

names(event_transformative_group) <- sub('ds\\$event\\_', '', names(event_transformative_group))

mtx <- cor(event_transformative_group[, c(1:2)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("white", "darkred", "red",
               "darkgrey", "blue", "darkblue"))

Display code
## Reliability:
alph01 <- psych::alpha(event_transformative_group)
summary(alph01)

Reliability analysis   
 raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd median_r
      0.88      0.89     0.8       0.8 7.9 0.01  5.4 1.6      0.8
Display code
## Visualization:
ds$event_transformative_group <- ((ds$event_transformative_group_01+ds$event_transformative_group_02)/2)
summary(ds$event_transformative_group)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    1.0     4.5     6.0     5.4     6.5     7.0       8 
Display code
ds %>% drop_na(event_transformative_group)%>%
ggplot(aes(x = event_transformative_group))+
  geom_histogram(color = "black",
                 fill = "gray",
                 bins = 50)+
  geom_textvline(label = "Mean = 5.40", 
                 xintercept = 5.40, 
                 vjust = 1.1, 
                 lwd = 1.05, 
                 linetype = 2)+
  labs(x = "Transformative event score", 
       y = "Frequency", 
       title = "Transformative event for group: Uganda")+
  theme_bw()

Correlation between seven subscales

Display code
imagistic_subscales <- cbind.data.frame(ds$event_positive_affect, ds$event_negative_affect,
                                        ds$event_episodic_recall, 
                                        ds$event_shared_perception, ds$event_event_reflection, 
                                        ds$event_transformative_indiv, ds$event_transformative_group)


imagistic_subscales <- na.omit(imagistic_subscales)

names(imagistic_subscales) <- sub('ds\\$event\\_', '', names(imagistic_subscales))

mtx <- cor(imagistic_subscales[, c(1:6)])

## Correlation plot:
corrplot(mtx, method = "number", number.cex = 0.7,
         col=c("darkred", "red", "red",
               "darkgrey", "blue", "darkblue"))

Section 18. Regression models: Relationship between imagistic experience and IG/OG fusion/identification

Display code
## Four regression models predicting IG/OG Fusion/Identification:

lm01 <- lm(IG_Fusion~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

lm02 <- lm(IG_Identification~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

lm03 <- lm(OG_Fusion~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)

lm04 <- lm(OG_Identification~event_positive_affect+event_negative_affect+event_episodic_recall+event_shared_perception+event_event_reflection+
event_transformative_indiv+event_transformative_group+Age+Female+Married+`SES-`,
           data = ds)
Display code
## Tabulated results:

stargazer(lm01, lm02,
          lm03, lm04,
          type = "html", 
          star.cutoffs = c(0.05, 0.01, 0.001),
          out = "table1.html")
Display code
htmltools::includeHTML("table1.html")
Dependent variable:
IG_FusionIG_IdentificationOG_FusionOG_Identification
(1)(2)(3)(4)
event_positive_affect0.010-0.016-0.130**-0.110*
(0.022)(0.021)(0.045)(0.047)
event_negative_affect-0.022-0.009-0.063-0.040
(0.018)(0.017)(0.037)(0.039)
event_episodic_recall0.280***0.190***0.2000.170
(0.055)(0.052)(0.110)(0.120)
event_shared_perception0.0220.090**-0.004-0.041
(0.032)(0.030)(0.065)(0.066)
event_event_reflection0.091-0.002-0.060-0.061
(0.047)(0.043)(0.096)(0.098)
event_transformative_indiv0.0410.100**0.0960.120
(0.037)(0.034)(0.074)(0.077)
event_transformative_group0.0630.017-0.026-0.017
(0.033)(0.031)(0.067)(0.070)
Age-0.0010.0004-0.001-0.004
(0.001)(0.001)(0.002)(0.005)
Female0.0720.0460.1600.140
(0.069)(0.065)(0.140)(0.150)
Married-0.0890.0200.1200.067
(0.082)(0.077)(0.170)(0.170)
`SES-`Middle-0.0590.0220.1300.130
(0.078)(0.072)(0.160)(0.160)
`SES-`Upper middle-0.170-0.150-0.400-0.900*
(0.200)(0.200)(0.400)(0.430)
Constant3.300***3.900***2.400**2.600**
(0.400)(0.380)(0.820)(0.850)
Observations432432428434
R20.1800.1600.0480.049
Adjusted R20.1500.1400.0200.022
Residual Std. Error0.700 (df = 419)0.660 (df = 419)1.400 (df = 415)1.500 (df = 421)
F Statistic7.500*** (df = 12; 419)6.600*** (df = 12; 419)1.700 (df = 12; 415)1.800* (df = 12; 421)
Note:*p<0.05; **p<0.01; ***p<0.001

Section 19. Text Analysis

Display code
ds02 <- fread("~/Desktop/oxford/data/uganda/Ethnic Data Uganda.csv")

ds02$education <- ds02$`Recoded Education`
ds02$ethnicity <- ds02$`Recoded Category`

ds02a <- ds02[, c("Entry", "education", "ethnicity")]

ds <- merge(ds, ds02a, by="Entry")

ds$description <- ds$`Q8D. Description`

ds$ethnicity <- ifelse(ds$ethnicity == "luo", "Luo",
                ifelse(ds$ethnicity == "N/A", NA,
                ifelse(ds$ethnicity == "Nilo-Ham", "Nilotic",
                ifelse(ds$ethnicity == "Nubis", "Nilotic", ds$ethnicity))))

table(ds$ethnicity)

  Bantu     Luo Nilotic 
     66     202     229 
Display code
## Import text related datasets:
bing <- as.data.table(get_sentiments("bing"))
stopwords <- as.data.table(stop_words)

## Corpus of most common words:
corpus1 <- ds %>%
  unnest_tokens(word, description)

corpus1 <- as.data.table(corpus1)
stopwords <- as.data.table(stopwords)

## Remove stop words:
corpus1 <- corpus1[!stopwords, on = "word"]

## Merge corpus with data:
df2 <- merge(corpus1, bing, by = "word")

Word cloud: Full sample

Display code
a <- sort(table(corpus1$word), decreasing = T)

tblwc <- as.data.table(a)
head(tblwc, n = 15)
           V1   N
 1:    church 327
 2:      life 234
 3:    people 226
 4:       day 223
 5:      home 178
 6:    school 147
 7:  children 144
 8:   husband 134
 9:    family 128
10:     event 113
11:   married 112
12:    forget 107
13:     money 107
14:      time 105
15: community 101
Display code
wordcloud(words = tblwc$V1,
                 freq = tblwc$N, 
                 min.freq = 1,
                 max.words=300,
                 random.order=F, 
                 rot.per=0.35,
                 colors=brewer.pal(8, "Dark2"))

Word cloud: Bantu

Display code
corpus2 <- corpus1[corpus1$ethnicity=="Bantu",]
a <- sort(table(corpus2$word), decreasing = T)

tblwc <- as.data.table(a)
head(tblwc, n = 15)
           V1  N
 1:    church 44
 2:      life 32
 3:  children 29
 4:       day 28
 5:      home 28
 6:    people 21
 7:    school 21
 8:   husband 20
 9:     money 18
10:      time 17
11:       god 16
12:    forget 15
13:       lot 15
14:   started 15
15: community 14
Display code
wordcloud(words = tblwc$V1,
                 freq = tblwc$N, 
                 min.freq = 1,
                 max.words=300,
                 random.order=F, 
                 rot.per=0.35,
                 colors=brewer.pal(8, "Dark2"))

Word cloud: Luo

Display code
corpus2 <- corpus1[corpus1$ethnicity=="Luo",]
a <- sort(table(corpus2$word), decreasing = T)

tblwc <- as.data.table(a)
head(tblwc, n = 15)
          V1   N
 1:   church 123
 2:      day 104
 3:   people  88
 4:     life  77
 5:     home  66
 6:     time  52
 7:   forget  51
 8:   school  49
 9:    event  47
10:   family  47
11:  married  46
12:    money  46
13: children  44
14:      lot  41
15:    happy  39
Display code
wordcloud(words = tblwc$V1,
                 freq = tblwc$N, 
                 min.freq = 1,
                 max.words=300,
                 random.order=F, 
                 rot.per=0.35,
                 colors=brewer.pal(8, "Dark2"))

Word cloud: Nilotic

Display code
corpus2 <- corpus1[corpus1$ethnicity=="Nilotic",]
a <- sort(table(corpus2$word), decreasing = T)

tblwc <- as.data.table(a)
head(tblwc, n = 15)
           V1   N
 1:    church 156
 2:      life 120
 3:    people 115
 4:       day  89
 5:      home  82
 6:    school  75
 7:   husband  73
 8:  children  68
 9:    family  66
10:     event  51
11: community  49
12:   married  48
13:       god  41
14:     money  41
15:    forget  40
Display code
wordcloud(words = tblwc$V1,
                 freq = tblwc$N, 
                 min.freq = 1,
                 max.words=300,
                 random.order=F, 
                 rot.per=0.35,
                 colors=brewer.pal(8, "Dark2"))

Word count

Display code
df2 <- as.data.table(df2)

## Positive - Negative words per entry:
df2 <- df2[, .(positive = sum(sentiment == "positive"),
               negative = sum(sentiment == "negative")),
          by = "Entry"]

df2 <- df2[, sentiment_raw := (positive - negative)]

df3 <- merge(ds, df2, by = "Entry")

## Create wordcount variable:
df3$wordcount <- stringi::stri_count_words(df3$description)

df3 <- as.data.table(df3)

## Standardized sentiment score for each entry:
df3 <- df3[, sentiment := (sentiment_raw / wordcount)]
ds <- df3

ds1 <- ds %>% drop_na(ethnicity)

## Boxplot:

boxp1 <- ggplot(data = ds1, 
              aes(x = ethnicity, 
                  y = wordcount))+ 
    geom_boxplot()+
#    ylim(-0.1, 0.1)+
labs(x="Ethnicity",
       y = "Word count",
       title = "Word count by ethnicity: Uganda")+
  theme_bw()

boxp1

Display code
## Density plot

dp1 <- ggplot(data = ds1, 
              aes(x = wordcount, 
                  group = ethnicity, 
                  fill = ethnicity)) +
      geom_density(adjust=1.5, alpha=.4)+
      geom_vline(aes(xintercept=0),
               color="black",
               size=0.75, 
               linetype=4)+
      #xlim(500, 500)+
      labs(x = "Word count",
           y = "Density",
           title = "Word count by ethnicity: Uganda",
           caption = "", 
           fill = "Ethnicity:")+
        theme_bw()+
      scale_fill_colorblind()

dp1

Sentiment analysis

Display code
## Boxplot:

boxp1 <- ggplot(data = ds1, 
              aes(x = ethnicity, 
                  y = sentiment))+ 
    geom_boxplot()+
    ylim(-0.1, 0.1)+
    geom_hline(aes(yintercept=0),
               color="red",
               size=0.5)+ 
  labs(x="Ethnicity",
       y = "Standardized Bing Sentiment Score",
       title = "Sentiment score by ethnicity: Uganda")+
  theme_bw()

boxp1

Display code
## Density plot

dp01 <- ggplot(data = ds1, 
              aes(x = sentiment, 
                  group = ethnicity, 
                  fill = ethnicity)) +
      geom_density(adjust=1.5, alpha=.4)+
      geom_vline(aes(xintercept=0),
               color="black",
               size=0.75, 
               linetype=4)+
      labs(x = "Standardized Bing Sentiment Score",
           y = "Density",
           title = "Sentiment score by ethnicity: Uganda",
           caption = "", 
           fill = "Ethnicity:")+
        theme_bw()+
      scale_fill_colorblind()

dp01

Display code
## Boxplot:

boxp02 <- ds1 %>% drop_na(gender) %>%
  ggplot(aes(x = gender, 
             y = sentiment))+ 
    geom_boxplot()+
    ylim(-0.15, 0.15)+
    geom_hline(aes(yintercept=0),
               color="red",
               size=0.5)+ 
  labs(x = "Gender",
       y = "Standardized Bing Sentiment Score",
       title = "Sentiment score by gender: Uganda")+
  theme_bw()

boxp02

Display code
## Density plot

dp02 <- ds1 %>% drop_na(gender) %>%
  ggplot(aes(x = sentiment, 
             group = gender, 
             fill = gender)) +
      geom_density(adjust=1.5, alpha=.4)+
      geom_vline(aes(xintercept=0),
               color="black",
               size=0.75, 
               linetype=4)+
      labs(x = "Standardized Bing Sentiment Score",
           y = "Density",
           title = "Sentiment score by gender: Uganda",
           caption = "", 
           fill = "Gender:")+
        theme_bw()+
      scale_fill_colorblind()

dp02