Data Analysis Flores and Sulmont

Import Data

library(haven)
library(car)

## Loading required package: carData

library(SDMTools)
library(ggplot2)
library(DescTools)

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:car':
## 
##     Recode

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(reshape2)


lapop_mx <- read_dta("http://datasets.americasbarometer.org/database/files/275973272Mexico%20LAPOP%20AmericasBarometer%202017%20V1.0_W.dta")

lapop_pe <- read_dta("http://datasets.americasbarometer.org/database/files/83639715Peru_LAPOP_AmericasBarometer_2017_V1.1_W.dta")

Special Functions

## Cross tabulation function: tab_cruzada

tab_cruzada <- function(tabla, pc="s"){
  tab.0 <- tabla
  if(pc=="s"){
    tab.1 <- addmargins(tab.0, 1, FUN = list(list(TOTALc = sum)))
    tab.1 <- addmargins(tab.1, 2, FUN = list(list(TOTALf = sum)))
    return(tab.1)}
  if (pc=="col"){
    tab.2 <- addmargins(tab.0, 2, FUN = list(list(TOTAL = sum)))
    tab.2 <- round(prop.table(tab.2, 2)*100,2)
    tab.2 <- round(addmargins(tab.2, 1, FUN = list(list(TOTAL = sum))),1)
    tab.0 <- tab.0
    tab.0 <- addmargins(tab.0, 2, FUN = list(list(TOTAL = sum)))
    tab.0 <- addmargins(tab.0, 1, FUN = list(list(Nvalid = sum)))
    tab.2 <- rbind(tab.2, tail(tab.0, 1))
    return(tab.2)}
  if (pc=="fila"){
    tab.2 <- addmargins(tab.0, 1, FUN = list(list(TOTAL = sum)))
    tab.2 <- round(prop.table(tab.2, 1)*100,2)
    tab.2 <- round(addmargins(tab.2, 2, FUN = list(list(TOTAL = sum))),1)
    tab.0 <- tab.0
    tab.0.1 <- addmargins(tab.0, 1, FUN = list(list(TOTAL = sum)))
    tab.0.1 <- addmargins(tab.0.1, 2, FUN = list(list(Nvalid = sum)))
    tab.2 <- cbind(tab.2, Nvalid=tab.0.1[, "Nvalid"])
    return(tab.2)}
}

# Stargazer 2 function
# from: https://cimentadaj.github.io/blog/2016-08-22-producing-stargazer-tables-with-odds-ratios-and-standard-errors-in-r/producing-stargazer-tables-with-odds-ratios-and-standard-errors-in-r/

stargazer2 <- function(model, odd.ratio = F, ...) {
  if(!("list" %in% class(model))) model <- list(model)
    
  if (odd.ratio) {
    coefOR2 <- lapply(model, function(x) exp(coef(x)))
    seOR2 <- lapply(model, function(x) exp(coef(x)) * summary(x)$coef[, 2])
    p2 <- lapply(model, function(x) summary(x)$coefficients[, 4])
    stargazer(model, coef = coefOR2, se = seOR2, p = p2, ...)
    
  } else {
    stargazer(model, ...)
  }
}

Recode Data

Experimental conditions

lapop_pe$treat <- c()
lapop_pe$treat[!is.na(lapop_pe$iiet1)] <- 1
lapop_pe$treat[!is.na(lapop_pe$iiet2)] <- 2
lapop_pe$treat[!is.na(lapop_pe$iiet3)] <- 3

lapop_mx$treat <- c()
lapop_mx$treat[!is.na(lapop_mx$mexiiet1)] <- 1
lapop_mx$treat[!is.na(lapop_mx$mexiiet2)] <- 2
lapop_mx$treat[!is.na(lapop_mx$mexiiet3)] <- 3

lapop_pe$treat <- factor(lapop_pe$treat, 
                         labels = c("Neutral", "Benefits", "Stigma"))


lapop_mx$treat <- factor(lapop_mx$treat, 
                         labels = c("Neutral", "Benefits", "Stigma"))

Experimental conditions Mexico: Valid cases

## 
##  Neutral Benefits   Stigma     <NA> 
##      490      505      520       48

Experimental conditions Peru: Valid cases

## 
##  Neutral Benefits   Stigma     <NA> 
##      791      849      832      175

Indigenous / non indigenous in experimental conditions

lapop_pe$indig <- c()
lapop_pe$indig[lapop_pe$iiet1 == 1] <- 1
lapop_pe$indig[lapop_pe$iiet1 == 2] <- 0
lapop_pe$indig[lapop_pe$iiet2 == 1] <- 1
lapop_pe$indig[lapop_pe$iiet2 == 2] <- 0
lapop_pe$indig[lapop_pe$iiet3 == 1] <- 1
lapop_pe$indig[lapop_pe$iiet3 == 2] <- 0

lapop_mx$indig <- c()
lapop_mx$indig[lapop_mx$mexiiet1 == 1] <- 1
lapop_mx$indig[lapop_mx$mexiiet1 == 2] <- 0
lapop_mx$indig[lapop_mx$mexiiet2 == 1] <- 1
lapop_mx$indig[lapop_mx$mexiiet2 == 2] <- 0
lapop_mx$indig[lapop_mx$mexiiet3 == 1] <- 1
lapop_mx$indig[lapop_mx$mexiiet3 == 2] <- 0

lapop_pe$indig <- factor(lapop_pe$indig, 
                         labels = c("Non indigenous", "Indigenous"))

lapop_mx$indig <- factor(lapop_mx$indig, 
                         labels = c("Non indigenous", "Indigenous"))

Cross tabulation Mexico: Experimental condition

tab_cruzada(xtabs(~indig+treat, data = lapop_mx))

##                 treat
## indig            Neutral Benefits Stigma TOTALf
##   Non indigenous     251      264    276    791
##   Indigenous         239      241    244    724
##   TOTALc             490      505    520   1515

Cross tabulation Peru: Experimental condition

tab_cruzada(xtabs(~indig+treat, data = lapop_pe))

##                 treat
## indig            Neutral Benefits Stigma TOTALf
##   Non indigenous     261      376    279    916
##   Indigenous         530      473    553   1556
##   TOTALc             791      849    832   2472

Etnic question LAPOP (etid)

lapop_mx$etnic <- lapop_mx$etid
lapop_mx$etnic[is.na(lapop_mx$etid)] <- 9

lapop_mx$etnic <- factor(lapop_mx$etnic, labels = c("White","Mestizo", "Indigenous",
                                                  "Black", "Mulatto", "Other", "DK/NA"))

library(car)

lapop_pe$etnic <- recode(lapop_pe$etid, "1110 = 3; 1111=4; 1112=5; 4=6; 5=7; 1113=8; 
                         1106=9; 7=10; NA = 11")


lapop_pe$etnic <- factor(lapop_pe$etnic, labels = c("White","Mestizo", "Quechua", "Aymara",
                                                    "Amazonian", "Black", "Mulatto", 
                                                    "Zambo", "Oriental", "Other", "DK/NA"))

lapop_pe$etnic2 <- factor(recode(lapop_pe$etid, "1110:1112 = 3; 5=4; 
                                 1113=4; 1106=5; 7=5; NA=6"), 
                          labels = c("White","Mestizo", "Indigenous", "Afro", "Other",
                                     "DK/NA"))

table(lapop_mx$etid, exclude = NULL)

## 
##    1    2    3    4    5    7 <NA> 
##  279  697  158   35   31   95  268

table(lapop_pe$etid, exclude = NULL)

## 
##    1    2    4    5    7 1106 1110 1111 1112 1113 <NA> 
##  280 1504   52   26  139   17  330   51   54   31  163

lapop_mx$etnic_ind <- factor(recode(lapop_mx$etid, "3=1; NA=NA; else = 0"), 
                             labels = c("ETID - Non Indig.", "ETID - Indig"))


lapop_pe$etnic_ind <- factor(recode(lapop_pe$etid, "1110:1112=1; NA=NA; else = 0"), 
                             labels = c("ETID - Non Indig.", "ETID - Indig"))

Table etnic question Mexico

## 
##      White    Mestizo Indigenous      Black    Mulatto      Other 
##        279        697        158         35         31         95 
##      DK/NA 
##        268

Table etnic question Peru (original)

## 
##     White   Mestizo   Quechua    Aymara Amazonian     Black   Mulatto 
##       280      1504       330        51        54        52        26 
##     Zambo  Oriental     Other     DK/NA 
##        31        17       139       163

Table etnic question Peru (grouped)

## 
##      White    Mestizo Indigenous       Afro      Other      DK/NA 
##        280       1504        435        109        156        163

Other variables

lapop_mx$gender <- factor(lapop_mx$q1, labels = c("Male", "Female"))
lapop_pe$gender <- factor(lapop_pe$q1, labels = c("Male", "Female"))

lapop_mx$comsize <- factor(lapop_mx$tamano, 
                           labels = c("Metropolitan area", "Large city", "Medium city",
                                      "Small city", "Rural area"))

lapop_pe$comsize <- factor(lapop_pe$tamano, 
                           labels = c("Metropolitan area", "Large city", "Medium city",
                                      "Small city", "Rural area"))

lapop_mx$skin2 <- factor(recode(lapop_mx$colorr, "1:5 = 1; 6:11 = 2"), 
                         label = c("Ligth/Medium", "Dark"))


lapop_pe$skin2 <- factor(recode(lapop_pe$colorr, "1:5 = 1; 6:11 = 2"), 
                         labels = c("Ligth/Medium", "Dark"))

table(lapop_pe$q10new)

## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 
##  78 258 196 190 109 120 163 136 147  93 117  68 144 155 142 113 205

lapop_pe$income <- factor(recode(lapop_pe$q10new, "lo:7=1; 8:hi=2"),
                          labels = c("Low", "Medium/High"))

lapop_mx$income <- factor(recode(lapop_mx$q10new, "lo:7=1; 8:hi=2"),
                          labels = c("Low", "Medium/High"))

Descriptive Statistics

Using sample weigths

Mexico

Ethnoracial identity (%)

## etnic
##      White    Mestizo Indigenous      Black    Mulatto      Other 
##  17.850288  44.593730  10.108765   2.239283   1.983365   6.078055 
##      DK/NA 
##  17.146513

Skin color (%)

## skin2
## Ligth/Medium         Dark 
##     82.46961     17.53039

Age (mean)

## [1] 40.57006

Gender (%)

## gender
##     Male   Female 
## 50.41587 49.58413

Community size (%)

## comsize
## Metropolitan area        Large city       Medium city        Small city 
##         19.961612         35.444658         23.800384         18.490083 
##        Rural area 
##          2.303263

Education (mean)

## [1] 9.37991

Household income (mean)

## [1] 7.328757

Household income (%)

## income
##         Low Medium/High 
##     54.1185     45.8815

Peru

Ethnoracial identity (%)

## etnic
##      White    Mestizo    Quechua     Aymara  Amazonian      Black 
## 10.9602700 57.8386567 12.9874091  2.4946033  1.2701834  1.4659675 
##    Mulatto      Zambo   Oriental      Other      DK/NA 
##  1.1157083  1.1626838  0.4336345  4.5288092  5.7420741

Ethnoracial identity - grouped (%)

## etnic2
##      White    Mestizo Indigenous       Afro      Other      DK/NA 
##  10.960270  57.838657  16.752196   3.744360   4.962444   5.742074

Skin color (%)

## skin2
## Ligth/Medium         Dark 
##     75.85023     24.14977

Age (mean)

## [1] 38.84493

Gender (%)

## gender
##     Male   Female 
## 50.29878 49.70122

Community size (%)

## comsize
## Metropolitan area        Large city       Medium city        Small city 
##          33.60000          19.11944          11.42716          12.65339 
##        Rural area 
##          23.20000

Education (mean)

## [1] 11.6587

Household income (mean)

## [1] 8.599625

Household income (%)

## income
##         Low Medium/High 
##    43.68396    56.31604

Experiment: Indigenous identification by treatment

Mexico

mx_exptab1 <- xtabs(wt~indig+treat, data = lapop_mx)
mx_exptab2 <- tab_cruzada(mx_exptab1, pc="col")
mx_exptab2

##                Neutral Benefits Stigma  TOTAL
## Non indigenous    51.2     52.3   53.1   52.2
## Indigenous        48.8     47.7   46.9   47.8
## TOTAL            100.0    100.0  100.0  100.0
## Nvalid           490.0    505.0  520.0 1515.0

chisq.test(mx_exptab1)

## 
##  Pearson's Chi-squared test
## 
## data:  mx_exptab1
## X-squared = 0.34828, df = 2, p-value = 0.8402

Peru

pe_exptab1 <- xtabs(wt~indig+treat, data = lapop_pe)
pe_exptab2 <- tab_cruzada(pe_exptab1, pc="col")
pe_exptab2

##                 Neutral Benefits   Stigma    TOTAL
## Non indigenous  32.2000  45.9000  34.5000   37.700
## Indigenous      67.8000  54.1000  65.5000   62.300
## TOTAL          100.0000 100.0000 100.0000  100.000
## Nvalid         788.1837 855.1904 828.1031 2471.477

chisq.test(pe_exptab1)

## 
##  Pearson's Chi-squared test
## 
## data:  pe_exptab1
## X-squared = 38.375, df = 2, p-value = 4.645e-09

Graph 1

# Data for Peru
dat1 <- as.data.frame(pe_exptab2)
dat2 <- as.data.frame(t(dat1))

colnames(dat2) <- c("non.indig", "indig", "total", "nvalid")

dat2$id <- row.names(dat2)
dat2$pais <- c("Peru")

# Data for Mexico
dat1.mx <- as.data.frame(mx_exptab2)
dat2.mx <- as.data.frame(t(dat1.mx))

colnames(dat2.mx) <- c("non.indig", "indig", "total", "nvalid")

dat2.mx$id <- row.names(dat2.mx)
dat2.mx$pais <- c("Mexico")

# Data frame for graph 1

dat3 <- rbind(dat2, dat2.mx)
dat3$ci <- sqrt((dat3$indig*dat3$non.indig)/dat3$nvalid)*2
dat3.b <- subset(dat3, id != "TOTAL")

# Graph 1

graph.1 <- ggplot(dat3.b, aes(x=id, y=indig)) + geom_point(stat = "identity") + facet_grid(.~pais) +
  geom_errorbar(aes(ymin=indig-ci, ymax=indig+ci), width = 0.2) +
  ylab("% Indigenous Identification") + xlab("Treatment") + ylim(20,80) +
  scale_x_discrete(limits=c("Neutral", "Benefits", "Stigma")) +
  ggtitle("Indigenous identification by treatment condition - 95% CI") +
  theme_bw()

graph.1

Regression models table 4

mexico.m1 <- glm(indig ~ treat + q2 + gender + ed + comsize, data = lapop_mx,
               family = binomial)

mexico.m2 <- glm(indig ~ treat + q2 + gender + ed + comsize + q10new, data = lapop_mx,
               family = binomial)

peru.m1 <- glm(indig ~ treat + q2 + gender + ed + comsize, 
                family = binomial, data = lapop_pe)

peru.m2 <- glm(indig ~ treat + q2 + gender + ed + comsize + q10new, family = binomial, 
               data = lapop_pe)


models <- list(mexico.m1, mexico.m2, peru.m1, peru.m2)

# Output in Odds Ratio

stargazer2(models, odd.ratio = T, type = "text",
           column.labels=c("Mexico", "Mexico", "Peru", "Peru"))

## 
## =============================================================
##                               Dependent variable:            
##                    ------------------------------------------
##                                      indig                   
##                      Mexico    Mexico      Peru       Peru   
##                       (1)        (2)       (3)        (4)    
## -------------------------------------------------------------
## treatBenefits        0.903      0.974    0.611***   0.624*** 
##                     (0.118)    (0.135)   (0.064)    (0.068)  
##                                                              
## treatStigma          0.902      0.959     0.967      0.969   
##                     (0.116)    (0.132)   (0.104)    (0.108)  
##                                                              
## q2                   0.996      0.997    0.993**    0.993**  
##                     (0.004)    (0.004)   (0.003)    (0.003)  
##                                                              
## genderFemale         0.893      0.870    0.735***   0.726*** 
##                     (0.094)    (0.100)   (0.064)    (0.067)  
##                                                              
## ed                  0.943***   0.970*     0.981*     0.983   
##                     (0.013)    (0.015)   (0.011)    (0.013)  
##                                                              
## comsizeLarge city    1.065      1.139     0.997      0.961   
##                     (0.157)    (0.179)   (0.144)    (0.146)  
##                                                              
## comsizeMedium city   1.321*    1.354*    1.518**    1.485**  
##                     (0.211)    (0.230)   (0.247)    (0.254)  
##                                                              
## comsizeSmall city   2.164***  1.911***   2.244***   2.089*** 
##                     (0.373)    (0.351)   (0.337)    (0.333)  
##                                                              
## comsizeRural area   2.308**    2.822**   1.346**     1.222   
##                     (0.879)    (1.193)   (0.158)    (0.157)  
##                                                              
## q10new                        0.941***               0.991   
##                                (0.012)              (0.010)  
##                                                              
## Constant             1.682*    1.855**   2.944***   3.325*** 
##                     (0.475)    (0.573)   (0.727)    (0.879)  
##                                                              
## -------------------------------------------------------------
## Observations         1,506      1,355     2,431      2,264   
## Log Likelihood     -1,014.763 -901.811  -1,560.628 -1,450.496
## Akaike Inf. Crit.  2,049.525  1,825.622 3,141.256  2,922.993 
## =============================================================
## Note:                             *p<0.1; **p<0.05; ***p<0.01

# Pseudo R2
PseudoR2(mexico.m1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.03230237 0.05852759

PseudoR2(mexico.m2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1400153  0.2474853

PseudoR2(peru.m1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.04236007 0.07477643

PseudoR2(peru.m2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1099397  0.1918555

Graph 2

# Data for Peru

dg2.per <- as.data.frame(xtabs(wt~etnic_ind + treat + indig, data=lapop_pe))
dg2.per2 <- dcast(dg2.per, etnic_ind + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg2.per2) <- c("etnic_ind", "treat", "non.indig.exp", "indig.exp")
dg2.per2$total <- dg2.per2$non.indig.exp+dg2.per2$indig.exp
dg2.per2$pc_indig <- dg2.per2$indig.exp/dg2.per2$total*100
dg2.per2$ci_pc <- sqrt((dg2.per2$pc_indig*(100-dg2.per2$pc_indig))/dg2.per2$total)*2

dg2.per2$pais <- c("Peru")

# Data for Mexico

dg2.mx <- as.data.frame(xtabs(wt~etnic_ind + treat + indig, data=lapop_mx))
dg2.mx2 <- dcast(dg2.mx, etnic_ind + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg2.mx2) <- c("etnic_ind", "treat", "non.indig.exp", "indig.exp")
dg2.mx2$total <- dg2.mx2$non.indig.exp+dg2.mx2$indig.exp
dg2.mx2$pc_indig <- dg2.mx2$indig.exp/dg2.mx2$total*100
dg2.mx2$ci_pc <- sqrt((dg2.mx2$pc_indig*(100-dg2.mx2$pc_indig))/dg2.mx2$total)*2

dg2.mx2$pais <- c("Mexico")


# Data for graph 2

data.g2 <- rbind(dg2.mx2, dg2.per2)

Graph2 - Version 1

graph2.v1 <- ggplot(data.g2, aes(x= treat, y=pc_indig, shape = etnic_ind)) + geom_point(position=position_dodge(0.5),
                                                                           size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Indigenous ethnicity (ETID)") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by indigenous ethnicity (ETID), 95% CI") +
  theme_bw()

graph2.v1

Graph 2 - version 2

graph2_v2 <- ggplot(data.g2, aes(x= etnic_ind, y=pc_indig)) + geom_point() +
  facet_grid(pais~treat) + 
  geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), width = 0.2) +
  ylab("% Indigenous Identification (experiment)") + xlab("Indigenous ethnicity (ETID)") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by indigenous ethnicity (ETID), 95% CI") +
  theme_bw()

graph2_v2

Graph 3

# Data for Peru

dg3.per <- as.data.frame(xtabs(wt~skin2 + treat + indig, data=lapop_pe))
dg3.per2 <- dcast(dg3.per, skin2 + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg3.per2) <- c("skin", "treat", "non.indig.exp", "indig.exp")
dg3.per2$total <- dg3.per2$non.indig.exp+dg3.per2$indig.exp
dg3.per2$pc_indig <- dg3.per2$indig.exp/dg3.per2$total*100
dg3.per2$ci_pc <- sqrt((dg3.per2$pc_indig*(100-dg3.per2$pc_indig))/dg3.per2$total)*2

dg3.per2$pais <- c("Peru")



# Data for Mexico

dg3.mx <- as.data.frame(xtabs(wt~skin2 + treat + indig, data=lapop_mx))
dg3.mx2 <- dcast(dg3.mx, skin2 + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg3.mx2) <- c("skin", "treat", "non.indig.exp", "indig.exp")
dg3.mx2$total <- dg3.mx2$non.indig.exp+dg3.mx2$indig.exp
dg3.mx2$pc_indig <- dg3.mx2$indig.exp/dg3.mx2$total*100
dg3.mx2$ci_pc <- sqrt((dg3.mx2$pc_indig*(100-dg3.mx2$pc_indig))/dg3.mx2$total)*2

dg3.mx2$pais <- c("Mexico")


# Data for graph 3

data.g3 <- rbind(dg3.mx2, dg3.per2)

Graph3 - Version 1

graph3.v1 <- ggplot(data.g3, aes(x= treat, y=pc_indig, shape = skin)) + 
  geom_point(position=position_dodge(0.5), size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Skin color") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by skin color, 95% CI") +
  theme_bw()

graph3.v1

Graph 3 - version 2

graph3_v2 <- ggplot(data.g3, aes(x= skin, y=pc_indig)) + geom_point() +
  facet_grid(pais~treat) + 
  geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), width = 0.2) +
  ylab("% Indigenous Identification (experiment)") + xlab("Skin color") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by skin color, 95% CI") +
  theme_bw()

graph3_v2

Graph 3 - version 3

graph3_v3 <- ggplot(data.g3, aes(x= treat, y=pc_indig)) + geom_point() +
  facet_grid(pais~skin) + 
  geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), width = 0.2) +
  ylab("% Indigenous Identification (experiment)") + xlab("Treatment") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by skin color, 95% CI") +
  theme_bw()

graph3_v3

Graph 4

# Data for Peru

dg4.per <- as.data.frame(xtabs(wt~income + treat + indig, data=lapop_pe))
dg4.per2 <- dcast(dg4.per, income + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg4.per2) <- c("income", "treat", "non.indig.exp", "indig.exp")
dg4.per2$total <- dg4.per2$non.indig.exp+dg4.per2$indig.exp
dg4.per2$pc_indig <- dg4.per2$indig.exp/dg4.per2$total*100
dg4.per2$ci_pc <- sqrt((dg4.per2$pc_indig*(100-dg4.per2$pc_indig))/dg4.per2$total)*2

dg4.per2$pais <- c("Peru")

dg4.per2

##        income    treat non.indig.exp indig.exp    total pc_indig    ci_pc
## 1         Low  Neutral       94.6917  212.2539 306.9456 69.15033 5.272562
## 2         Low Benefits      131.0505  204.6880 335.7385 60.96650 5.324684
## 3         Low   Stigma       92.0165  212.0250 304.0415 69.73554 5.269351
## 4 Medium/High  Neutral      132.4274  284.2399 416.6673 68.21748 4.562231
## 5 Medium/High Benefits      218.1782  227.3581 445.5363 51.03022 4.736595
## 6 Medium/High   Stigma      163.8911  284.9147 448.8058 63.48285 4.545456
##   pais
## 1 Peru
## 2 Peru
## 3 Peru
## 4 Peru
## 5 Peru
## 6 Peru

# Data for Mexico

dg4.mx <- as.data.frame(xtabs(wt~income + treat + indig, data=lapop_mx))
dg4.mx2 <- dcast(dg4.mx, income + treat ~ indig)

## Using Freq as value column: use value.var to override.

colnames(dg4.mx2) <- c("income", "treat", "non.indig.exp", "indig.exp")
dg4.mx2$total <- dg4.mx2$non.indig.exp+dg4.mx2$indig.exp
dg4.mx2$pc_indig <- dg4.mx2$indig.exp/dg4.mx2$total*100
dg4.mx2$ci_pc <- sqrt((dg4.mx2$pc_indig*(100-dg4.mx2$pc_indig))/dg4.mx2$total)*2

dg4.mx2$pais <- c("Mexico")

# Data for graph 3

data.g4 <- rbind(dg4.mx2, dg4.per2)

Graph4 - Version 1

graph4.v1 <- ggplot(data.g4, aes(x= treat, y=pc_indig, shape = income)) + 
  geom_point(position=position_dodge(0.5), size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Income level") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by income level, 95% CI") +
  theme_bw()

graph4.v1

Graph 4 - version 2

graph4_v2 <- ggplot(data.g4, aes(x= income, y=pc_indig)) + geom_point() +
  facet_grid(pais~treat) + 
  geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), width = 0.2) +
  ylab("% Indigenous Identification (experiment)") + xlab("Income level") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by income level, 95% CI") +
  theme_bw()

graph4_v2

Graph 4 - version 3

graph4_v3 <- ggplot(data.g4, aes(x= treat, y=pc_indig)) + geom_point() +
  facet_grid(pais~income) + 
  geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), width = 0.2) +
  ylab("% Indigenous Identification (experiment)") + xlab("Treatment") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by income level, 95% CI") +
  theme_bw()

graph4_v3

Table A2 Models

t4_mx1 <- glm(indig~treat*etnic_ind, family=binomial, data = lapop_mx)
t4_pe1 <- glm(indig~treat*etnic_ind, family=binomial, data = lapop_pe)
t4_mx2 <- glm(indig~treat*skin2, family=binomial, data = lapop_mx)
t4_pe2 <- glm(indig~treat*skin2, family=binomial, data = lapop_pe)
t4_mx3 <- glm(indig~treat*income, family=binomial, data = lapop_mx)
t4_pe3 <- glm(indig~treat*income, family=binomial, data = lapop_pe)

models <- list(t4_mx1, t4_pe1,t4_mx2, t4_pe2,t4_mx3, t4_pe3)

# Output in Odds Ratio

stargazer2(models, odd.ratio = T, type = "text",
           column.labels=c("Mexico", "Peru", "Mexico", "Peru", "Mexico", "Peru"))

## 
## ===================================================================================================
##                                                           Dependent variable:                      
##                                     ---------------------------------------------------------------
##                                                                  indig                             
##                                      Mexico      Peru      Mexico      Peru     Mexico      Peru   
##                                        (1)       (2)        (3)        (4)        (5)       (6)    
## ---------------------------------------------------------------------------------------------------
## treatBenefits                         1.143    0.588***    0.954     0.655***    1.013     0.750*  
##                                      (0.172)   (0.067)    (0.134)    (0.077)    (0.187)   (0.113)  
##                                                                                                    
## treatStigma                           1.242     0.992      0.862      1.087      0.991     1.119   
##                                      (0.186)   (0.115)    (0.120)    (0.130)    (0.181)   (0.177)  
##                                                                                                    
## etnic_indETID - Indig               42.844***  4.140***                                            
##                                     (31.193)   (1.157)                                             
##                                                                                                    
## treatBenefits:etnic_indETID - Indig   1.399     0.929                                              
##                                      (1.750)   (0.336)                                             
##                                                                                                    
## treatStigma:etnic_indETID - Indig    0.129**    0.932                                              
##                                      (0.105)   (0.364)                                             
##                                                                                                    
## skin2Dark                                                  1.601*    1.708***                      
##                                                           (0.407)    (0.317)                       
##                                                                                                    
## treatBenefits:skin2Dark                                    0.923      0.762                        
##                                                           (0.314)    (0.186)                       
##                                                                                                    
## treatStigma:skin2Dark                                      1.418      0.629*                       
##                                                           (0.492)    (0.160)                       
##                                                                                                    
## incomeMedium/High                                                              0.514***    1.032   
##                                                                                 (0.100)   (0.162)  
##                                                                                                    
## treatBenefits:incomeMedium/High                                                  1.032     0.705   
##                                                                                 (0.281)   (0.150)  
##                                                                                                    
## treatStigma:incomeMedium/High                                                    0.938     0.779   
##                                                                                 (0.256)   (0.171)  
##                                                                                                    
## Constant                            0.642***   1.751***    0.886     1.792***   1.277*    2.017*** 
##                                      (0.070)   (0.146)    (0.087)    (0.154)    (0.170)   (0.227)  
##                                                                                                    
## ---------------------------------------------------------------------------------------------------
## Observations                          1,268     2,347      1,515      2,472      1,358     2,300   
## Log Likelihood                      -794.749  -1,470.699 -1,038.977 -1,609.421 -921.482  -1,494.879
## Akaike Inf. Crit.                   1,601.499 2,953.399  2,089.953  3,230.842  1,854.964 3,001.757 
## ===================================================================================================
## Note:                                                                   *p<0.1; **p<0.05; ***p<0.01

# Pseudo R2
PseudoR2(t4_mx1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.2421112  0.4080327

PseudoR2(t4_pe1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.09754264 0.16877744

PseudoR2(t4_mx2, c("McFadden", "Nagel"))

##    McFadden  Nagelkerke 
## 0.009211266 0.016905100

PseudoR2(t4_pe2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.01241963 0.02217435

PseudoR2(t4_mx3, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1212568  0.2171206

PseudoR2(t4_pe3, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.08270552 0.14598294