Import Data

library(haven)
library(car)
library(SDMTools)
library(ggplot2)
library(DescTools)
library(stargazer)
library(reshape2)
library(Hmisc)
library(plyr)
library(tibble)

lapop_dat <- read_dta("https://www.dropbox.com/s/83fnh1hghpdsp6g/Data_PE_MX.dta?dl=1")
lapop_dat <- subset(lapop_dat, is.na(lapop_dat$t2) == F)

Special Functions

## Cross tabulation function: tab_cruzada

tab_cruzada <- function(tabla, pc="s"){
  tab.0 <- tabla
  if(pc=="s"){
    tab.1 <- addmargins(tab.0, 1, FUN = list(list(TOTALc = sum)))
    tab.1 <- addmargins(tab.1, 2, FUN = list(list(TOTALf = sum)))
    return(tab.1)}
  if (pc=="col"){
    tab.2 <- addmargins(tab.0, 2, FUN = list(list(TOTAL = sum)))
    tab.2 <- round(prop.table(tab.2, 2)*100,2)
    tab.2 <- round(addmargins(tab.2, 1, FUN = list(list(TOTAL = sum))),1)
    tab.0 <- tab.0
    tab.0 <- addmargins(tab.0, 2, FUN = list(list(TOTAL = sum)))
    tab.0 <- addmargins(tab.0, 1, FUN = list(list(Nvalid = sum)))
    tab.2 <- rbind(tab.2, tail(tab.0, 1))
    return(tab.2)}
  if (pc=="fila"){
    tab.2 <- addmargins(tab.0, 1, FUN = list(list(TOTAL = sum)))
    tab.2 <- round(prop.table(tab.2, 1)*100,2)
    tab.2 <- round(addmargins(tab.2, 2, FUN = list(list(TOTAL = sum))),1)
    tab.0 <- tab.0
    tab.0.1 <- addmargins(tab.0, 1, FUN = list(list(TOTAL = sum)))
    tab.0.1 <- addmargins(tab.0.1, 2, FUN = list(list(Nvalid = sum)))
    tab.2 <- cbind(tab.2, Nvalid=tab.0.1[, "Nvalid"])
    return(tab.2)}
}

# Stargazer 2 function
# from: https://cimentadaj.github.io/blog/2016-08-22-producing-stargazer-tables-with-odds-ratios-and-standard-errors-in-r/producing-stargazer-tables-with-odds-ratios-and-standard-errors-in-r/

stargazer2 <- function(model, odd.ratio = F, ...) {
  if(!("list" %in% class(model))) model <- list(model)
    
  if (odd.ratio) {
    coefOR2 <- lapply(model, function(x) exp(coef(x)))
    seOR2 <- lapply(model, function(x) exp(coef(x)) * summary(x)$coef[, 2])
    p2 <- lapply(model, function(x) summary(x)$coefficients[, 4])
    stargazer(model, coef = coefOR2, se = seOR2, p = p2, ...)
    
  } else {
    stargazer(model, ...)
  }
}

Recoding variables

# Country
lapop_dat$country <- factor(lapop_dat$pais, labels = c("Mexico", "Peru"))

# Indigenous in experiment (v2)

lapop_dat$ind_exp0 <- factor(recode((lapop_dat$indig2), "4=3"), 
                                    labels = c("Non indigenous", "Indigenous", "DK/NA"))
# Indigenous in experiment (v2)
lapop_dat$ind_exp <- factor(lapop_dat$indig3, labels = c("Non indigenous", "Indigenous"))

# Experimental condition
lapop_dat$treat <- factor(lapop_dat$t2, labels = c("Neutral", "Benefits", "Stigma"))

# Self identification ETID (1)
lapop_dat$etnic <- lapop_dat$etid

lapop_dat$etnic[is.na(lapop_dat$etid)] <- 9999

lapop_dat$etnic <- factor(lapop_dat$etnic, labels = c("White","Mestizo", "Indigenous",
                                                  "Black", "Mulatto", "Other", "Oriental",
                                                  "Quechua", "Aymara", "Amazonian", "Zambo", "DK/NA"))

# Self identification ETID (2)

lapop_dat$etnic2 <- factor(recode(lapop_dat$etid, "5=4; 1106=7; 1110:1112 = 3; 1113=3; NA=9"),
                           labels = c("White","Mestizo", "Indigenous", "Afro-descendant","Other", "DK/NA"))

# Self identification Indigenous - ETID

lapop_dat$self_indg <- factor(recode(lapop_dat$etid, "3=1; 1010:1112 = 1; else = 0"), 
                              labels = c("ETID - Non Indig.", "ETID - Indig"))

# Gender
lapop_dat$gender <- factor(lapop_dat$q1, labels = c("Male", "Female"))

# Community Size
lapop_dat$comsize <- factor(lapop_dat$tamano, 
                           labels = c("Metropolitan area", "Large city", "Medium city",
                                      "Small city", "Rural area"))

# Urban / Rural

lapop_dat$rural <- factor(recode(lapop_dat$ur, "1=0; 2=1"), labels = c("Urbano", "Rural"))

# Skin color

lapop_dat$skin2 <- factor(recode(lapop_dat$colorr, "1:5 = 1; 6:11 = 2"), 
                         label = c("Ligth/Medium", "Dark"))

# Income

lapop_dat$income <- factor(recode(lapop_dat$q10new_16, "lo:7=1; 8:hi=2"),
                          labels = c("Low", "Medium/High"))

# Weight: Setting weigth for Mexico = 1

lapop_dat$wt2 <- lapop_dat$wt

lapop_dat$wt2[lapop_dat$country == "Mexico"] <- 1

Descriptive Statistics

Table 1: Experimental Design

xtabs(~ treat + country, data = lapop_dat)

##           country
## treat      Mexico Peru
##   Neutral     507  858
##   Benefits    521  903
##   Stigma      535  886

Table 2: Descriptive Statistics

t1 <- dcast(as.data.frame(prop.table(xtabs(wt2~etnic2+country, data = lapop_dat), 2)*100),
            etnic2~country)

t2 <- dcast(as.data.frame(prop.table(xtabs(wt2~gender+country, data = lapop_dat), 2)*100),
            gender~country)

t3 <- as.data.frame(t(ddply(lapop_dat, ~country, summarise, 
                          Mean = wtd.mean(q2, wt2, na.rm = T))))[-1, ]
t3 <- rownames_to_column(t3, var = "Category")

t4 <- dcast(as.data.frame(prop.table(xtabs(wt2~rural+country, data = lapop_dat), 2)*100),
            rural~country)

t5 <- dcast(as.data.frame(prop.table(xtabs(wt2~comsize+country, data = lapop_dat), 2)*100),
            comsize~country)

t6<- as.data.frame(t(ddply(lapop_dat, ~country, summarise, Mean  = wtd.mean(ed, wt2, na.rm = T))))[-1, ]
t6 <- rownames_to_column(t6, var = "Category")


t7<- as.data.frame(t(ddply(lapop_dat, ~country, summarise, Mean  = wtd.mean(q10new_16, wt2, na.rm = T))))[-1, ]
t7 <- rownames_to_column(t7, var = "Category")


t1$variable <- c("Ethnic Self ID")
t2$variable <- c("Gender")
t3$variable <- c("Age")
t4$variable <- c("Urban / Rural")
t5$variable <- c("Community Size")
t6$variable <- c("Education")
t7$variable <- c("Income")


t1 <- t1[, c(4,1,2,3)]
t2 <- t2[, c(4,1,2,3)]
t3 <- t3[, c(4,1,2,3)]
t4 <- t4[, c(4,1,2,3)]
t5 <- t5[, c(4,1,2,3)]
t6 <- t6[, c(4,1,2,3)]
t7 <- t7[, c(4,1,2,3)]

colnames(t1) <- c("variable", "Category", "Mexico", "Peru")
colnames(t2) <- c("variable", "Category", "Mexico", "Peru")
colnames(t3) <- c("variable", "Category", "Mexico", "Peru")
colnames(t4) <- c("variable", "Category", "Mexico", "Peru")
colnames(t5) <- c("variable", "Category", "Mexico", "Peru")
colnames(t6) <- c("variable", "Category", "Mexico", "Peru")
colnames(t7) <- c("variable", "Category", "Mexico", "Peru")

table2 <- rbind(t1, t2, t3, t4, t5, t6, t7)
table2

##          variable          Category           Mexico             Peru
## 1  Ethnic Self ID             White 17.8502879078695 10.9872649803397
## 2  Ethnic Self ID           Mestizo  44.593730006398 57.8816892657081
## 3  Ethnic Self ID        Indigenous 10.1087651951376  17.928652150406
## 4  Ethnic Self ID   Afro-descendant 4.22264875239923 2.58661889927482
## 5  Ethnic Self ID             Other 6.07805502239283 4.92461649259747
## 6  Ethnic Self ID             DK/NA 17.1465131158029 5.69115821167396
## 7          Gender              Male 50.4158669225848 50.2674863493653
## 8          Gender            Female 49.5841330774152 49.7325136506347
## 9             Age              Mean         40.57006         38.83157
## 10  Urban / Rural            Urbano 79.9744081893794 76.7999992260178
## 11  Urban / Rural             Rural 20.0255918106206 23.2000007739822
## 12 Community Size Metropolitan area 19.9616122840691 33.5999999145674
## 13 Community Size        Large city 35.4446577095329 18.4625036152736
## 14 Community Size       Medium city 23.8003838771593 11.4028631675307
## 15 Community Size        Small city 18.4900831733845 12.9284628613245
## 16 Community Size        Rural area 2.30326295585413 23.6061704413038
## 17      Education              Mean          9.37991         11.64524
## 18         Income              Mean         7.328757         8.587820

Table 3

tab3.1 <- xtabs(~ind_exp0+treat, data = subset(lapop_dat, lapop_dat$country=="Mexico"))
tab3.2 <- xtabs(~ind_exp0+treat, data = subset(lapop_dat, lapop_dat$country=="Peru"))

Mexico

tab_cruzada(tab3.1, pc="col")

##                Neutral Benefits Stigma  TOTAL
## Non indigenous    49.5     50.7   51.6   50.6
## Indigenous        47.1     46.3   45.6   46.3
## DK/NA              3.4      3.1    2.8    3.1
## TOTAL            100.0    100.0  100.0  100.0
## Nvalid           507.0    521.0  535.0 1563.0

chisq.test(tab3.1)

## 
##  Pearson's Chi-squared test
## 
## data:  tab3.1
## X-squared = 0.61203, df = 4, p-value = 0.9617

Peru

tab_cruzada(tab3.2, pc="col")

##                Neutral Benefits Stigma  TOTAL
## Non indigenous    30.4     41.6   31.5   34.6
## Indigenous        61.8     52.4   62.4   58.8
## DK/NA              7.8      6.0    6.1    6.6
## TOTAL            100.0    100.0  100.0  100.0
## Nvalid           858.0    903.0  886.0 2647.0

chisq.test(tab3.2)

## 
##  Pearson's Chi-squared test
## 
## data:  tab3.2
## X-squared = 32.09, df = 4, p-value = 1.834e-06

Regression models table 4

mexico.m1 <- glm(ind_exp ~ treat + q2 + gender + ed + rural, 
                 data = subset(lapop_dat, lapop_dat$pais == 1), 
               family = binomial)

mexico.m2 <- glm(ind_exp ~ treat + q2 + gender + ed + rural + q10new_16, 
                 data = subset(lapop_dat, lapop_dat$pais == 1), 
               family = binomial)

peru.m1 <- glm(ind_exp ~ treat + q2 + gender + ed + rural, 
                 data = subset(lapop_dat, lapop_dat$pais == 11), 
               family = binomial)

peru.m2 <- glm(ind_exp ~ treat + q2 + gender + ed + rural + q10new_16, 
                 data = subset(lapop_dat, lapop_dat$pais == 11), 
               family = binomial)

models <- list(mexico.m1, mexico.m2, peru.m1, peru.m2)

# Output in Odds Ratio

stargazer2(models, odd.ratio = T, type = "text",
           column.labels=c("Mexico", "Mexico", "Peru", "Peru"))

## 
## ============================================================
##                              Dependent variable:            
##                   ------------------------------------------
##                                    ind_exp                  
##                     Mexico    Mexico      Peru       Peru   
##                      (1)        (2)       (3)        (4)    
## ------------------------------------------------------------
## treatBenefits       0.934      1.008    0.680***   0.669*** 
##                    (0.118)    (0.137)   (0.067)    (0.069)  
##                                                             
## treatStigma         0.914      0.972     1.020      1.014   
##                    (0.115)    (0.132)   (0.102)    (0.107)  
##                                                             
## q2                  0.995      0.997    0.992***   0.993**  
##                    (0.004)    (0.004)   (0.003)    (0.003)  
##                                                             
## genderFemale        0.865      0.851    0.683***   0.689*** 
##                    (0.089)    (0.096)   (0.056)    (0.060)  
##                                                             
## ed                 0.951***    0.975     0.992      0.989   
##                    (0.013)    (0.015)   (0.011)    (0.012)  
##                                                             
## ruralRural         1.489***   1.380**    1.051      1.006   
##                    (0.195)    (0.194)   (0.089)    (0.092)  
##                                                             
## q10new_16                    0.935***               0.995   
##                               (0.011)              (0.010)  
##                                                             
## Constant           1.747**   2.166***   2.895***   3.204*** 
##                    (0.457)    (0.622)   (0.630)    (0.741)  
##                                                             
## ------------------------------------------------------------
## Observations        1,553      1,381     2,597      2,394   
## Log Likelihood    -1,057.244 -926.001  -1,733.809 -1,590.050
## Akaike Inf. Crit. 2,128.489  1,868.003 3,481.617  3,196.099 
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01

# Pseudo R2
PseudoR2(mexico.m1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.02030307 0.03705341

PseudoR2(mexico.m2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1419197  0.2516559

PseudoR2(peru.m1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.03339222 0.06020476

PseudoR2(peru.m2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1135386  0.2014736

Table A2 Models

t4_mx1 <- glm(ind_exp~treat*self_indg, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 1))
t4_pe1 <- glm(ind_exp~treat*self_indg, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 11))
t4_mx2 <- glm(ind_exp~treat*skin2, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 1))
t4_pe2 <- glm(ind_exp~treat*skin2, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 11))
t4_mx3 <- glm(ind_exp~treat*income, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 1))
t4_pe3 <- glm(ind_exp~treat*income, family=binomial, 
              data = subset(lapop_dat, lapop_dat$pais == 11))

models <- list(t4_mx1, t4_pe1,t4_mx2, t4_pe2,t4_mx3, t4_pe3)

# Output in Odds Ratio

stargazer2(models, odd.ratio = T, type = "text",
           column.labels=c("Mexico", "Peru", "Mexico", "Peru", "Mexico", "Peru"))

## 
## ===================================================================================================
##                                                           Dependent variable:                      
##                                     ---------------------------------------------------------------
##                                                                 ind_exp                            
##                                      Mexico      Peru      Mexico      Peru     Mexico      Peru   
##                                        (1)       (2)        (3)        (4)        (5)       (6)    
## ---------------------------------------------------------------------------------------------------
## treatBenefits                         1.017    0.683***    0.966     0.730***    1.025     0.800   
##                                      (0.136)   (0.072)    (0.133)    (0.081)    (0.187)   (0.113)  
##                                                                                                    
## treatStigma                           1.029     1.064      0.892      1.159      1.004     1.124   
##                                      (0.137)   (0.114)    (0.122)    (0.130)    (0.182)   (0.164)  
##                                                                                                    
## self_indgETID - Indig               26.404***  3.170***                                            
##                                     (15.858)   (0.707)                                             
##                                                                                                    
## treatBenefits:self_indgETID - Indig   2.360     0.918                                              
##                                      (2.785)   (0.273)                                             
##                                                                                                    
## treatStigma:self_indgETID - Indig    0.212**    0.778                                              
##                                      (0.147)   (0.239)                                             
##                                                                                                    
## skin2Dark                                                 1.725**    1.800***                      
##                                                           (0.438)    (0.312)                       
##                                                                                                    
## treatBenefits:skin2Dark                                    0.872      0.712                        
##                                                           (0.294)    (0.164)                       
##                                                                                                    
## treatStigma:skin2Dark                                      1.195     0.591**                       
##                                                           (0.407)    (0.141)                       
##                                                                                                    
## incomeMedium/High                                                              0.510***    1.185   
##                                                                                 (0.098)   (0.175)  
##                                                                                                    
## treatBenefits:incomeMedium/High                                                  1.051     0.695*  
##                                                                                 (0.283)   (0.141)  
##                                                                                                    
## treatStigma:incomeMedium/High                                                    0.954     0.833   
##                                                                                 (0.258)   (0.173)  
##                                                                                                    
## Constant                            0.694***   1.363***   0.823**    1.413***    1.217    1.566*** 
##                                      (0.067)   (0.104)    (0.080)    (0.113)    (0.160)   (0.163)  
##                                                                                                    
## ---------------------------------------------------------------------------------------------------
## Observations                          1,563     2,647      1,563      2,647      1,384     2,434   
## Log Likelihood                      -996.853  -1,739.987 -1,069.850 -1,774.699 -938.462  -1,626.093
## Akaike Inf. Crit.                   2,005.707 3,491.974  2,151.699  3,561.397  1,888.924 3,264.187 
## ===================================================================================================
## Note:                                                                   *p<0.1; **p<0.05; ***p<0.01

# Pseudo R2
PseudoR2(t4_mx1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.07626461 0.13351687

PseudoR2(t4_pe1, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.02994788 0.05359612

PseudoR2(t4_mx2, c("McFadden", "Nagel"))

##    McFadden  Nagelkerke 
## 0.008622376 0.015809739

PseudoR2(t4_pe2, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.01059590 0.01921207

PseudoR2(t4_mx3, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
##  0.1303728  0.2329551

PseudoR2(t4_pe3, c("McFadden", "Nagel"))

##   McFadden Nagelkerke 
## 0.09344411 0.16688413

Graphs

Graph 1

mx_exptab1 <- xtabs(~ind_exp+treat, data = subset(lapop_dat, lapop_dat$pais == 1))
mx_exptab2 <- tab_cruzada(mx_exptab1, pc="col")

pe_exptab1 <- xtabs(wt~ind_exp+treat, data = subset(lapop_dat, lapop_dat$pais == 11))
pe_exptab2 <- tab_cruzada(pe_exptab1, pc="col")

## Graph 1

# Data for Peru
dat1 <- as.data.frame(pe_exptab2)
dat2 <- as.data.frame(t(dat1))

colnames(dat2) <- c("non.indig", "indig", "total", "nvalid")

dat2$id <- row.names(dat2)
dat2$pais <- c("Peru")

# Data for Mexico
dat1.mx <- as.data.frame(mx_exptab2)
dat2.mx <- as.data.frame(t(dat1.mx))

colnames(dat2.mx) <- c("non.indig", "indig", "total", "nvalid")

dat2.mx$id <- row.names(dat2.mx)
dat2.mx$pais <- c("Mexico")

# Data frame for graph 1

dat3 <- rbind(dat2, dat2.mx)
dat3$ci <- sqrt((dat3$indig*dat3$non.indig)/dat3$nvalid)*2
dat3.b <- subset(dat3, id != "TOTAL")

# Graph 1

graph.1 <- ggplot(dat3.b, aes(x=id, y=indig)) + geom_point(stat = "identity") + facet_grid(.~pais) +
  geom_errorbar(aes(ymin=indig-ci, ymax=indig+ci), width = 0.2) +
  ylab("% Indigenous Identification") + xlab("Treatment") + ylim(20,80) +
  scale_x_discrete(limits=c("Neutral", "Benefits", "Stigma")) +
  ggtitle("Indigenous identification by treatment condition - 95% CI") +
  theme_bw()

graph.1

Graph 2

# Data for Peru

lapop_pe <- subset(lapop_dat, lapop_dat$pais==11)

dg2.per <- as.data.frame(xtabs(wt~self_indg + treat + ind_exp, data=lapop_pe))
dg2.per2 <- dcast(dg2.per, self_indg + treat ~ ind_exp)
colnames(dg2.per2) <- c("etnic_ind", "treat", "non.indig.exp", "indig.exp")
dg2.per2$total <- dg2.per2$non.indig.exp+dg2.per2$indig.exp
dg2.per2$pc_indig <- dg2.per2$indig.exp/dg2.per2$total*100
dg2.per2$ci_pc <- sqrt((dg2.per2$pc_indig*(100-dg2.per2$pc_indig))/dg2.per2$total)*2

dg2.per2$pais <- c("Peru")

# Data for Mexico
lapop_mx <- subset(lapop_dat, lapop_dat$pais==1)


dg2.mx <- as.data.frame(xtabs(~self_indg + treat + ind_exp, data=lapop_mx))
dg2.mx2 <- dcast(dg2.mx, self_indg + treat ~ ind_exp)
colnames(dg2.mx2) <- c("etnic_ind", "treat", "non.indig.exp", "indig.exp")
dg2.mx2$total <- dg2.mx2$non.indig.exp+dg2.mx2$indig.exp
dg2.mx2$pc_indig <- dg2.mx2$indig.exp/dg2.mx2$total*100
dg2.mx2$ci_pc <- sqrt((dg2.mx2$pc_indig*(100-dg2.mx2$pc_indig))/dg2.mx2$total)*2

dg2.mx2$pais <- c("Mexico")


# Data for graph 2

data.g2 <- rbind(dg2.mx2, dg2.per2)

graph2.v1 <- ggplot(data.g2, aes(x= treat, y=pc_indig, shape = etnic_ind)) + geom_point(position=position_dodge(0.5),
                                                                           size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Indigenous ethnicity (ETID)") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by indigenous ethnicity (ETID), 95% CI") +
  theme_bw()

graph2.v1

Graph 3

# Data for Peru

dg3.per <- as.data.frame(xtabs(wt~skin2 + treat + ind_exp, data=lapop_pe))
dg3.per2 <- dcast(dg3.per, skin2 + treat ~ ind_exp)
colnames(dg3.per2) <- c("skin", "treat", "non.indig.exp", "indig.exp")
dg3.per2$total <- dg3.per2$non.indig.exp+dg3.per2$indig.exp
dg3.per2$pc_indig <- dg3.per2$indig.exp/dg3.per2$total*100
dg3.per2$ci_pc <- sqrt((dg3.per2$pc_indig*(100-dg3.per2$pc_indig))/dg3.per2$total)*2

dg3.per2$pais <- c("Peru")



# Data for Mexico

dg3.mx <- as.data.frame(xtabs(~skin2 + treat + ind_exp, data=lapop_mx))
dg3.mx2 <- dcast(dg3.mx, skin2 + treat ~ ind_exp)
colnames(dg3.mx2) <- c("skin", "treat", "non.indig.exp", "indig.exp")
dg3.mx2$total <- dg3.mx2$non.indig.exp+dg3.mx2$indig.exp
dg3.mx2$pc_indig <- dg3.mx2$indig.exp/dg3.mx2$total*100
dg3.mx2$ci_pc <- sqrt((dg3.mx2$pc_indig*(100-dg3.mx2$pc_indig))/dg3.mx2$total)*2

dg3.mx2$pais <- c("Mexico")


# Data for graph 3

data.g3 <- rbind(dg3.mx2, dg3.per2)

graph3.v1 <- ggplot(data.g3, aes(x= treat, y=pc_indig, shape = skin)) + 
  geom_point(position=position_dodge(0.5), size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Skin color") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by skin color, 95% CI") +
  theme_bw()

graph3.v1

Graph 4

# Data for Peru

dg4.per <- as.data.frame(xtabs(wt~income + treat + ind_exp, data=lapop_pe))
dg4.per2 <- dcast(dg4.per, income + treat ~ ind_exp)
colnames(dg4.per2) <- c("income", "treat", "non.indig.exp", "indig.exp")
dg4.per2$total <- dg4.per2$non.indig.exp+dg4.per2$indig.exp
dg4.per2$pc_indig <- dg4.per2$indig.exp/dg4.per2$total*100
dg4.per2$ci_pc <- sqrt((dg4.per2$pc_indig*(100-dg4.per2$pc_indig))/dg4.per2$total)*2

dg4.per2$pais <- c("Peru")



# Data for Mexico

dg4.mx <- as.data.frame(xtabs(~income + treat + ind_exp, data=lapop_mx))
dg4.mx2 <- dcast(dg4.mx, income + treat ~ ind_exp)
colnames(dg4.mx2) <- c("income", "treat", "non.indig.exp", "indig.exp")
dg4.mx2$total <- dg4.mx2$non.indig.exp+dg4.mx2$indig.exp
dg4.mx2$pc_indig <- dg4.mx2$indig.exp/dg4.mx2$total*100
dg4.mx2$ci_pc <- sqrt((dg4.mx2$pc_indig*(100-dg4.mx2$pc_indig))/dg4.mx2$total)*2

dg4.mx2$pais <- c("Mexico")

# Data for graph 3

data.g4 <- rbind(dg4.mx2, dg4.per2)

graph4.v1 <- ggplot(data.g4, aes(x= treat, y=pc_indig, shape = income)) + 
  geom_point(position=position_dodge(0.5), size = 3) +
  facet_grid(.~pais) + geom_errorbar(aes(ymin = pc_indig - ci_pc, ymax = pc_indig + ci_pc), 
                                     position = position_dodge(0.5), width = 0.2) +
  ylim(20,110) + xlab("Treatment") + ylab("% Indigenous Identification (experiment)") +
  scale_shape_discrete(name="Income level") +
  ggtitle("Indigenous identification in experiment by treatment condition \nand by income level, 95% CI") +
  theme_bw()

graph4.v1

Facebook Experiment

fb_indig <- as.data.frame(read.csv("https://www.dropbox.com/s/mfwagog5yx3kbwk/facebook_exp_2.csv?dl=1", sep=";"))

colnames(fb_indig) <- c("pc_indig", "ncases", "pais", "ci", "condition", "cond2")

fb_indig$cond3 <- factor(fb_indig$cond2, labels = c("Neutral + ETID Before + Intro", "Neutral + ETID After + Intro", "Neutral + ETID Before","Neutral + ETID After", "Benefits original", "Benefits productive","Benefits consumption","Stigma", "Total"))
                             
                             

graph5.v1 <- ggplot(fb_indig, aes(x=cond3, y=pc_indig)) + geom_point() +
  facet_grid(.~pais) + 
  geom_errorbar(aes(ymin = pc_indig - ci, ymax = pc_indig + ci), width = 0.3)+
  ggtitle("Experiment 2 - Facebook sample: Indigenous identification by treatment condition,\n95% CI") + 
  xlab("Treatment") + ylab("% Indigenous Identification") +
  theme_bw() +
  theme(axis.text.x  = element_text(angle=90, size = 8, vjust = 0.5, hjust = 1))

graph5.v2 <- ggplot(fb_indig, aes(x=cond3, y=pc_indig)) + geom_point(stat = "identity") +
  facet_grid(.~pais) + 
  geom_errorbar(aes(ymin = pc_indig - ci, ymax = pc_indig + ci), width = 0.3)+
  ggtitle("Experiment 2 - Facebook sample: Indigenous identification by \ntreatment condition, 95% CI") + ylab("% Indigenous Identification") + xlab("Treatment") +
  theme_bw() + coord_flip()

Facebook experiment - Graph Version 1

graph5.v1

Facebook experiment - Graph Version 2

graph5.v2

Data Analysis Flores & Sulmont (Ver. 2)

David Sulmont

20/7/2018

Import Data

Special Functions

Recoding variables

Descriptive Statistics

Table 1: Experimental Design

Table 2: Descriptive Statistics

Table 3

Mexico

Peru

Regression models table 4

Table A2 Models

Graphs

Graph 1

Graph 2

Graph 3

Graph 4

Facebook Experiment

Facebook experiment - Graph Version 1

Facebook experiment - Graph Version 2