#load data
gp<-read.csv("~/Downloads/GG identification across groups Wave 1.csv") %>%
dplyr::select(CONDITION:Gender, LocationLatitude, LocationLongitude) %>%
mutate(CONDITION=dplyr::recode(CONDITION, '0'="Experiment", '1'="Control"))
We first compare if there is any significant difference between participants from control group who only answered questions 9-17 and the other participants that rated the identity of all the different groups.
#Scaled mean value cleveland dot plot
new.gp<-gp %>% dplyr::select(CONDITION,Essentialism_SCL:Gender)
Scale.gp <- new.gp
Scale.gp[,-1]<-scale(new.gp[,-1])
Scale.gp <- Scale.gp %>%
group_by(CONDITION) %>%
summarize_all(mean,na.rm=TRUE)%>%
gather( "variable", "Mean", Essentialism_SCL:Gender)
cd <- ggplot(Scale.gp, aes(x = Mean,
y = fct_reorder2(variable, CONDITION, -Mean),
color = CONDITION)) +
geom_point() + ylab("") +
ggtitle("Scaled Mean Value Sorted by Experiment Group")
cd
Interestingly, participants who rated the identity of all groups have very neutral attitude (close to value zero under the standardized normal distribution N(0,1)) towards question 9 to 17. While for people who were not assigned group identification questions, they have very diverse answer for question 9 to 17, which indicates that answering group identification questions may influence how participants think about ingroup and outgroup.
ds <- gp %>%
tibble::rowid_to_column() %>%
gather("group", value, Muslims_3:Italian_4) %>%
dplyr::select(rowid, group, value)
ds$groups <-
ifelse(ds$group == 'Muslims_3' | ds$group == 'Muslims_4', "Muslims",
ifelse(ds$group == 'Chinese_3' | ds$group == 'Chinese_4', "Chinese",
ifelse(ds$group == 'Hispanic_3' | ds$group == 'Hispanic_4', "Hispanic",
ifelse(ds$group == 'Russian_3' | ds$group == 'Russian_4', "Russian",
ifelse(ds$group == 'Irish_3' | ds$group == 'Irish_4', "Irish",
ifelse(ds$group == 'African_3' | ds$group == 'African_4', "African",
ifelse(ds$group == 'Mexican_3' |ds$group == 'Mexican_4',"Mexican","Italian")))))))
ds$identity <-
ifelse(grepl("^.+\\_(3)$", ds$group) == TRUE, "ingroup", "outgroup") #use regular expression to extract the last digit of group (eg:Muslims_4 -> 4)
ds.mean <-
ds %>% select(rowid, groups, identity, value) %>% spread(identity, value) %>%
select(groups, ingroup, outgroup) %>% group_by(groups) %>%
summarise_all(mean, na.rm = T) %>%
gather(identity, mean, ingroup:outgroup)
#ds.mean
ggplot(ds.mean, aes(identity, mean, fill = groups)) +
geom_bar(stat = "identity", position = "dodge") +
xlab("") + ggtitle("Ingroup & Outgroup Identification Comparison for each group")
ds.raw<-ds %>% select(groups, identity, value) %>% dplyr::rename(identification=identity)
#stat_compare_means(): easy to use solution to automatically add p-values and significance levels to a ggplot
ggbarplot(ds.raw, x = "identification", y = "value", add = "mean_se",
color = "groups", palette = "simpsons",
position = position_dodge(0.8))+
stat_compare_means(aes(group = groups), label = "p.signif", label.y = 90)+ #add pairwise comparisons p-value
stat_compare_means(method = "anova", label.y=100)+
xlab("")+ylab("mean")#+ggtitle("Group Comparison for ingroup & outgroup")
ggbarplot(ds.raw, x = "identification", y = "value", add = "mean_se",
fill = "groups", palette = "simpsons",
position = position_dodge(0.8))+
stat_compare_means(aes(group = groups), label = "p.signif", label.y = 90)+ #add pairwise comparisons p-value
stat_compare_means(method = "anova", label.y=100)+
xlab("")+ylab("mean")#+ggtitle("Group Comparison for ingroup & outgroup")
#Significance test for identification comparison
ds.test <- ds %>% dplyr::select(groups, identity, value)
#anova test results
result <- data.frame(matrix(nrow = 2, ncol = 3))
colnames(result) <- c("identity", "f_val", "p_val")
for (i in unique(ds.test$identity)) {
#f-val and p-val are ordered the same as in the original dataset
aovtest <-
anova(lm(value ~ groups, data = ds.test[which(ds.test$identity == i), ], var.equal =
TRUE))
result[i, 1] <- i
result[i, 2] <- aovtest$`F value`[[1]]
result[i, 3] <- aovtest$`Pr(>F)`[[1]]
}
#glht stands for general linear hypothesis tests
#model: a fitted model, for example an object returned by aov().
#lincft(): a specification of the linear hypotheses to be tested. Multiple comparisons in ANOVA models are specified by objects returned from the function mcp().
#we use glht() to perform multiple pairwise-comparisons for a one-way ANOVA
#multiple comparisons, this may not be necessay due to large group number (almost 30 combinations)
ds.test$groups <- as.factor(ds.test$groups)
#summary(glht(lm(value ~ groups, data = ds.test[which(ds.test$identity =='ingroup'), ]), linfct = mcp(groups = "Tukey")))
#summary(glht(lm(value ~ groups, data = ds.test[which(ds.test$identity =='outgroup'), ]), linfct = mcp(groups = "Tukey")))
ds.compare <- result %>% slice(3:4)
ds.compare$dir <-
ifelse(ds.compare$f_val > 0, "positive", "negative")
#ds.sig_level <- factor(ds.compare$sig_level, levels = c("sig", "marginally_sig", "non-sig")) #define the order of legend bar
ds.compare$sig_level <- ifelse( ds.compare$p_val <= 0.05, "sig", ifelse(ds.compare$p_val > 0.1, "non_sig", "marginally_sig"))
ds.compare
## identity f_val p_val dir sig_level
## 1 ingroup 21.49652 1.131514e-28 positive sig
## 2 outgroup 33.44121 1.447726e-45 positive sig
p <- ggplot(ds.compare, aes(x = identity, y = p_val, fill = dir)) +
scale_fill_manual(values = c(negative = "red", positive = "green")) +
geom_bar(stat = "identity") +
#ylab("Correlation with GG_Level")+
#coord_flip()+
xlab("") +
ggtitle("Statistical significance check for group comparison")
label.df <- ds.compare %>% dplyr::filter(ds.compare$p_val < 0.05)
p + geom_text(data = label.df, label = "*")
ggplot(ds.mean, aes(groups, mean, fill = identity)) +
geom_bar(stat = "identity", position = "dodge") +
xlab("") + ggtitle("Group Comparison for ingroup & outgroup")
ggbarplot(ds.raw, x = "groups", y = "value", add = "mean_se",
color = "identification", palette = "simpsons",
position = position_dodge(0.8))+
stat_compare_means(aes(group = identification), label = "p.signif", label.y = 90)+ #add pairwise comparisons p-value
stat_compare_means(method = "anova", label.y=100)+
xlab("")+ylab("mean")#+ggtitle("Group Comparison for ingroup & outgroup")
ggbarplot(ds.raw, x = "groups", y = "value", add = "mean_se",
fill = "identification", palette = "simpsons",
position = position_dodge(0.8))+
stat_compare_means(aes(group = identification), label = "p.signif", label.y = 90)+ #add pairwise comparisons p-value
stat_compare_means(method = "anova", label.y=100)+
xlab("")+ylab("mean")#+ggtitle("Group Comparison for ingroup & outgroup")
#Significance test for group comparison
ds.test<-ds %>%dplyr::select(groups, identity, value)
result <- data.frame(matrix(nrow = 8, ncol = 3))
colnames(result)<-c("groups", "f_val", "p_val")
for (i in unique(ds.test$groups)) {#f-val and p-val are ordered the same as in the original dataset
ttest<-t.test(value~identity, data=ds.test[which(ds.test$groups==i),], var.equal = TRUE)
result[i,1]<-i
result[i,2]<-ttest$statistic
result[i,3]<-ttest$p.value
}
ds.compare<-result %>%slice(9:16)
ds.compare$dir <- ifelse(ds.compare$f_val > 0, "positive", "negative")
#ds.sig_level <- factor(ds.compare$sig_level, levels = c("sig", "marginally_sig", "non-sig")) #define the order of legend bar
ds.compare$sig_level <- ifelse(ds.compare$p_val <= 0.05, "sig",
ifelse(ds.compare$p_val > 0.1, "non_sig", "marginally_sig"))
#ds.compare
p<-ggplot(ds.compare, aes(x=groups, y=p_val, fill= dir))+
scale_fill_manual(values=c(negative="red", positive="green"))+
geom_bar(stat="identity")+
#ylab("Correlation with GG_Level")+
#coord_flip()+
xlab("")+
ggtitle("Statistical significance check for group comparison")
label.df <- ds.compare %>% dplyr::filter(ds.compare$p_val < 0.05)
p + geom_text(data = label.df, label = "*")
#compare <-ddply(ds, .(groups), function())
ggplot(ds, aes(x = value, y = groups, fill = group)) +
geom_density_ridges(scale = 1, alpha = 0.5)
For every eight group, participants usually rate 100 score for either as American or as outgroup and there is limited difference between the distribution of as Americna or as outgroup except for African American.
–3. Geographic plot
–prepare the raw data
only experiment group is selected
wide to long table
group the data first by groups(Muslims, Chinese, etc,.) then by identification (ingroup/outgroup)
dss <- gp %>%
dplyr::select(CONDITION,LocationLatitude, LocationLongitude, Muslims_3:Italian_4) %>%
dplyr::filter(CONDITION=="Experiment") %>%
gather("group", value, Muslims_3:Italian_4)
dss$groups<-ifelse(dss$group=='Muslims_3'|dss$group=='Muslims_4', "Muslims",
ifelse(dss$group=='Chinese_3'|dss$group=='Chinese_4', "Chinese",
ifelse(dss$group=='Hispanic_3'|dss$group=='Hispanic_4', "Hispanic",
ifelse(dss$group=='Russian_3'|dss$group=='Russian_4', "Russian",
ifelse(dss$group=='Irish_3'|dss$group=='Irish_4', "Irish",
ifelse(dss$group=='African_3'|dss$group=='African_4', "African",
ifelse(dss$group=='Mexican_3'|dss$group=='Mexican_4',"Mexican","Italian")))))))
dss$identity <-ifelse(grepl("^.+\\_(3)$", dss$group) == TRUE, "ingroup", "outgroup") #use regular expression to extract the last digit of group (eg:Muslims_4 -> 4)
dss<-dss %>% dplyr::select(LocationLatitude, LocationLongitude, groups, identity, value) %>%
na.omit()
###non-immigrant data
non_imm<- gp %>%
dplyr::select(Immigrant, CONDITION,LocationLatitude, LocationLongitude, Muslims_3:Italian_4) %>%
dplyr::filter(CONDITION=="Experiment") %>%
gather("group", value, Muslims_3:Italian_4)
non_imm$groups<-ifelse(non_imm$group=='Muslims_3'|non_imm$group=='Muslims_4', "Muslims",
ifelse(non_imm$group=='Chinese_3'|non_imm$group=='Chinese_4', "Chinese",
ifelse(non_imm$group=='Hispanic_3'|non_imm$group=='Hispanic_4', "Hispanic",
ifelse(non_imm$group=='Russian_3'|non_imm$group=='Russian_4', "Russian",
ifelse(non_imm$group=='Irish_3'|non_imm$group=='Irish_4', "Irish",
ifelse(non_imm$group=='African_3'|non_imm$group=='African_4', "African",
ifelse(non_imm$group=='Mexican_3'|non_imm$group=='Mexican_4',"Mexican","Italian")))))))
non_imm$identity <-ifelse(grepl("^.+\\_(3)$", non_imm$group) == TRUE, "ingroup", "outgroup") #use regular expression to extract the last digit of group (eg:Muslims_4 -> 4)
non_immi<-non_imm %>% dplyr::filter(Immigrant == 2) %>%
dplyr::select(LocationLatitude, LocationLongitude, groups, identity, value) %>%
na.omit()
gpp<-gp %>%
dplyr::select(Immigrant, CONDITION,LocationLatitude, LocationLongitude, Muslims_3:Italian_4) %>%
dplyr::filter(CONDITION=="Experiment")
imm_percent <-gpp %>% dplyr::select(Immigrant, LocationLatitude, LocationLongitude) %>%
na.omit()%>%
mutate(Immigrant=dplyr::recode(Immigrant, '2'="No", '1'="Yes"))
1.acquire US map information
2.convert lat+lon to state code
states <- map_data("state")
#first, make a blank state map with state data using geom_polygon()
latlong2state <- function(pointsDF) {
# Prepare SpatialPolygons object with one SpatialPolygon
# per state (plus DC, minus HI & AK)
states <- map('state', fill=TRUE, col="transparent", plot=FALSE)
IDs <- sapply(strsplit(states$names, ":"), function(x) x[1])
states_sp <- map2SpatialPolygons(states, IDs=IDs,
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Convert pointsDF to a SpatialPoints object
pointsSP <- SpatialPoints(pointsDF,
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Use 'over' to get _indices_ of the Polygons object containing each point
indices <- over(pointsSP, states_sp)
# Return the state names of the Polygons object containing each point
stateNames <- sapply(states_sp@polygons, function(x) x@ID)
stateNames[indices]
}
testPoints <- data.frame(x = as.numeric(dss$LocationLongitude), y=as.numeric(dss$LocationLatitude))
dss$state<-latlong2state(testPoints)
sum(is.na(dss$state))
## [1] 512
non_immPoints <-data.frame(x = as.numeric(non_immi$LocationLongitude), y= as.numeric(non_immi$LocationLatitude))
non_immi$state<-latlong2state(non_immPoints)
sum(is.na(non_immi$state))
## [1] 224
#write.csv(non_imm, file = "non_imm.csv")
imm_percentPoints <- data.frame(x= as.numeric(imm_percent$LocationLongitude),
y = as.numeric(imm_percent$LocationLatitude))
imm_percent$state<-latlong2state(imm_percentPoints)
sum(is.na(imm_percent$state))
## [1] 32
The above code works for most lat&lon location but contains 512 NAs due to rough mapping
statewinner <- dss %>%
tibble::rowid_to_column()%>%
dplyr::select(rowid, identity, value, state) %>% spread(identity, value) %>%
select(-rowid) %>%
group_by(state) %>%
summarise_all(mean, na.rm = T) %>%
mutate(identification = ifelse(ingroup>outgroup, "ingroup", "outgroup")) %>%
select(state, identification) %>% dplyr::rename(region = state)
#head(statewinner)
non_immwinner<- non_immi %>%
tibble::rowid_to_column()%>%
dplyr::select(rowid, identity, value, state) %>% spread(identity, value) %>%
select(-rowid) %>%
group_by(state) %>%
summarise_all(mean, na.rm = T) %>%
mutate(identification = ifelse(ingroup>outgroup, "ingroup", "outgroup")) %>%
select(state, identification) %>% dplyr::rename(region = state)
##calculate immigration percetage
imm.perce<-imm_percent %>%
dplyr::filter(!is.na(state)) %>%
dplyr::count(Immigrant, state) %>%
dplyr::group_by(state) %>%
dplyr::transmute(Immigrant, imm_Percentage=n/sum(n)*100)%>%
dplyr::filter(Immigrant=="Yes") %>%
dplyr::select(state, imm_Percentage) %>% dplyr::rename(region = state)
id_winner<-left_join(states, statewinner, by = "region")
nonimm_winner<-left_join(states, non_immwinner, by = "region")
per_imm<-left_join(states, imm.perce, by = "region")
p <- ggplot(data = id_winner,
aes(x = long, y = lat,
group = group, fill = identification))
p + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)+
ggtitle("statewinner")
###non-immigrant
ni <- ggplot(data = nonimm_winner,
aes(x = long, y = lat,
group = group, fill = identification))
ni + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)+
ggtitle("statewinner of non-immigrant")
###percentage of immigrants
perimm<-ggplot(data = per_imm,
aes(x = long, y=lat,
group = group, fill = imm_Percentage))+
scale_fill_gradient(low="white", high="red")
perimm + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)+
ggtitle("Immigration percentage")
ggplot() + geom_polygon(data = states, aes(x=long, y = lat, group = group), fill = "aliceblue", color = "black")+ coord_fixed(1.3)+
geom_point(data = dss, aes(x= LocationLongitude, y=LocationLatitude, shape=groups, color = value))
#plot intergroup people more as ingroup
ds3<-gp %>%
dplyr::select(CONDITION,LocationLatitude, LocationLongitude, Muslims_3, Chinese_3, Hispanic_3, Russian_3, Irish_3, African_3, Mexican_3, Italian_3) %>%
dplyr::filter(CONDITION=="Experiment") %>%
gather("group", value, Muslims_3:Italian_3)
GG3<-ggplot() + geom_polygon(data = states, aes(x=long, y = lat, group = group), fill = "aliceblue", color = "black")+ coord_fixed(1.3)+
geom_point(data = ds3, aes(x= LocationLongitude, y=LocationLatitude, shape=group, color = value))+
ggtitle("Feel intergroup people are American")
#plot intergroup people more as outgroup
ds4<-gp %>%
dplyr::select(CONDITION,LocationLatitude, LocationLongitude, Muslims_4, Chinese_4, Hispanic_4, Russian_4, Irish_4, African_4, Mexican_4, Italian_4) %>%
dplyr::filter(CONDITION=="Experiment") %>%
gather("group", value, Muslims_4:Italian_4)
GG4<-ggplot() + geom_polygon(data = states, aes(x=long, y = lat, group = group), fill = "aliceblue", color = "black")+ coord_fixed(1.3)+
geom_point(data = ds4, aes(x= LocationLongitude, y=LocationLatitude, shape=group, color = value))+
ggtitle("Feel intergroup people are outgroup")
GG3;GG4