# Load the three input tables from the cogsci_data/ directory.
# Rice/wheat information (later joined to the participant data by province).
d_rw <- read_csv(file = "cogsci_data/rice_wheat_info.csv")
# Demographics table; note this one is read with base read.csv(), not readr.
d_demog <- read.csv(file = "cogsci_data/tidy_demog.csv")
# Main task data.
d <- read_csv(file = "cogsci_data/tidy_main.csv")
# d_demog
Demographic variables of interest: state_grewup, subjectiveses, overseaexpnum, and abroadexp.
# Standardise the column names of the rice/wheat table with janitor so the
# code below can refer to them in their cleaned form.
d_rw <- janitor::clean_names(d_rw)
library(pinyin)
# Load the default pinyin dictionary, using toneless romanisation.
mypy <- pydic(method = "toneless")
# Per-subject mean of `resp`, restricted to rows of `d` where culture is "CN",
# the task is "FD", and the response type is "first_mention_focal".
fd_d <- d %>%
  filter(
    culture == "CN",
    task_name == "FD",
    resp_type == "first_mention_focal"
  ) %>%
  group_by(subject) %>%
  summarise(mean_first_mention = mean(resp))
# Map each subject's `demog_response` (Chinese province name) to an English
# province label.
#   1. `demog_resp_py`: toneless pinyin of the response (via `py()` with the
#      `mypy` dictionary), converted to title case.
#   2. `province`: `demog_resp_py` with a hand-written fix-up table applied.
#      The table overrides specific pinyin outputs, and maps the raw responses
#      "山西" and "陕西" to "Shanxi" and "Shaanxi" respectively.
# The conversion runs row by row (`rowwise()`), and the result stays rowwise.
province_d <- d_grewup.cn %>%
  select(subject, demog_response) %>%
  rowwise() %>%
  mutate(
    demog_resp_py = as.character(
      py(
        char = as.character(demog_response),
        sep = "",
        other_replace = NULL,
        dic = mypy
      )
    ),
    demog_resp_py = stringr::str_to_title(demog_resp_py),
    province = case_when(
      demog_resp_py == "Hena" ~ "Henan",
      demog_resp_py == "Jita" ~ "Others",
      demog_resp_py == "Haina" ~ "Hainan",
      demog_resp_py == "Huna" ~ "Hunan",
      demog_resp_py == "Andong" ~ "Guangdong",
      demog_resp_py == "Namenggu" ~ "Inner Mongolia",
      demog_resp_py == "Jinghai" ~ "Qinghai",
      demog_resp_py == "Angxi" ~ "Guangxi",
      # These two are matched on the raw response rather than the pinyin.
      demog_response == "山西" ~ "Shanxi",
      demog_response == "陕西" ~ "Shaanxi",
      # Everything else keeps its title-cased pinyin.
      TRUE ~ demog_resp_py
    )
  )
# Attach each subject's province, then the rice/wheat information for that
# province. Both are left joins, so every row of `fd_d` is kept.
fd_d_with_province <- left_join(fd_d, province_d, by = "subject")
fd_d_with_province <- left_join(fd_d_with_province, d_rw, by = "province")
# Province-level scatter plots ----
# Every figure below plots the province mean of `mean_first_mention` against
# one column of `d_rw`. Point size shows the number of rows per province and
# colour shows the rice/wheat category. The figures differ only in the x
# column and its axis title, so the shared work is done once here.

# Province-level table used by all figures:
#   - mean_first_mention: mean of the per-subject means within a province
#   - n: number of rows in that province (the legend calls this the number of
#     participants; this assumes one row per subject, TODO confirm)
#   - the `d_rw` columns for the province
#   - rice_cat_print: "Wheat" for rice_cat 1, "Rice" for rice_cat 2
# Provinces with any other (or missing) rice_cat are dropped.
province_plot_d <- fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    mean_first_mention = mean(mean_first_mention),
    n = n()
  ) %>%
  left_join(d_rw, by = "province") %>%
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice",
    TRUE ~ "Not Available"
  )) %>%
  filter(rice_cat_print != "Not Available")

#' Scatter plot of province mean first mention against one covariate
#'
#' @param data Province-level data frame with columns `mean_first_mention`,
#'   `n`, `province`, `rice_cat_print` and the column named by `x_col`.
#' @param x_col Name (single string) of the column to put on the x axis.
#' @param x_label Title for the x axis.
#' @return A ggplot object; it is drawn when printed at top level.
plot_first_mention_by <- function(data, x_col, x_label) {
  # Fail early with a clear message if the requested column is missing.
  stopifnot(
    is.character(x_col),
    length(x_col) == 1,
    x_col %in% names(data)
  )

  ggplot(
    data,
    aes(
      x = .data[[x_col]], y = mean_first_mention,
      size = n, label = province,
      color = as.factor(rice_cat_print)
    )
  ) +
    geom_point(alpha = 0.3) +
    # The text size is fixed, so the labels are kept out of the size legend.
    geom_text(
      size = 3, hjust = 0.01, nudge_x = -0.05, nudge_y = 0.03,
      show.legend = FALSE
    ) +
    theme_classic() +
    xlab(x_label) +
    ylab("Average Number of First Mention") +
    labs(size = "Number of Participants", colour = "Rice/Wheat Regions")
}

plot_first_mention_by(
  province_plot_d, "per_capita_gdp1996rmb",
  "Per Capita GDP 1996 (10kRMB)"
)

plot_first_mention_by(
  province_plot_d, "percapita_gdp1996log",
  "Per Capita GDP 1996 (log)"
)

# NOTE(review): this figure was originally titled "Per Capita GDP 1996 (RMB)"
# while plotting the 2000 column. The title now matches the plotted column,
# which makes this figure identical to the next one. Confirm whether a 1996
# column was intended here instead.
plot_first_mention_by(
  province_plot_d, "province_per_capita_gdp2000rmb",
  "Province per Capita GDP 2000 (RMB)"
)

plot_first_mention_by(
  province_plot_d, "province_per_capita_gdp2000rmb",
  "Province per Capita GDP 2000 (RMB)"
)

plot_first_mention_by(
  province_plot_d, "province_per_capita_gdp2008rmb",
  "Province per Capita GDP 2008 (RMB)"
)

plot_first_mention_by(
  province_plot_d, "province_per_capita_gdp2012",
  "Province per Capita GDP 2012"
)

plot_first_mention_by(
  province_plot_d, "province_internet_penetration2007",
  "Province Internet Penetration 2007"
)

plot_first_mention_by(
  province_plot_d, "hdi2008",
  "Human Development Index 2008"
)

plot_first_mention_by(
  province_plot_d, "province_percent_college_graduates1990",
  "Province Percent College Graduates 1990"
)

plot_first_mention_by(
  province_plot_d, "province_percent_college_graduates2015",
  "Province Percent College Graduates 2015"
)
state_grewup
# Count the rows of d_grewup.us per demog_response value, most frequent first,
# then show the total count and the table.
d_grewup.us.ct <- d_grewup.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
sum(d_grewup.us.ct$count)
## [1] 169
d_grewup.us.ct
## # A tibble: 36 x 2
## demog_response count
## <fct> <int>
## 1 California 72
## 2 Texas 12
## 3 Florida 7
## 4 Illinois 7
## 5 Not applicable 7
## 6 Washington 7
## 7 Colorado 5
## 8 New Jersey 4
## 9 Arizona 3
## 10 Iowa 3
## 11 Michigan 3
## 12 Minnesota 3
## 13 New York 3
## 14 Virginia 3
## 15 Connecticut 2
## 16 Georgia 2
## 17 Maine 2
## 18 Massachusetts 2
## 19 Nebraska 2
## 20 Ohio 2
## 21 Oregon 2
## 22 Pennsylvania 2
## 23 Alabama 1
## 24 Arkansas 1
## 25 Hawaii 1
## 26 Kansas 1
## 27 Kentucky 1
## 28 Louisiana 1
## 29 Maryland 1
## 30 Montana 1
## 31 Nevada 1
## 32 North Carolina 1
## 33 Tennessee 1
## 34 Utah 1
## 35 Vermont 1
## 36 Wisconsin 1
# Count the rows of d_grewup.cn per demog_response value, most frequent first,
# then show the total count and the table.
d_grewup.cn.ct <- d_grewup.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
sum(d_grewup.cn.ct$count)
## [1] 167
d_grewup.cn.ct
## # A tibble: 28 x 2
## demog_response count
## <fct> <int>
## 1 山东 21
## 2 北京 20
## 3 广东 13
## 4 江苏 11
## 5 江西 9
## 6 河北 9
## 7 河南 9
## 8 天津 6
## 9 福建 6
## 10 四川 5
## 11 辽宁 5
## 12 黑龙江 5
## 13 其它 4
## 14 内蒙古 4
## 15 安徽 4
## 16 新疆 4
## 17 湖南 4
## 18 贵州 4
## 19 重庆 4
## 20 陕西 4
## 21 吉林 3
## 22 山西 3
## 23 浙江 3
## 24 湖北 3
## 25 上海 1
## 26 广西 1
## 27 海南 1
## 28 青海 1
subjectiveses
# Count the rows of d_subses.us per demog_response value and print the table,
# most frequent response first.
d_subses.us.ct <- d_subses.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_subses.us.ct
## # A tibble: 10 x 2
## demog_response count
## <fct> <int>
## 1 7 49
## 2 6 32
## 3 4 22
## 4 8 18
## 5 9 16
## 6 5 13
## 7 3 12
## 8 2 4
## 9 10 2
## 10 1 1
# Count the rows of d_subses.cn per demog_response value and print the table,
# most frequent response first.
d_subses.cn.ct <- d_subses.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_subses.cn.ct
## # A tibble: 9 x 2
## demog_response count
## <fct> <int>
## 1 5 45
## 2 6 44
## 3 4 27
## 4 7 19
## 5 3 16
## 6 8 11
## 7 2 3
## 8 10 1
## 9 9 1
overseaexpnum
# Count the rows of d_overseanum.us per demog_response value and print the
# table, most frequent response first.
d_overseanum.us.ct <- d_overseanum.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_overseanum.us.ct
## # A tibble: 5 x 2
## demog_response count
## <fct> <int>
## 1 Six or more experiences 81
## 2 Three to five experiences 51
## 3 Two experiences 14
## 4 One experience 12
## 5 No experiences 11
# Count the rows of d_overseanum.cn per demog_response value and print the
# table, most frequent response first.
d_overseanum.cn.ct <- d_overseanum.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_overseanum.cn.ct
## # A tibble: 5 x 2
## demog_response count
## <fct> <int>
## 1 没有国际经历 101
## 2 一段国际经历 26
## 3 三到五段国际经历 16
## 4 两段国际经历 14
## 5 六段或更多国际经历 10
abroadexp
# Count the rows of d_abroad2y.us per demog_response value and print the
# table, most frequent response first.
d_abroad2y.us.ct <- d_abroad2y.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_abroad2y.us.ct
## # A tibble: 2 x 2
## demog_response count
## <fct> <int>
## 1 No 155
## 2 Yes 14
# Count the rows of d_abroad2y.cn per demog_response value and print the
# table, most frequent response first.
d_abroad2y.cn.ct <- d_abroad2y.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_abroad2y.cn.ct
## # A tibble: 2 x 2
## demog_response count
## <fct> <int>
## 1 否 157
## 2 是 10