# Load the three input tables from cogsci_data/.
# Rice/wheat information; it is joined by province further down.
d_rw <- read_csv(file = "cogsci_data/rice_wheat_info.csv")
# Demographic responses. Note this one is read with base read.csv(), not readr.
d_demog <- read.csv(file = "cogsci_data/tidy_demog.csv")
# Main task data (one row per response, presumably -- confirm against the file).
d <- read_csv(file = "cogsci_data/tidy_main.csv")
# d_demog

Demographics of interest: state_grewup, subjectiveses, overseaexpnum, abroadexp.

Rice Wheat Breakdown

# Normalise the rice/wheat column names with janitor::clean_names().
d_rw <- janitor::clean_names(d_rw)

# pinyin is attached for pydic() here and py() in the province recoding.
library(pinyin)

# Load the default dictionary, using the toneless method.
mypy <- pydic(method = "toneless")

# Per-subject mean of `resp`, restricted to rows with culture "CN",
# task "FD" and response type "first_mention_focal".
fd_d <- d %>%
  filter(
    culture == "CN",
    task_name == "FD",
    resp_type == "first_mention_focal"
  ) %>%
  group_by(subject) %>%
  summarise(mean_first_mention = mean(resp))

# Build a subject -> province lookup from the grew-up responses.
#
# `demog_response` is transliterated with py() (toneless dictionary `mypy`)
# and title-cased into `demog_resp_py`; `province` is then derived from it.
# The explicit recodes cover transliterations that do not already equal the
# province label used for joining (e.g. "Hena" -> "Henan").
# NOTE(review): presumably these odd spellings are artefacts of py() on this
# data -- confirm before adding new provinces.
province_d <- d_grewup.cn %>%
  select(subject, demog_response) %>%
  # py() is applied to one response at a time.
  rowwise() %>%
  mutate(demog_resp_py = stringr::str_to_title(as.character(py(
    char = as.character(demog_response),
    sep = "",
    other_replace = NULL,
    dic = mypy
  )))) %>%
  # Drop the row-wise grouping: the recode below is vectorised, and leaving
  # it in place would make province_d a rowwise tibble for every later use.
  ungroup() %>%
  mutate(province = case_when(
    demog_resp_py == "Hena" ~ "Henan",
    demog_resp_py == "Jita" ~ "Others",
    demog_resp_py == "Haina" ~ "Hainan",
    demog_resp_py == "Huna" ~ "Hunan",
    demog_resp_py == "Andong" ~ "Guangdong",
    demog_resp_py == "Namenggu" ~ "Inner Mongolia",
    demog_resp_py == "Jinghai" ~ "Qinghai",
    demog_resp_py == "Angxi" ~ "Guangxi",
    # These two are matched on the original Chinese text rather than on the
    # transliteration, so that they receive distinct labels.
    demog_response == "山西" ~ "Shanxi",
    demog_response == "陕西" ~ "Shaanxi",
    # Everything else keeps its title-cased transliteration.
    TRUE ~ demog_resp_py
  ))
  



# Attach each subject's province, then the rice/wheat columns for that
# province. Both are left joins, so every row of fd_d is kept.
fd_d_with_province <- left_join(fd_d, province_d, by = "subject")
fd_d_with_province <- left_join(fd_d_with_province, d_rw, by = "province")

GDP

1996

# Province-level scatter: mean first-mention score against
# per_capita_gdp1996rmb. Point size is the number of participants and colour
# is the rice/wheat category; provinces whose rice_cat is neither 1 nor 2
# (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = per_capita_gdp1996rmb,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Per Capita GDP 1996 (10kRMB)",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

# Province-level scatter: mean first-mention score against
# percapita_gdp1996log. Point size is the number of participants and colour
# is the rice/wheat category; provinces whose rice_cat is neither 1 nor 2
# (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = percapita_gdp1996log,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Per Capita GDP 1996 (log)",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

# Province-level scatter: mean first-mention score against
# province_per_capita_gdp2000rmb. Point size is the number of participants and
# colour is the rice/wheat category; provinces whose rice_cat is neither 1 nor
# 2 (including missing) are dropped.
#
# NOTE(review): this chunk sits under the "1996" heading and its x axis used
# to be labelled "Per Capita GDP 1996 (RMB)", but the column mapped to x is
# the 2000 one. The label now describes the data actually plotted. If a 1996
# RMB plot was intended, change the `x` mapping instead -- confirm with the
# author.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    mean_first_mention = mean(mean_first_mention),
    n = n()
  ) %>%
  left_join(d_rw, by = "province") %>%
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice",
    TRUE ~ "Not Available"
  )) %>%
  filter(rice_cat_print != "Not Available") %>%
  ggplot(aes(
    x = province_per_capita_gdp2000rmb,
    y = mean_first_mention,
    size = n,
    label = province,
    color = as.factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  xlab("Province per Capita GDP 2000 (RMB)") +
  ylab("Average Number of First Mention") +
  labs(size = "Number of Participants", colour = "Rice/Wheat Regions")

2000

# Province-level scatter: mean first-mention score against
# province_per_capita_gdp2000rmb. Point size is the number of participants and
# colour is the rice/wheat category; provinces whose rice_cat is neither 1 nor
# 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_per_capita_gdp2000rmb,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province per Capita GDP 2000 (RMB)",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

2008

# Province-level scatter: mean first-mention score against
# province_per_capita_gdp2008rmb. Point size is the number of participants and
# colour is the rice/wheat category; provinces whose rice_cat is neither 1 nor
# 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_per_capita_gdp2008rmb,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province per Capita GDP 2008 (RMB)",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

2012

# Province-level scatter: mean first-mention score against
# province_per_capita_gdp2012. Point size is the number of participants and
# colour is the rice/wheat category; provinces whose rice_cat is neither 1 nor
# 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_per_capita_gdp2012,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province per Capita GDP 2012",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

Development

Internet Penetration 2007

# Province-level scatter: mean first-mention score against
# province_internet_penetration2007. Point size is the number of participants
# and colour is the rice/wheat category; provinces whose rice_cat is neither
# 1 nor 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_internet_penetration2007,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province Internet Penetration 2007",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

HDI 2008

# Province-level scatter: mean first-mention score against hdi2008. Point
# size is the number of participants and colour is the rice/wheat category;
# provinces whose rice_cat is neither 1 nor 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = hdi2008,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Human Development Index 2008",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

College graduates 1990

# Province-level scatter: mean first-mention score against
# province_percent_college_graduates1990. Point size is the number of
# participants and colour is the rice/wheat category; provinces whose rice_cat
# is neither 1 nor 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_percent_college_graduates1990,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province Percent College Graduates 1990",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

College graduates 2015

# Province-level scatter: mean first-mention score against
# province_percent_college_graduates2015. Point size is the number of
# participants and colour is the rice/wheat category; provinces whose rice_cat
# is neither 1 nor 2 (including missing) are dropped.
fd_d_with_province %>%
  group_by(province) %>%
  summarise(
    n = n(),
    mean_first_mention = mean(mean_first_mention)
  ) %>%
  left_join(d_rw, by = "province") %>%
  # Unmatched categories become NA and are removed on the next line.
  mutate(rice_cat_print = case_when(
    rice_cat == 1 ~ "Wheat",
    rice_cat == 2 ~ "Rice"
  )) %>%
  filter(!is.na(rice_cat_print)) %>%
  ggplot(aes(
    x = province_percent_college_graduates2015,
    y = mean_first_mention,
    size = n,
    label = province,
    colour = factor(rice_cat_print)
  )) +
  geom_point(alpha = 0.3) +
  # Fixed-size province labels, nudged off the points.
  geom_text(
    size = 3, hjust = 0.01,
    nudge_x = -0.05, nudge_y = 0.03,
    show.legend = FALSE
  ) +
  theme_classic() +
  labs(
    x = "Province Percent College Graduates 2015",
    y = "Average Number of First Mention",
    size = "Number of Participants",
    colour = "Rice/Wheat Regions"
  )

state_grewup

# Tally the rows of d_grewup.us per demog_response, largest count first
# (ties keep their grouped order).
d_grewup.us.ct <- d_grewup.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
# Total number of tallied responses.
sum(d_grewup.us.ct[["count"]])
## [1] 169
d_grewup.us.ct
## # A tibble: 36 x 2
##    demog_response count
##    <fct>          <int>
##  1 California        72
##  2 Texas             12
##  3 Florida            7
##  4 Illinois           7
##  5 Not applicable     7
##  6 Washington         7
##  7 Colorado           5
##  8 New Jersey         4
##  9 Arizona            3
## 10 Iowa               3
## 11 Michigan           3
## 12 Minnesota          3
## 13 New York           3
## 14 Virginia           3
## 15 Connecticut        2
## 16 Georgia            2
## 17 Maine              2
## 18 Massachusetts      2
## 19 Nebraska           2
## 20 Ohio               2
## 21 Oregon             2
## 22 Pennsylvania       2
## 23 Alabama            1
## 24 Arkansas           1
## 25 Hawaii             1
## 26 Kansas             1
## 27 Kentucky           1
## 28 Louisiana          1
## 29 Maryland           1
## 30 Montana            1
## 31 Nevada             1
## 32 North Carolina     1
## 33 Tennessee          1
## 34 Utah               1
## 35 Vermont            1
## 36 Wisconsin          1
# Tally the rows of d_grewup.cn per demog_response, largest count first
# (ties keep their grouped order).
d_grewup.cn.ct <- d_grewup.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
# Total number of tallied responses.
sum(d_grewup.cn.ct[["count"]])
## [1] 167
d_grewup.cn.ct
## # A tibble: 28 x 2
##    demog_response count
##    <fct>          <int>
##  1 山东              21
##  2 北京              20
##  3 广东              13
##  4 江苏              11
##  5 江西               9
##  6 河北               9
##  7 河南               9
##  8 天津               6
##  9 福建               6
## 10 四川               5
## 11 辽宁               5
## 12 黑龙江             5
## 13 其它               4
## 14 内蒙古             4
## 15 安徽               4
## 16 新疆               4
## 17 湖南               4
## 18 贵州               4
## 19 重庆               4
## 20 陕西               4
## 21 吉林               3
## 22 山西               3
## 23 浙江               3
## 24 湖北               3
## 25 上海               1
## 26 广西               1
## 27 海南               1
## 28 青海               1

subjectiveses

# Tally the rows of d_subses.us per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_subses.us.ct <- d_subses.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_subses.us.ct
## # A tibble: 10 x 2
##    demog_response count
##    <fct>          <int>
##  1 7                 49
##  2 6                 32
##  3 4                 22
##  4 8                 18
##  5 9                 16
##  6 5                 13
##  7 3                 12
##  8 2                  4
##  9 10                 2
## 10 1                  1
# Tally the rows of d_subses.cn per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_subses.cn.ct <- d_subses.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_subses.cn.ct
## # A tibble: 9 x 2
##   demog_response count
##   <fct>          <int>
## 1 5                 45
## 2 6                 44
## 3 4                 27
## 4 7                 19
## 5 3                 16
## 6 8                 11
## 7 2                  3
## 8 10                 1
## 9 9                  1

overseaexpnum

# Tally the rows of d_overseanum.us per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_overseanum.us.ct <- d_overseanum.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_overseanum.us.ct
## # A tibble: 5 x 2
##   demog_response            count
##   <fct>                     <int>
## 1 Six or more experiences      81
## 2 Three to five experiences    51
## 3 Two experiences              14
## 4 One experience               12
## 5 No experiences               11
# Tally the rows of d_overseanum.cn per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_overseanum.cn.ct <- d_overseanum.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_overseanum.cn.ct
## # A tibble: 5 x 2
##   demog_response     count
##   <fct>              <int>
## 1 没有国际经历         101
## 2 一段国际经历          26
## 3 三到五段国际经历      16
## 4 两段国际经历          14
## 5 六段或更多国际经历    10

abroadexp

# Tally the rows of d_abroad2y.us per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_abroad2y.us.ct <- d_abroad2y.us %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_abroad2y.us.ct
## # A tibble: 2 x 2
##   demog_response count
##   <fct>          <int>
## 1 No               155
## 2 Yes               14
# Tally the rows of d_abroad2y.cn per demog_response, largest count first
# (ties keep their grouped order), then print the table.
d_abroad2y.cn.ct <- d_abroad2y.cn %>%
  group_by(demog_response) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
d_abroad2y.cn.ct
## # A tibble: 2 x 2
##   demog_response count
##   <fct>          <int>
## 1 否               157
## 2 是                10