Prosox

log_india <- read.csv(here("data/original", "india_master.csv")) %>% 
  select(id, grade) %>%
  rename(subid = id) %>%
  mutate(age = grade + 5) %>%
  select(-grade)

log_korea <- read.csv(here("data/original", "korea.csv")) %>% 
  distinct(subid, .keep_all=T) %>%
  select(subid, age)

log_iran <- read.csv(here("data/original", "iran.csv")) %>%
  distinct(subid, .keep_all=T) %>%
  select(subid, age)

log_canada <- read.csv(here("data/original", "Canada with exact age.csv")) %>%
  rename(subid = sub_id,
         age = Age) %>%
  distinct(subid, .keep_all=T) %>%
  select(subid, age)

log_all <- rbind(log_india, log_korea, log_iran, log_canada)
  
d <- left_join(d, log_all)

## Joining, by = "subid"

## Warning: Column `subid` joining factors with different levels, coercing to
## character vector

summary(d)

##     subid                agebin               task               item     
##  Length:9519        grade3-4:3919   moral       :4080   namecall   :1360  
##  Class :character   grade5-6:4144   conventional:5439   push       :1360  
##  Mode  :character   grade8-9:1456                       rip        :1360  
##                                                         shoes      :1359  
##                                                         swim       :1360  
##                                                         teachername:1360  
##                                                         toy        :1360  
##       q_kind         answer           site           age        
##  base_q  :2379   Min.   :0.0000   Iran  :2184   Min.   : 5.000  
##  faraway :2380   1st Qu.:0.0000   India :4144   1st Qu.: 8.000  
##  no rule :2380   Median :0.0000   Korea : 952   Median : 9.680  
##  everyone:2380   Mean   :0.5357   Canada:2239   Mean   : 9.541  
##                  3rd Qu.:1.0000                 3rd Qu.:11.000  
##                  Max.   :6.0000                 Max.   :14.000  
##                  NA's   :578                    NA's   :252

d %>%
  group_by(site, agebin, subid) %>%
  summarise(n=n()) %>%
  group_by(site, agebin) %>%
  summarise(n=n())

## # A tibble: 9 x 3
## # Groups:   site [?]
##     site   agebin     n
##   <fctr>   <fctr> <int>
## 1   Iran grade3-4    57
## 2   Iran grade5-6    21
## 3  India grade3-4    17
## 4  India grade5-6    78
## 5  India grade8-9    52
## 6  Korea grade3-4    20
## 7  Korea grade5-6    14
## 8 Canada grade3-4    46
## 9 Canada grade5-6    34

ggplot(data=d %>%
         group_by(site, agebin, subid) %>%
         summarise(n = n()) %>%
         group_by(site, agebin) %>%
         summarise(number = n()) %>%
         filter(!is.na(agebin)), 
       
       aes(x=site, y=number, fill=agebin)) +
  geom_bar(position="dodge", stat="identity") +
  theme_few() +
  scale_fill_ptol() +
  ylab("number of participants")

# how bad is ~?
d %>% 
  filter(q_kind == "base_q", !is.na(answer)) %>%
  group_by(site, agebin, task) %>%
  multi_boot_standard(col="answer") %>%
  ggplot(., aes(x=agebin, y=mean, fill=task, col=task, group=task)) +
  # geom_bar(position="dodge", stat="identity") +
  # geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper), position="dodge") +
  geom_line() +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
               position = position_dodge(width = .1)) +
  geom_hline(yintercept = 3, lty = 2) +
  facet_grid(.~site) + 
  theme_few() +
  scale_color_ptol()

d %>%
  mutate(age_group = floor(age)) %>%
  filter(q_kind == "base_q") %>%
  group_by(site, age_group, task, subid, item) %>%
  summarise(answer = mean(answer)) %>%
  group_by(site, age_group, task) %>%
  multi_boot_standard(col="answer", na.rm=T) %>%
  ggplot(., aes(x=age_group, y=mean, col=task)) +
  geom_line() +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
               position = position_dodge(width = .1)) +
  facet_grid(.~site) +
  theme_few() +
  scale_color_ptol()

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 8 rows containing missing values (geom_linerange).

ggplot(filter(d, q_kind=="base_q", !is.na(age), answer!="0"), 
       aes(x=age, y=answer, col=task)) +
  geom_jitter(width=.3, alpha=.3)+
  facet_grid(.~site) +
  geom_smooth() +
  theme_few() +
  scale_color_ptol()+
  geom_hline(yintercept =3, lty=2)

## `geom_smooth()` using method = 'loess'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 9.98

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 3.02

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.9374e-16

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 9.1204

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 9.98

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 3.02

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 2.9374e-16

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 9.1204

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 9.98

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 3.02

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.5395e-15

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 9.1204

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 9.98

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 3.02

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 3.5395e-15

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 9.1204

summary(lmer(data=d %>%
               mutate(age = scale(age)), 
             answer ~ site * task * age + (task | subid) + (site + age | item)))

## Linear mixed model fit by REML ['lmerMod']
## Formula: answer ~ site * task * age + (task | subid) + (site + age | item)
##    Data: d %>% mutate(age = scale(age))
## 
## REML criterion at convergence: 21707.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5180 -0.6098 -0.4168  0.5457  6.7819 
## 
## Random effects:
##  Groups   Name             Variance Std.Dev. Corr                   
##  subid    (Intercept)      0.010730 0.10359                         
##           taskconventional 0.004604 0.06785  1.00                   
##  item     (Intercept)      0.004443 0.06666                         
##           siteIndia        0.002669 0.05166   0.33                  
##           siteKorea        0.002104 0.04587   0.91 -0.08            
##           siteCanada       0.005980 0.07733   0.76 -0.36  0.96      
##           age              0.001496 0.03868   1.00  0.24  0.95  0.82
##  Residual                  0.684548 0.82737                         
## Number of obs: 8717, groups:  subid, 312; item, 7
## 
## Fixed effects:
##                                  Estimate Std. Error t value
## (Intercept)                      0.422298   0.059507   7.097
## siteIndia                        0.001202   0.069248   0.017
## siteKorea                       -0.012397   0.074030  -0.167
## siteCanada                       0.157479   0.076988   2.045
## taskconventional                 0.071390   0.075934   0.940
## age                              0.067980   0.050018   1.359
## siteIndia:taskconventional       0.104241   0.087010   1.198
## siteKorea:taskconventional       0.121292   0.093227   1.301
## siteCanada:taskconventional      0.145702   0.097722   1.491
## siteIndia:age                   -0.060651   0.058419  -1.038
## siteKorea:age                    0.010045   0.117246   0.086
## siteCanada:age                  -0.047122   0.063888  -0.738
## taskconventional:age             0.035911   0.062923   0.571
## siteIndia:taskconventional:age  -0.027893   0.072498  -0.385
## siteKorea:taskconventional:age   0.091923   0.146802   0.626
## siteCanada:taskconventional:age -0.033484   0.079313  -0.422

## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##   vcov(x)     if you need it

# how bad is ~?
d %>% 
  filter(q_kind == "base_q", !is.na(answer)) %>%
  group_by(site, agebin, task, item) %>%
  multi_boot_standard(col="answer") %>%
  ungroup() %>%
  arrange(site, agebin, mean) %>%
  mutate(order = row_number()) %>%
  ggplot(., aes(x=order, y=mean, col=task, label=item)) +
  # geom_bar(position="dodge", stat="identity") +
  geom_text(size=3) +
  geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position="dodge", alpha=.3) +
  facet_wrap(site~agebin, ncol=4, scales="free") + 
  theme_few() +
  scale_color_ptol() +
  xlab("items") +
  ylab("mean rating (1=really bad)") +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

## Warning: Width not defined. Set with `position_dodge(width = ?)`

# would it be okay if ~?
d %>%
  filter(q_kind != "base_q", !is.na(answer)) %>%
  mutate(q_kind = fct_relevel(q_kind, "faraway", "no rule", "everyone")) %>%
  group_by(site, agebin, task, q_kind) %>%
  multi_boot_standard(col="answer") %>%
  ggplot(., aes(x=site, y=mean, fill=task)) +
  geom_bar(position="dodge", stat="identity") +
  geom_errorbar(aes(ymin=ci_lower,ymax=ci_upper), position="dodge") +
  facet_grid(q_kind~agebin) + 
  theme_few() +
  scale_fill_ptol() +
  ylab("mean proportion saying okay \ngiven new circumstances")

# would it be okay if ~?
d %>%
  filter(q_kind != "base_q", !is.na(answer)) %>%
  mutate(q_kind = fct_relevel(q_kind, "faraway", "no rule", "everyone"),
         age_group = floor(age)) %>%
  group_by(site, age_group, task, q_kind) %>%
  multi_boot_standard(col="answer") %>%
  ggplot(., aes(x=age_group, y=mean, col=task)) +
  geom_line() +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
               position = position_dodge(width = .1)) +
  facet_grid(q_kind~site) +
  theme_few() +
  scale_color_ptol() +
  ylab("mean proportion saying okay \ngiven new circumstances")

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_linerange).

d %>% 
  filter(q_kind == "faraway", !is.na(answer)) %>%
  group_by(site, agebin, task, item) %>%
  multi_boot_standard(col="answer") %>%
  ungroup() %>%
  arrange(site, agebin, mean) %>%
  mutate(order = row_number()) %>%
  ggplot(., aes(x=order, y=mean, col=task, label=item)) +
  # geom_bar(position="dodge", stat="identity") +
  geom_text(size=3) +
  geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position="dodge", alpha=.3) +
  facet_wrap(site~agebin, ncol=4, scales="free") + 
  theme_few() +
  scale_color_ptol() +
  xlab("items") +
  ylab("mean proportion saying okay \nin faraway country") +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

## Warning: Width not defined. Set with `position_dodge(width = ?)`

d %>% 
  filter(q_kind == "no rule", !is.na(answer)) %>%
  group_by(site, agebin, task, item) %>%
  multi_boot_standard(col="answer") %>%
  ungroup() %>%
  arrange(site, agebin, mean) %>%
  mutate(order = row_number()) %>%
  ggplot(., aes(x=order, y=mean, col=task, label=item)) +
  # geom_bar(position="dodge", stat="identity") +
  geom_text(size=3) +
  geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position="dodge", alpha=.3) +
  facet_wrap(site~agebin, ncol=4, scales="free") + 
  theme_few() +
  scale_color_ptol() +
  xlab("items") +
  ylab("mean proportion saying okay \nif there's no rule") +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

## Warning: Width not defined. Set with `position_dodge(width = ?)`

d %>% 
  filter(q_kind == "everyone", !is.na(answer)) %>%
  group_by(site, agebin, task, item) %>%
  multi_boot_standard(col="answer") %>%
  ungroup() %>%
  arrange(site, agebin, mean) %>%
  mutate(order = row_number()) %>%
  ggplot(., aes(x=order, y=mean, col=task, label=item)) +
  # geom_bar(position="dodge", stat="identity") +
  geom_text(size=3) +
  geom_linerange(aes(ymin=ci_lower,ymax=ci_upper), position="dodge", alpha=.3) +
  facet_wrap(site~agebin, ncol=4, scales="free") + 
  theme_few() +
  scale_color_ptol() +
  xlab("items") +
  ylab("mean proportion saying okay \nif everyone did it") +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

## Warning: Width not defined. Set with `position_dodge(width = ?)`

Prosox

Erica Yoon

4/10/2018