Investigation of the relationship between children’s knowledge of the word “bottle” and breastfeeding rates. The prediction is a negative correlation.

Specific questions (from email with Lee Saunders):

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.5
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(wordbankr)
library(here)
## here() starts at /Users/mcfrank/Projects/breastfeeding

WHO by-country data.

# Read the WHO exclusive-breastfeeding table, resolved relative to the
# project root reported by here().
breastfeeding <- here("WHOSIS_000006.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
##   Country = col_character(),
##   Year = col_character(),
##   `Infants exclusively breastfed for the first six months of life (%)` = col_double()
## )

For simplicity, take the country average, ignoring rate changes across eras. Obviously this should be re-examined.

# Shorten the column names, then collapse the table to one mean rate per
# country (all years pooled).
# Plain assignment with `%>%` is used because the compound-assignment pipe
# `%<>%` is exported by magrittr, which library(tidyverse) does not attach.
names(breastfeeding) <- c("country", "year", "rate")
breastfeeding <- breastfeeding %>%
  group_by(country) %>%
  summarise(rate = mean(rate))

Get cross-linguistic data on bottle.

# Fetch the Wordbank cross-linguistic data for "bottle". The code below reads
# the language, age, definition, comprehension and production columns.
words <- get_crossling_data("bottle")
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for British Sign Language...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Croatian...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Danish...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for English (American)...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for French (French)...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for French (Quebecois)...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Hebrew...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Italian...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Kiswahili...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Norwegian...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Russian...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Slovak...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Spanish (Mexican)...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Swedish...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Getting data for Turkish...
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric

Comprehension by age for each language (one colour per word form).

# Comprehension of each word form by age, one panel per language.
# guide = "none" hides the colour legend; the older guide = FALSE spelling is
# deprecated in current ggplot2.
ggplot(words,
       aes(x = age, y = comprehension, col = definition)) +
  geom_point() +
  facet_wrap(~language) +
  scale_color_discrete(guide = "none")

Production.

# Production of each word form by age, one panel per language.
# guide = "none" hides the colour legend; the older guide = FALSE spelling is
# deprecated in current ggplot2.
ggplot(words,
       aes(x = age, y = production, col = definition)) +
  geom_point() +
  facet_wrap(~language) +
  scale_color_discrete(guide = "none")

Average comprehension and production and merge.

# Mean comprehension and production of each "bottle" word form over ages
# 10-14 months, keeping the form with the highest mean comprehension in each
# language, then tagging each language with a country name for the WHO join.
avg_bottle <- words %>%
  group_by(language, definition) %>%
  filter(age > 9, age < 15) %>%
  summarise(comprehension = mean(comprehension),
            production = mean(production)) %>%
  group_by(language) %>%
  filter(comprehension == max(comprehension)) %>%
  # Drop the grouping so the join and reshape below work on a plain table.
  ungroup() %>%
  # Languages without an entry here (e.g. British Sign Language) get NA.
  # NOTE(review): WHO tables usually spell Russia as "Russian Federation";
  # confirm the names below against the Country column of the CSV.
  mutate(country = case_when(
    language == "Croatian" ~ "Croatia",
    language == "Danish" ~ "Denmark",
    language == "English (American)" ~ "United States of America",
    language == "French (French)" ~ "France",
    language == "French (Quebecois)" ~ "Canada",
    language == "Hebrew" ~ "Israel",
    language == "Italian" ~ "Italy",
    language == "Kiswahili" ~ "Kenya",
    language == "Norwegian" ~ "Norway",
    language == "Russian" ~ "Russia",
    language == "Slovak" ~ "Slovakia",
    language == "Spanish (Mexican)" ~ "Mexico",
    language == "Swedish" ~ "Sweden",
    language == "Turkish" ~ "Turkey"))
  
# Keep only the rows whose country also appears in the WHO table.
bottle_breastfeeding <- avg_bottle %>%
  inner_join(breastfeeding)
## Joining, by = "country"

Plot.

# Rename the WHO rate, then stack comprehension and production into a
# language_measure / language_rate pair of columns for faceting.
bb <- gather(
  rename(bottle_breastfeeding, breastfeeding_rate = rate),
  language_measure, language_rate,
  comprehension, production
)

# "Bottle" rate against breastfeeding rate, one panel per language measure.
# aes(group = 1) makes the linear fit use all countries at once rather than
# one fit per colour. guide = "none" replaces the deprecated guide = FALSE.
ggplot(bb,
       aes(x = breastfeeding_rate, y = language_rate, col = country)) +
  geom_point() +
  geom_smooth(aes(group = 1), method = "lm") +
  ggrepel::geom_label_repel(aes(label = country), size = 3) +
  facet_wrap(~language_measure) +
  xlab("Average Rate of Breastfeeding") +
  ylab("Average Rate of 'Bottle' 10-14mo") +
  theme_bw() +
  scale_color_discrete(guide = "none")

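A major challenge is that we lose data from many countries in the join, including Russia, France, and Canada. This is probably because the WHO has no data for them, but a mismatch in country names between the two tables (e.g. “Russia” vs. “Russian Federation”) has not been ruled out.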