jittered dot plot

Credits: Cedric Schere

Reference: https://www.cedricscherer.com/2019/05/17/the-evolution-of-a-ggplot-ep.-1/#aim

library(tidyverse)
#library(ggsci)
#library(showtext)

#font_add_google("Poppins", "Poppins")
#font_add_google("Roboto Mono", "Roboto Mono")
#showtext_auto()

df_students <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv")

df_world_tile <- readr::read_csv("https://gist.githubusercontent.com/maartenzam/787498bbc07ae06b637447dbd430ea0a/raw/9a9dafafb44d8990f85243a9c7ca349acd3a0d07/worldtilegrid.csv") %>% 
  mutate(
    ## Namibias two-digit country code is handled as `NA` - let us fix that
    alpha.2 = if_else(name == "Namibia", "NA", alpha.2),
    ## We are going to split "Americas" into "North America" and "Sout America"
    region = if_else(region == "Americas", sub.region, region),
    region = if_else(region %in% c("Northern America", "Central America", "Caribbean"), 
                     "North America", region),
    region = if_else(region == "Southern America", "South America", region),
    ## to join both data sets, we need a id column
    country_code = alpha.3
  )

df_ratios <- df_students %>% 
  ## Let's keep only the most recent data per country
  group_by(country, indicator) %>% 
  filter(year == max(year)) %>% 
  ungroup() %>% 
  # Create `NA`s for countries which do not have any data 2012-2018
  complete(indicator, nesting(country, country_code)) %>% 
  ## Let's focus on primary education and keep only countries (coded by letters)
  filter(
    indicator == "Primary Education",
    str_detect(country_code, "[A-Z]")
  ) %>% 
  ## merge with world tile map data
  full_join(df_world_tile) %>%
  filter(
    !is.na(region),
    !is.na(indicator)
  ) %>% 
  group_by(region) %>% 
  mutate(student_ratio_region = median(student_ratio, na.rm = T)) %>% 
  ungroup()

#arrange the box plots or any other type such as bars or violins in an in- or decreasing order to #simplify readability. Since the category “continent” does not have an intrinsic order, I rearrange #the box plots by their mean student-teacher ratio instead of sorting them alphabetically which is #the default:
library(ggsci)
theme_set(theme_light(base_size = 18, base_family = "Arial Black"))
  
df_sorted <- df_ratios %>% 
  mutate(region=fct_reorder(region, -student_ratio_region))


g <- ggplot(df_sorted, aes(y=region, x=student_ratio, color=region)) + 
  scale_x_continuous(limits = c(0,90), expand = c(0.007, 0.005)) +
  scale_color_aaas() + 
  labs(x= "Student Teacher Ratio", y= NULL) +
  theme(legend.position = "none", 
        axis.title = element_text(size = rel(0.8)),
        axis.text = element_text(size=16, family = "Cochin"),
        panel.grid = element_blank())

set.seed(2019)
world_avg <- df_sorted%>%summarize(avg=mean(student_ratio, na.rm=TRUE))%>%
  pull(avg)

g_text <- g +   geom_jitter(size=2, height = 0.2, alpha=0.25) +
  stat_summary(fun = mean, geom="point", size = 5) +
  geom_vline(aes(xintercept=mean(student_ratio, na.rm=TRUE))) +
#  geom_point(aes(y=region, x=student_ratio_region, size=5)) +
geom_segment(aes(x=world_avg, xend=student_ratio_region, y=region, yend=region), size=0.8) +
  
  annotate(geom = "text", y =6.3, x=35, color="brown", family="Cochin", label=glue::glue("Worldwide Average:\n{round(world_avg,1)} students per teacher ratio"), lineheight = .9) +
  
  annotate(geom = "text", y =3.5, x=10.5, color="brown", family="Cochin", label=glue::glue("Continental Aveerage")) +
  
  annotate(geom = "text", y =1.7, x=65, color="brown", family="Cochin", label=glue::glue("The Central African Republic has the \nhighest student per teacher ratio "))

arrows <-
  tibble(
    x1 = c(6.2, 3.5, 1.7),
    x2 = c(5.6, 4, 1.1),
    y1 = c(35, 10, 73),
    y2 = c(world_avg, 19.4, 83)
  )

g_text +
  geom_curve(
    data = arrows, aes(y = x1, x = y1, yend = x2, xend = y2),
    arrow = arrow(length = unit(0.07, "inch")), size = 0.4,
    color = "gray20", curvature = -0.3
  )

ggsave(filename="jitterdot.png", 
       width = 12,
       height=9,
#       device = cairo_pdf
)

jittered dot plot

Richard Acquah-Sarpong

2022-01-27