# Tidy Tuesday for Student-Teacher Ratios

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(janitor))

# Load the pupil-teacher ratio data published for Tidy Tuesday (2019-05-07).
#
# The CSV evidently starts with a UTF-8 byte-order mark: read with the default
# encoding, the BOM is folded into the first header and the column comes out
# as the mangled, platform-dependent name "ï..EDULIT_IND". Declaring the
# encoding as "UTF-8-BOM" strips the mark so the column is read as
# "EDULIT_IND". The URL is passed as a string rather than wrapped in url(),
# because read.csv() only applies `fileEncoding` when it opens the connection
# itself.
raw_df <- read.csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/EDULIT_DS_06052019101747206.csv",
  fileEncoding = "UTF-8-BOM"
)

# janitor::clean_names() normalises the column names: lower-case snake_case,
# standardised punctuation, and a numeric suffix for names that would
# otherwise collide (here "TIME" and "Time" become "time" and "time_2").
clean_ed <- raw_df %>%
  janitor::clean_names()

# Compare the column names before and after cleaning.
colnames(raw_df)
colnames(clean_ed)

## [1] "EDULIT_IND" "Indicator"  "LOCATION"   "Country"    "TIME"      
## [6] "Time"       "Value"      "Flag.Codes" "Flags"

## [1] "edulit_ind" "indicator"  "location"   "country"    "time"      
## [6] "time_2"     "value"      "flag_codes" "flags"

# Tidy the raw data for plotting:
#   * indicator: strip the boilerplate "Pupil-teacher ratio in" prefix and the
#     "(headcount basis)" suffix, trim the leftover whitespace and convert to
#     Title Case. fixed() makes stringr match the text literally, so the
#     parentheses are removed together with their contents instead of being
#     interpreted as a regex capture group (which would leave a stray "()"
#     behind and need a second clean-up pass).
#   * time_2: dropped; only `time` is used from here on.
#   * location / value / time: renamed to more descriptive names.
clean_ed <- raw_df %>%
  janitor::clean_names() %>%
  mutate(
    indicator = indicator %>%
      str_remove(fixed("Pupil-teacher ratio in")) %>%
      str_remove(fixed("(headcount basis)")) %>%
      str_trim() %>%
      str_to_title()
  ) %>%
  select(-time_2) %>%
  rename(
    country_code = location,
    student_ratio = value,
    year = time
  )

# Preview the data before and after cleaning.
head(raw_df)
head(clean_ed)

# Attach a `continent` column, derived from each row's country name with the
# countrycode package (origin "country.name" -> destination "continent").
library(countrycode)

# Work on a plain data.frame for the base-R column access below.
clean_ed <- as.data.frame(clean_ed)
clean_ed[["continent"]] <- countrycode(
  clean_ed[["country"]],
  origin = "country.name",
  destination = "continent"
)

# Row count per continent.
table(clean_ed[["continent"]])

## 
##   Africa Americas     Asia   Europe  Oceania 
##      887      826     1129     1154      168

# One data frame per continent, used for the per-continent plots.
# Rows whose continent is NA match none of the comparisons and are left out.
africa   <- filter(clean_ed, continent == "Africa")
americas <- filter(clean_ed, continent == "Americas")
asia     <- filter(clean_ed, continent == "Asia")
europe   <- filter(clean_ed, continent == "Europe")
oceania  <- filter(clean_ed, continent == "Oceania")

# Plot student-teacher ratio over time for one continent's data.
#
# Draws one panel per country, with one coloured point-and-line series per
# indicator (education level).
#
# Arguments:
#   data   Data frame with columns `year`, `student_ratio`, `indicator` and
#          `country`.
#   y_max  Upper limit of the y axis (default 50). ylim() is a scale limit, so
#          observations above it are dropped from the plot (ggplot2 warns
#          about the removed rows) rather than merely zoomed out of view.
#
# Returns the ggplot object; at top level it is printed automatically.
plot_student_ratios <- function(data, y_max = 50) {
  ggplot(data, aes(year, student_ratio, colour = indicator)) +
    geom_point() +
    geom_line(aes(group = indicator)) +
    facet_wrap(vars(country)) +
    theme(
      legend.position = "top",
      # Rotate the year labels so they do not overlap in the narrow panels.
      axis.text.x = element_text(angle = 90, hjust = 1)
    ) +
    ylim(0, y_max)
}

# One figure per continent.
plot_student_ratios(africa)
plot_student_ratios(americas)
plot_student_ratios(asia)
plot_student_ratios(europe)
plot_student_ratios(oceania)