suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(janitor))
# Download the UNESCO pupil-teacher ratio extract (TidyTuesday 2019-05-07).
# The file begins with a UTF-8 byte-order mark; opening the connection with
# encoding = "UTF-8-BOM" strips it, so the first column is read as
# "EDULIT_IND" instead of a mangled name such as "ï..EDULIT_IND".
data_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2019/2019-05-07/EDULIT_DS_06052019101747206.csv"
)
raw_df <- read.csv(url(data_url, encoding = "UTF-8-BOM"))
# janitor's clean_names() lower-cases column names, standardises punctuation
# and de-duplicates names that collide after cleaning (TIME/Time become
# time/time_2).
clean_ed <- raw_df %>% janitor::clean_names()
# Compare the column names before and after cleaning.
colnames(raw_df)
colnames(clean_ed)
# Expected output once the byte-order mark is stripped:
## [1] "EDULIT_IND" "Indicator" "LOCATION" "Country"
## [5] "TIME" "Time" "Value" "Flag.Codes"
## [9] "Flags"
## [1] "edulit_ind" "indicator" "location" "country"
## [5] "time" "time_2" "value" "flag_codes"
## [9] "flags"
# Tidy the `indicator` labels with stringr: strip the boilerplate prefix and
# the "(headcount basis)" suffix, trim surrounding whitespace, then
# capitalise each word.
# fixed() makes the parentheses literal. As a bare regex, "(headcount basis)"
# is a capture group that matches only the inner words and leaves "()" behind,
# which previously needed a second str_remove() to clean up.
clean_ed <- raw_df %>%
  janitor::clean_names() %>%
  mutate(
    indicator = str_remove(indicator, "Pupil-teacher ratio in"),
    indicator = str_remove(indicator, fixed("(headcount basis)")),
    indicator = str_trim(indicator),
    indicator = stringr::str_to_title(indicator)
  ) %>%
  select(-time_2) %>% # drop the second time column ("Time" in the raw file)
  rename(
    country_code = location,
    student_ratio = value,
    year = time
  )
# Show the first rows before and after cleaning.
head(raw_df)
head(clean_ed)
# Keep clean_ed a plain data.frame for the base-style indexing used later.
clean_ed <- as.data.frame(clean_ed)
# Look up the continent for every row from its country name and store it in
# a new `continent` column, then tabulate how many rows fall in each.
library(countrycode)
clean_ed <- clean_ed %>%
  mutate(
    continent = countrycode(
      country,
      origin = "country.name",
      destination = "continent"
    )
  )
table(clean_ed$continent)
##
## Africa Americas Asia Europe Oceania
## 887 826 1129 1154 168
# One data frame per continent. filter() keeps only rows where the condition
# is TRUE, so rows with an NA continent end up in none of these subsets.
africa <- filter(clean_ed, continent == "Africa")
americas <- filter(clean_ed, continent == "Americas")
asia <- filter(clean_ed, continent == "Asia")
europe <- filter(clean_ed, continent == "Europe")
oceania <- filter(clean_ed, continent == "Oceania")
#' Plot student ratios over time, one facet per country
#'
#' Draws a point-and-line series per indicator, coloured by indicator, in a
#' separate panel for each country. The legend sits above the panels and the
#' x-axis labels are rotated 90 degrees.
#'
#' @param continent_df Data frame with columns `year`, `student_ratio`,
#'   `indicator` and `country`.
#' @param y_limits Numeric vector of length 2 giving the lower and upper
#'   y-axis limits passed to `ylim()`. Observations outside this range are
#'   removed from the plot (ggplot2 warns when this happens), not just
#'   clipped at the panel edge.
#' @return A ggplot object; it is drawn when printed.
plot_student_ratio <- function(continent_df, y_limits = c(0, 50)) {
  ggplot(continent_df, aes(year, student_ratio, colour = indicator)) +
    geom_point() +
    geom_line(aes(group = indicator)) +
    facet_wrap(vars(country)) +
    theme(
      legend.position = "top",
      axis.text.x = element_text(angle = 90, hjust = 1)
    ) +
    ylim(y_limits[1], y_limits[2])
}

# One figure per continent; each top-level call auto-prints its plot.
plot_student_ratio(africa)

plot_student_ratio(americas)

plot_student_ratio(asia)

plot_student_ratio(europe)

plot_student_ratio(oceania)
