library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(ggplot2)
This is an analysis of gender and education data for South Africa. The dataset includes indicators such as literacy rates, school enrollment rates, and gender parity indices.
data <- read.csv("gender_zaf.csv")
#Clean and structure the data
clean_data <- data %>%
select(Year, Indicator.Name, Value) %>%
pivot_wider(names_from = Indicator.Name, values_from = Value)
#View the cleaned data
head(clean_data)
## # A tibble: 6 × 266
## Year `#indicator+name` Firms with female top…¹ Firms with female pa…²
## <chr> <chr> <chr> <chr>
## 1 #date+year #indicator+value+num <NA> <NA>
## 2 2020 <NA> 38.5 10.6
## 3 2007 <NA> <NA> 22.6
## 4 2021 <NA> <NA> <NA>
## 5 2019 <NA> <NA> <NA>
## 6 2017 <NA> <NA> <NA>
## # ℹ abbreviated names: ¹​`Firms with female top manager (% of firms)`,
## # ²​`Firms with female participation in ownership (% of firms)`
## # ℹ 262 more variables:
## # `Literacy rate, youth female (% of females ages 15-24)` <chr>,
## # `Literacy rate, youth (ages 15-24), gender parity index (GPI)` <chr>,
## # `Literacy rate, youth male (% of males ages 15-24)` <chr>,
## # `Literacy rate, adult female (% of females ages 15 and above)` <chr>, …
#Filter data for youth literacy rates
youth_literacy <- clean_data %>%
select(Year, contains("Literacy rate, youth")) %>%
pivot_longer(cols = -Year, names_to = "Indicator", values_to = "Value") %>%
drop_na()
#Plot Youth Literacy Rate Over Time
ggplot(youth_literacy, aes(x = Year, y = Value, color = Indicator)) +
geom_line(linewidth = 1) +
labs(title = "Youth Literacy Rate Over Time",
x = "Year",
y = "Literacy Rate (%)") +
theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
#Filter data for gender parity index
gender_parity <- clean_data %>%
select(Year, contains("gender parity")) %>%
pivot_longer(cols = -Year, names_to = "Indicator", values_to = "Value") %>%
drop_na()
#Plot Gender Parity Index Over Time
ggplot(gender_parity, aes(x = Year, y = Value)) +
geom_line(size = 1) +
labs(title = "Gender Parity Index Over Time",
x = "Year",
y = "Gender Parity Index") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
literacy_table <- clean_data %>%
select(Year, contains("Literacy rate")) %>%
arrange(desc(Year)) %>%
head(10)
kable(literacy_table, caption = "Recent Literacy Rates")
Year | Literacy rate, youth female (% of females ages 15-24) | Literacy rate, youth (ages 15-24), gender parity index (GPI) | Literacy rate, youth male (% of males ages 15-24) | Literacy rate, adult female (% of females ages 15 and above) | Literacy rate, adult male (% of males ages 15 and above) |
---|---|---|---|---|---|
2023 | NA | NA | NA | NA | NA |
2022 | NA | NA | NA | NA | NA |
2021 | 97 | 1 | 96 | 89 | 91 |
2020 | NA | NA | NA | NA | NA |
2019 | 99 | 1 | 98 | 95 | 96 |
2018 | NA | NA | NA | NA | NA |
2017 | 97 | 1 | 94 | 86 | 88 |
2016 | 97.7195816040039 | 1.01933002471924 | 95.8310623168945 | 91.9364700317383 | 91.5023574829102 |
2015 | 99 | 1 | 99 | 93 | 95 |
2014 | 99 | 1 | 99 | 93 | 95 |
parity_table <- clean_data %>%
select(Year, contains("gender parity")) %>%
arrange(desc(Year)) %>%
head(10)
kable(parity_table, caption = "Recent Gender Parity Statistics")
Year | Literacy rate, youth (ages 15-24), gender parity index (GPI) | School enrollment, primary (gross), gender parity index (GPI) | School enrollment, primary and secondary (gross), gender parity index (GPI) | School enrollment, secondary (gross), gender parity index (GPI) | School enrollment, tertiary (gross), gender parity index (GPI) |
---|---|---|---|---|---|
2023 | NA | NA | NA | NA | NA |
2022 | NA | NA | NA | NA | 1.40590000152588 |
2021 | 1 | NA | NA | NA | 1.38700997829437 |
2020 | NA | 0.959590017795563 | 1.00926995277405 | 1.08913004398346 | 1.36945998668671 |
2019 | 1 | 0.965240001678467 | 1.00258004665375 | 1.06217002868652 | 1.33575999736786 |
2018 | NA | 0.961589992046356 | 1.00301003456116 | 1.07071995735168 | 1.32193994522095 |
2017 | 1 | 0.966669976711273 | 1.01267004013062 | 1.08571004867554 | 1.30654001235962 |
2016 | 1.01933002471924 | 0.968039989471436 | 1.01780998706818 | 1.09534001350403 | 1.30541002750397 |
2015 | 1 | 0.933380007743835 | 0.955309987068176 | 0.987519979476929 | 1.30209004878998 |
2014 | 1 | 0.961239993572235 | 1.01610994338989 | 1.09686005115509 | 1.2974499464035 |
The analysis of trends in youth literacy rates and gender parity indices over time in South Africa.