library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(ggplot2)

Introduction

This is an analysis of gender and education data for South Africa. The dataset includes indicators such as literacy rates, school enrollment rates, and gender parity indices.

Data Loading and Cleaning

Load the data

# Load the raw indicator export (long format: one row per year/indicator).
data <- read.csv("gender_zaf.csv")

# Clean and structure the data.
# The file carries a tag row directly beneath the header (`#date+year`,
# `#indicator+name`, `#indicator+value+num` -- HXL-style hashtags). Left in
# place it becomes a data row, adds a bogus `#indicator+name` column after
# pivoting, and forces every column to be read as character. Drop it, then
# restore numeric types so years sort/plot as numbers and values are usable
# on a continuous axis.
clean_data <- data %>%
  select(Year, Indicator.Name, Value) %>%
  filter(!grepl("^#", Year)) %>%
  mutate(
    Year = as.integer(as.character(Year)),
    Value = as.numeric(as.character(Value))
  ) %>%
  # One row per year, one column per indicator.
  pivot_wider(names_from = Indicator.Name, values_from = Value)

# View the cleaned data
head(clean_data)
## # A tibble: 6 × 266
##   Year       `#indicator+name`    Firms with female top…¹ Firms with female pa…²
##   <chr>      <chr>                <chr>                   <chr>                 
## 1 #date+year #indicator+value+num <NA>                    <NA>                  
## 2 2020       <NA>                 38.5                    10.6                  
## 3 2007       <NA>                 <NA>                    22.6                  
## 4 2021       <NA>                 <NA>                    <NA>                  
## 5 2019       <NA>                 <NA>                    <NA>                  
## 6 2017       <NA>                 <NA>                    <NA>                  
## # ℹ abbreviated names: ¹​`Firms with female top manager (% of firms)`,
## #   ²​`Firms with female participation in ownership (% of firms)`
## # ℹ 262 more variables:
## #   `Literacy rate, youth female (% of females ages 15-24)` <chr>,
## #   `Literacy rate, youth (ages 15-24), gender parity index (GPI)` <chr>,
## #   `Literacy rate, youth male (% of males ages 15-24)` <chr>,
## #   `Literacy rate, adult female (% of females ages 15 and above)` <chr>, …

Youth Literacy Rate Over Time

# Filter data for youth literacy rates.
# The youth gender parity index column also matches "Literacy rate, youth",
# but it is a ratio close to 1 rather than a percentage, so it is excluded
# from this percent-scaled plot.
youth_literacy <- clean_data %>%
  select(
    Year,
    contains("Literacy rate, youth"),
    -contains("gender parity")
  ) %>%
  pivot_longer(cols = -Year, names_to = "Indicator", values_to = "Value") %>%
  drop_na() %>%
  # Year/Value may still be character at this point; a character x axis makes
  # geom_line() put every year in its own group (no line is drawn) and a
  # character y axis is discrete. Converting is a no-op if already numeric.
  mutate(
    Year = as.integer(as.character(Year)),
    Value = as.numeric(as.character(Value))
  )

# Plot Youth Literacy Rate Over Time (one line per indicator)
ggplot(youth_literacy, aes(x = Year, y = Value, color = Indicator)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Youth Literacy Rate Over Time",
    x = "Year",
    y = "Literacy Rate (%)"
  ) +
  theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Gender Parity Index Over Time

# Filter data for gender parity index (every indicator whose name contains
# "gender parity"), reshaped to one row per year/indicator.
gender_parity <- clean_data %>%
  select(Year, contains("gender parity")) %>%
  pivot_longer(cols = -Year, names_to = "Indicator", values_to = "Value") %>%
  drop_na() %>%
  # Year/Value may still be character at this point; a character x axis makes
  # geom_line() put every year in its own group (no line is drawn) and a
  # character y axis is discrete. Converting is a no-op if already numeric.
  mutate(
    Year = as.integer(as.character(Year)),
    Value = as.numeric(as.character(Value))
  )

# Plot Gender Parity Index Over Time.
# Several indicators share this data frame, so colour (and therefore group)
# by Indicator; otherwise all series would be joined into a single line.
# `linewidth` replaces the `size` aesthetic deprecated for lines in
# ggplot2 3.4.0.
ggplot(gender_parity, aes(x = Year, y = Value, color = Indicator)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Gender Parity Index Over Time",
    x = "Year",
    y = "Gender Parity Index"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Summary Tables

Recent Literacy Rates

# Ten most recent years of every literacy-rate indicator, newest first.
literacy_table <- clean_data %>%
  select(Year, contains("Literacy rate")) %>%
  arrange(desc(Year)) %>%
  slice_head(n = 10)

# Render the table with its caption.
knitr::kable(literacy_table, caption = "Recent Literacy Rates")
Recent Literacy Rates
Year Literacy rate, youth female (% of females ages 15-24) Literacy rate, youth (ages 15-24), gender parity index (GPI) Literacy rate, youth male (% of males ages 15-24) Literacy rate, adult female (% of females ages 15 and above) Literacy rate, adult male (% of males ages 15 and above)
2023 NA NA NA NA NA
2022 NA NA NA NA NA
2021 97 1 96 89 91
2020 NA NA NA NA NA
2019 99 1 98 95 96
2018 NA NA NA NA NA
2017 97 1 94 86 88
2016 97.7195816040039 1.01933002471924 95.8310623168945 91.9364700317383 91.5023574829102
2015 99 1 99 93 95
2014 99 1 99 93 95

Gender Parity Statistics

# Ten most recent years of every gender-parity indicator, newest first.
parity_table <- clean_data %>%
  select(Year, contains("gender parity")) %>%
  arrange(desc(Year)) %>%
  slice_head(n = 10)

# Render the table with its caption.
knitr::kable(parity_table, caption = "Recent Gender Parity Statistics")
Recent Gender Parity Statistics
Year Literacy rate, youth (ages 15-24), gender parity index (GPI) School enrollment, primary (gross), gender parity index (GPI) School enrollment, primary and secondary (gross), gender parity index (GPI) School enrollment, secondary (gross), gender parity index (GPI) School enrollment, tertiary (gross), gender parity index (GPI)
2023 NA NA NA NA NA
2022 NA NA NA NA 1.40590000152588
2021 1 NA NA NA 1.38700997829437
2020 NA 0.959590017795563 1.00926995277405 1.08913004398346 1.36945998668671
2019 1 0.965240001678467 1.00258004665375 1.06217002868652 1.33575999736786
2018 NA 0.961589992046356 1.00301003456116 1.07071995735168 1.32193994522095
2017 1 0.966669976711273 1.01267004013062 1.08571004867554 1.30654001235962
2016 1.01933002471924 0.968039989471436 1.01780998706818 1.09534001350403 1.30541002750397
2015 1 0.933380007743835 0.955309987068176 0.987519979476929 1.30209004878998
2014 1 0.961239993572235 1.01610994338989 1.09686005115509 1.2974499464035

Conclusion

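This analysis examined trends in youth literacy rates and gender parity indices over time in South Africa. In the years with reported data between 2014 and 2021, youth literacy was between 94% and 99% for both females and males, and the youth literacy gender parity index was at or just above 1. From 2014 to 2020 the primary school enrollment parity index stayed slightly below 1 (about 0.93–0.97), while the tertiary enrollment parity index rose from about 1.30 in 2014 to about 1.41 in 2022.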