GCSE Results by Free School Meal Eligibility

Read in the data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# File name of the GCSE results CSV (a relative path, despite the variable name).
url <- "2223_national_char_data_provisional.csv"
# Load the CSV into a tibble; readr guesses the column types and reports them.
f <- read_csv(file = url)
## Rows: 8600 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (40): time_identifier, geographic_level, country_code, country_name, ver...
## dbl  (3): time_period, t_schools, t_pupils
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Stats for total percent of entrants

# pt_entbasics was read as text; check its type before and after converting.
class(f$pt_entbasics)
## [1] "character"
# as.numeric() turns entries that are not numbers into NA, which is what the
# coercion warning below reports.
f$pt_entbasics <- as.numeric(f$pt_entbasics)
## Warning: NAs introduced by coercion
class(f$pt_entbasics)
## [1] "numeric"
# Jittered dot plot of percent of entrants for each free school meal category.
# Labels name the plotted variables instead of the template placeholders.
ggplot(f, aes(x = free_school_meals, y = pt_entbasics)) +
  geom_point(
    position = position_jitter(width = 0.2),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent of Entrants by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent of entrants (pt_entbasics)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

# Mean pt_entbasics per free_school_meals category; aggregate()'s formula
# interface leaves out rows with NA by default.
average_score_percent_total <- aggregate(
  pt_entbasics ~ free_school_meals,
  data = f,
  FUN = mean
)

Stats for total percent achieving grades 4 or above: make the column numeric instead of character

# pt_l2basics_94 was read as text; check its type before and after converting.
class(f$pt_l2basics_94)
## [1] "character"
# as.numeric() turns entries that are not numbers into NA, which is what the
# coercion warning below reports.
f$pt_l2basics_94 <- as.numeric(f$pt_l2basics_94)
## Warning: NAs introduced by coercion
class(f$pt_l2basics_94)
## [1] "numeric"
# Jittered dot plot of pt_l2basics_94 for each free school meal category.
# Labels name the plotted variables instead of the template placeholders.
# NOTE(review): "grades 4 or above" is inferred from the column suffix and the
# sibling pt_l2basics_95 section ("grades 5 or above") -- confirm the wording.
ggplot(f, aes(x = free_school_meals, y = pt_l2basics_94)) +
  geom_point(
    position = position_jitter(width = 0.2),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent Achieving Grades 4 or Above by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent achieving grades 4 or above (pt_l2basics_94)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

Average score for each category of free school meals

# Mean pt_l2basics_94 for each free_school_meals category; the formula
# interface of aggregate() leaves out rows with NA by default.
average_score <- aggregate(
  pt_l2basics_94 ~ free_school_meals,
  data = f,
  FUN = mean
)
# Show the per-category means.
print(average_score)
##   free_school_meals pt_l2basics_94
## 1               FSM       50.65844
## 2     FSM all other       69.19001
## 3             Total       60.77055

Stats for total percent achieving grades 5 or above

# pt_l2basics_95 was read as text; check its type before and after converting.
class(f$pt_l2basics_95)
## [1] "character"
# as.numeric() turns entries that are not numbers into NA, which is what the
# coercion warning below reports.
f$pt_l2basics_95 <- as.numeric(f$pt_l2basics_95)
## Warning: NAs introduced by coercion
class(f$pt_l2basics_95)
## [1] "numeric"
# Jittered dot plot of percent achieving grades 5 or above for each free
# school meal category. Labels name the plotted variables instead of the
# template placeholders.
ggplot(f, aes(x = free_school_meals, y = pt_l2basics_95)) +
  geom_point(
    position = position_jitter(width = 0.2),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent Achieving Grades 5 or Above by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent achieving grades 5 or above (pt_l2basics_95)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

# Mean pt_l2basics_95 per free_school_meals category; aggregate()'s formula
# interface leaves out rows with NA by default.
average_score_five_above <- aggregate(
  pt_l2basics_95 ~ free_school_meals,
  data = f,
  FUN = mean
)

Stats for average Progress 8 score (avg_p8score)

# avg_p8score was read as text; check its type before and after converting.
class(f$avg_p8score)
## [1] "character"
# as.numeric() turns entries that are not numbers into NA, which is what the
# coercion warning below reports.
f$avg_p8score <- as.numeric(f$avg_p8score)
## Warning: NAs introduced by coercion
class(f$avg_p8score)
## [1] "numeric"
# Jittered dot plot of avg_p8score for each free school meal category.
# Labels name the plotted variables instead of the template placeholders.
# NOTE(review): "Progress 8" is inferred from the column name avg_p8score --
# confirm the wording against the data dictionary.
ggplot(f, aes(x = free_school_meals, y = avg_p8score)) +
  geom_point(
    position = position_jitter(width = 0.2),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Average Progress 8 Score by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Average Progress 8 score (avg_p8score)"
  )
## Warning: Removed 3506 rows containing missing values (`geom_point()`).

# Mean avg_p8score per free_school_meals category; aggregate()'s formula
# interface leaves out rows with NA by default.
average_score_percent_attainment <- aggregate(
  avg_p8score ~ free_school_meals,
  data = f,
  FUN = mean
)

Increase in Salary by Qualifications in the Work Force

Read in the data

# Load the salary CSV into a tibble; readr guesses and reports column types.
salary <- read_csv(file = "data.csv")
## Rows: 119 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Measure, Ethnicity, Gender, Qualification, Qualifications, Value_type
## dbl (3): Time, change in wage, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Filter the data

# Qualification labels to leave out of the salary analysis
strings_to_exclude <- c("Other qualifications", "NQF Below Level 2")

# Keep only the rows whose Qualifications value is not one of those labels
# (%in% never yields NA, so the logical index is always TRUE or FALSE)
filtered_salary <- salary[!(salary$Qualifications %in% strings_to_exclude), ]

Bar graph showing the increase in salary by gender in 2021

# Dodged bars of `change in wage` per qualification, one fill colour per gender.
# geom_col() draws bar heights straight from the y values.
ggplot(
  data = filtered_salary,
  mapping = aes(x = Qualifications, y = `change in wage`, fill = Gender)
) +
  geom_col(position = "dodge") +
  ylim(0, 1) +
  theme_classic() +
  labs(
    title = "2021 Increase in Wage by Education and Gender",
    x = "Education Level",
    y = "Change in Wage"
  )

Bar graph showing the increase in salary by ethnicity in 2021

# Dodged bars of `change in wage` per qualification, one fill colour per
# ethnicity. The title carries the year so it matches this section's heading
# and the title of the gender chart.
ggplot(
  filtered_salary,
  aes(x = Qualifications, y = `change in wage`, fill = Ethnicity)
) +
  geom_col(position = "dodge") +
  labs(
    title = "2021 Increase in Wage by Education and Ethnicity",
    x = "Education Level",
    y = "Change in Wage"
  ) +
  ylim(0, 1) +
  theme_classic()

Box plot showing the increase in salary in 2021

# Box plot of Value for each qualification level.
# NOTE(review): the bar charts plot `change in wage` while this plots `Value`;
# confirm that `Value` is the intended column for the y-axis label used here.
ggplot(
  data = filtered_salary,
  mapping = aes(x = Qualifications, y = Value)
) +
  geom_boxplot(fill = "light blue") +
  theme_classic() +
  labs(
    title = "Box Plot of Salaries by Qualifications",
    x = "Education Level",
    y = "Percentage increase of Wages"
  )