GCSE Results by Free School Meal Eligibility
Read in the data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# NOTE: despite its name, `url` holds a relative file path, not a web address.
url <- "2223_national_char_data_provisional.csv"
# Parse the CSV into a tibble; the column types are guessed by readr.
f <- readr::read_csv(file = url)
## Rows: 8600 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (40): time_identifier, geographic_level, country_code, country_name, ver...
## dbl (3): time_period, t_schools, t_pupils
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Stats for total percent of entrants
# Inspect the type the column was given when the CSV was parsed.
class(f[["pt_entbasics"]])
## [1] "character"
# Coerce to numeric. Entries that are not valid numbers become NA, which is
# what triggers the coercion warning below.
f[["pt_entbasics"]] <- as.numeric(f[["pt_entbasics"]])
## Warning: NAs introduced by coercion
# Confirm the conversion took effect.
class(f[["pt_entbasics"]])
## [1] "numeric"
# Dot plot of pt_entbasics for each free_school_meals category.
# height = 0 keeps the jitter horizontal only: when height is omitted,
# position_jitter() also adds vertical noise, which would shift the plotted
# values away from the real data. Rows whose value is NA are dropped by
# geom_point() with a warning.
ggplot(f, aes(x = free_school_meals, y = pt_entbasics)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent of Entrants by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent of entrants (pt_entbasics)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

# Mean pt_entbasics within each free_school_meals category.
# na.action = na.omit is the default for the formula method; it is spelled out
# here because the numeric coercion above introduced NAs that must be skipped.
average_score_percent_total <- aggregate(
  pt_entbasics ~ free_school_meals,
  data = f,
  FUN = mean,
  na.action = na.omit
)
Stats for total percent achieving grades 4 or above (make the column numeric instead of character)
# Inspect the type the column was given when the CSV was parsed.
class(f[["pt_l2basics_94"]])
## [1] "character"
# Coerce to numeric. Entries that are not valid numbers become NA, which is
# what triggers the coercion warning below.
f[["pt_l2basics_94"]] <- as.numeric(f[["pt_l2basics_94"]])
## Warning: NAs introduced by coercion
# Confirm the conversion took effect.
class(f[["pt_l2basics_94"]])
## [1] "numeric"
# Dot plot of pt_l2basics_94 for each free_school_meals category.
# height = 0 keeps the jitter horizontal only: when height is omitted,
# position_jitter() also adds vertical noise, which would shift the plotted
# values away from the real data. Rows whose value is NA are dropped by
# geom_point() with a warning.
# NOTE(review): the "grades 4 or above" wording is inferred by analogy with the
# pt_l2basics_95 section ("grades 5 or above") -- confirm against the data
# dictionary.
ggplot(f, aes(x = free_school_meals, y = pt_l2basics_94)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent Achieving Grades 4 or Above by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent achieving grades 4 or above (pt_l2basics_94)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

Average score for each category of free school meals
# Mean pt_l2basics_94 within each free_school_meals category.
# na.action = na.omit is the default for the formula method; it is spelled out
# here because the numeric coercion above introduced NAs that must be skipped.
average_score <- aggregate(
  pt_l2basics_94 ~ free_school_meals,
  data = f,
  FUN = mean,
  na.action = na.omit
)
# Display the per-category means.
average_score
## free_school_meals pt_l2basics_94
## 1 FSM 50.65844
## 2 FSM all other 69.19001
## 3 Total 60.77055
Stats for total percent achieving grades 5 or above
# Inspect the type the column was given when the CSV was parsed.
class(f[["pt_l2basics_95"]])
## [1] "character"
# Coerce to numeric. Entries that are not valid numbers become NA, which is
# what triggers the coercion warning below.
f[["pt_l2basics_95"]] <- as.numeric(f[["pt_l2basics_95"]])
## Warning: NAs introduced by coercion
# Confirm the conversion took effect.
class(f[["pt_l2basics_95"]])
## [1] "numeric"
# Dot plot of pt_l2basics_95 for each free_school_meals category.
# height = 0 keeps the jitter horizontal only: when height is omitted,
# position_jitter() also adds vertical noise, which would shift the plotted
# values away from the real data. Rows whose value is NA are dropped by
# geom_point() with a warning.
ggplot(f, aes(x = free_school_meals, y = pt_l2basics_95)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Percent Achieving Grades 5 or Above by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Percent achieving grades 5 or above (pt_l2basics_95)"
  )
## Warning: Removed 108 rows containing missing values (`geom_point()`).

# Mean pt_l2basics_95 within each free_school_meals category.
# na.action = na.omit is the default for the formula method; it is spelled out
# here because the numeric coercion above introduced NAs that must be skipped.
average_score_five_above <- aggregate(
  pt_l2basics_95 ~ free_school_meals,
  data = f,
  FUN = mean,
  na.action = na.omit
)
Stats for average Progress 8 score (avg_p8score)
# Inspect the type the column was given when the CSV was parsed.
class(f[["avg_p8score"]])
## [1] "character"
# Coerce to numeric. Entries that are not valid numbers become NA, which is
# what triggers the coercion warning below.
f[["avg_p8score"]] <- as.numeric(f[["avg_p8score"]])
## Warning: NAs introduced by coercion
# Confirm the conversion took effect.
class(f[["avg_p8score"]])
## [1] "numeric"
# Dot plot of avg_p8score for each free_school_meals category.
# height = 0 keeps the jitter horizontal only: when height is omitted,
# position_jitter() also adds vertical noise, which would shift the plotted
# values away from the real data. Rows whose value is NA are dropped by
# geom_point() with a warning.
# NOTE(review): "Progress 8" is the presumed meaning of the `p8` in the column
# name -- confirm against the data dictionary.
ggplot(f, aes(x = free_school_meals, y = avg_p8score)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    color = "blue",
    alpha = 0.7
  ) +
  labs(
    title = "Average Progress 8 Score by Free School Meal Eligibility",
    x = "Free school meal eligibility",
    y = "Average Progress 8 score (avg_p8score)"
  )
## Warning: Removed 3506 rows containing missing values (`geom_point()`).

# Mean avg_p8score within each free_school_meals category.
# na.action = na.omit is the default for the formula method; it is spelled out
# here because the numeric coercion above introduced NAs that must be skipped.
average_score_percent_attainment <- aggregate(
  avg_p8score ~ free_school_meals,
  data = f,
  FUN = mean,
  na.action = na.omit
)
Increase in Salary by Qualifications in the Work Force
# Load the salary data set from data.csv (path is relative to the working
# directory); the column types are guessed by readr.
salary <- readr::read_csv(file = "data.csv")
## Rows: 119 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Measure, Ethnicity, Gender, Qualification, Qualifications, Value_type
## dbl (3): Time, change in wage, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Filter the data
# Qualification labels whose rows are dropped before plotting.
strings_to_exclude <- c("Other qualifications", "NQF Below Level 2")
# Keep every row whose Qualifications value is not in the exclusion list.
# %in% never returns NA, so the logical index is safe to use directly.
filtered_salary <- salary[!(salary$Qualifications %in% strings_to_exclude), ]
Bar graph showing the increase in salary by gender in 2021
# Dodged bar chart of `change in wage` per qualification, filled by Gender.
# geom_col() draws bar heights straight from the y values.
# ylim(0, 1) sets the y scale limits; values outside that range would be
# removed rather than clipped.
ggplot(
  data = filtered_salary,
  mapping = aes(x = Qualifications, y = `change in wage`, fill = Gender)
) +
  geom_col(position = "dodge") +
  labs(
    title = "2021 Increase in Wage by Education and Gender",
    x = "Education Level",
    y = "Change in Wage"
  ) +
  ylim(0, 1) +
  theme_classic()

Bar graph showing the increase in salary by ethnicity in 2021
# Dodged bar chart of `change in wage` per qualification, filled by Ethnicity.
# geom_col() draws bar heights straight from the y values.
# ylim(0, 1) sets the y scale limits; values outside that range would be
# removed rather than clipped.
# The title carries the year so that it matches the section heading and the
# companion chart that is filled by Gender.
ggplot(
  data = filtered_salary,
  mapping = aes(x = Qualifications, y = `change in wage`, fill = Ethnicity)
) +
  geom_col(position = "dodge") +
  labs(
    title = "2021 Increase in Wage by Education and Ethnicity",
    x = "Education Level",
    y = "Change in Wage"
  ) +
  ylim(0, 1) +
  theme_classic()

Box plot showing the increase in salary in 2021
# Box plot of the Value column for each qualification level.
# NOTE(review): the title says "Salaries" and the y label says "Percentage
# increase", while the bar charts plot `change in wage` for the increase --
# confirm that `Value` is the intended column and that the labels describe it.
ggplot(
  data = filtered_salary,
  mapping = aes(x = Qualifications, y = Value)
) +
  geom_boxplot(fill = "light blue") +
  labs(
    title = "Box Plot of Salaries by Qualifications",
    x = "Education Level",
    y = "Percentage increase of Wages"
  ) +
  theme_classic()
