This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Template example chunk: draws the default plot of the `cars` object
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
# Install FSA (loaded below and used for dunnTest) only when it is missing,
# so re-running or knitting the notebook does not reinstall it every time.
if (!requireNamespace("FSA", quietly = TRUE)) {
  install.packages("FSA")
}
library(readxl)
library(tidyverse)
library(psych)
library(ggplot2)
library(effects)
library(viridis)
library(car)
library(stats)
library(FSA)
library(scales)
# Load the assignment dataset from the Excel workbook
# NOTE(review): the path is machine-specific — consider a project-relative path
library(readxl)
dataAssignment2 <- read_excel("~/Library/Mobile Documents/com~apple~CloudDocs/UNI/courses/year two/quan/QRMSEMINARS/assignemnt 2/dataAssignment2.xlsx")
# View() opens a data viewer, so only call it when the session is interactive
# (it is not usable when the notebook is rendered non-interactively)
if (interactive()) {
  View(dataAssignment2)
}
# Work with the dataset under a shorter name
data <- dataAssignment2
# Check the structure of the dataset (columns and their types)
str(data)
# Correct the variable types: grouping columns become factors,
# the two percentage columns become numeric.
factor_cols <- c(
  "laName", "timePeriod", "regionName", "sex",
  "ethnicityMajor", "ethnicityMinor", "fsmStatus", "yearGroup"
)
for (col_name in factor_cols) {
  data[[col_name]] <- as.factor(data[[col_name]])
}
numeric_cols <- c("percentTarget", "percentFSM")
for (col_name in numeric_cols) {
  data[[col_name]] <- as.numeric(data[[col_name]])
}
# Part 1: overall England
# 1) Mean of percentTarget (percent of students that met the phonics
#    attainment target), ignoring missing values
mean(data[["percentTarget"]], na.rm = TRUE)
[1] 77.55901
# 2) Median of percentTarget, ignoring missing values
median(data[["percentTarget"]], na.rm = TRUE)
[1] 80
# 3) Summary statistics of percentTarget (quartiles, mean, count of NAs)
summary(data[["percentTarget"]])
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 70.00 80.00 77.56 87.00 100.00 3428
# 5) Boxplot of phonics attainment by region
# One fixed fill colour per region name
pal_fill <- c(
  "East Midlands" = "#A24BEA",
  "East of England" = "#1F78B4",
  "London" = "#FF69B4",
  "North East" = "#FFFF00",
  "North West" = "#008000",
  "South East" = "#ADD8E6",
  "South West" = "#FFA500",
  "West Midlands" = "#FF0000",
  "Yorkshire and The Humber" = "#964B00"
)
boxplot_regs <- ggplot(data, aes(x = regionName, y = percentTarget, fill = regionName)) +
  # One box per region; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each region
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.9,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "England's Phonics Attainment by Region",
    x = "Region",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 6)
print(boxplot_regs)
# Part 1, step 6: histogram of local-authority (LA) medians of phonics attainment
# 6.1 Collapse the data to one value per LA: the median of percentTarget
LA_att <- summarise(
  group_by(data, laName),
  LA_median_percentTarget = median(percentTarget, na.rm = TRUE),
  .groups = "drop"
)
# 6.2 Centre and spread of the LA medians
la_median_values <- LA_att$LA_median_percentTarget
national_median <- median(la_median_values, na.rm = TRUE) # vertical reference line
la_quartiles <- quantile(la_median_values, probs = c(0.25, 0.75), na.rm = TRUE)
LA_q25 <- la_quartiles[1] # IQR lower bound
LA_q75 <- la_quartiles[2] # IQR upper bound
# 6.3 Histogram of the LA medians with a shaded IQR band and a median line
# Shaded band between the 25% and 75% quantiles, spanning the full height
iqr_band <- annotate(
  "rect",
  xmin = as.numeric(LA_q25), xmax = as.numeric(LA_q75),
  ymin = -Inf, ymax = Inf,
  fill = "#FFFF00", alpha = 0.5
)
# Vertical line at the national median and a label placed at the top
median_line <- geom_vline(xintercept = national_median, color = "#E45756", linewidth = 1.2)
median_label <- annotate(
  "label",
  x = national_median, y = Inf, vjust = 1.5,
  label = paste0("National median = ", round(national_median, 1), "%"),
  fill = "#1F78B4", color = "white", size = 3.5
)
histogram <- ggplot(LA_att, aes(x = LA_median_percentTarget)) +
  geom_histogram(bins = 20, fill = "#A24BEA", color = "white", alpha = 0.85) +
  iqr_band +
  median_line +
  median_label +
  # Labels, axis formatting and theme
  labs(
    title = "Distribution of LA-level phonics attainment (medians)",
    x = "medians",
    y = "Count of LAs"
  ) +
  scale_x_continuous(labels = label_percent(scale = 1)) +
  scale_y_continuous(
    breaks = scales::breaks_width(2),
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )
# Render the histogram
print(histogram)
# Part 1, step 7: data preparation for the dot plot of local authorities
# 7.1 Medians at LA, region and national level
# a) One value per LA (kept together with its region): median of percentTarget
la_med <- summarise(
  group_by(data, laName, regionName),
  la_median = median(percentTarget, na.rm = TRUE),
  .groups = "drop"
)
# b) Median of the LA medians within each region
region_med <- summarise(
  group_by(la_med, regionName),
  region_median = median(la_median, na.rm = TRUE),
  .groups = "drop"
)
# c) Median of the LA medians across all LAs
national_median <- median(la_med[["la_median"]], na.rm = TRUE)
# 7.2 Order LAs from lowest to highest median; the rank becomes the x position
la_sorted <- arrange(la_med, la_median, laName) # ties broken by name for stability
la_sorted <- mutate(la_sorted, x_idx = row_number())
# Per region: first and last x position and the regional median
# (prepared for faint region reference lines)
region_spans <- summarise(
  group_by(la_sorted, regionName),
  x_min = min(x_idx),
  x_max = max(x_idx),
  region_median = median(la_median, na.rm = TRUE),
  .groups = "drop"
)
# Part 1, step 8: dot ("caterpillar") plot of the sorted LA medians
dotplot <- ggplot(la_sorted, aes(x = x_idx, y = la_median)) +
  # Horizontal reference line at the national median
  geom_hline(yintercept = national_median, colour = "#E45756", linewidth = 0.9) +
  # One dot per LA, coloured by its region
  geom_point(aes(colour = regionName), size = 2.2, alpha = 0.9) +
  # No x ticks: there are too many LAs to label individually
  scale_x_continuous(breaks = NULL, expand = expansion(mult = c(0.01, 0.01))) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(30, 100, by = 10)) +
  ggtitle(
    "Local Authority phonics attainment (LA medians, sorted low → high)",
    subtitle = "Dots = LA medians; red line = national median"
  ) +
  xlab("Local Authorities (sorted)") +
  ylab("LA median % at expected standard") +
  labs(colour = "Region") +
  theme_minimal(base_size = 10) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  )
print(dotplot)
# Part 1, step 9: attainment by sex within each region
# Single panel: regions along the x-axis, one box per sex drawn side by side.
sex_colours <- c("Girls" = "#A24BEA", "Boys" = "#1F78B4")
boxplot_regions <- ggplot(data, aes(x = regionName, y = percentTarget, fill = sex)) +
  geom_boxplot(
    position = position_dodge(width = 0.75),
    width = 0.7,
    outlier.alpha = 0.25
  ) +
  scale_fill_manual(values = sex_colours) +
  scale_y_continuous(limits = c(0, 100), labels = percent_format(scale = 1)) +
  ggtitle("Phonics attainment by sex across regions") +
  xlab("Region") +
  ylab("% at expected standard") +
  labs(fill = "Sex") +
  theme_minimal(base_size = 11) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  )
print(boxplot_regions)
# Part 1, step 10: heatmap of median attainment for every sex x FSM
# combination, drawn as one panel per region
region_map <- summarise(
  group_by(data, regionName, sex, fsmStatus),
  median_attain = median(percentTarget, na.rm = TRUE),
  .groups = "drop"
)
heatmap <- ggplot(region_map, aes(x = sex, y = fsmStatus, fill = median_attain)) +
  geom_tile(colour = "white", linewidth = 0.6) +
  # Print the median inside each tile
  geom_text(
    aes(label = paste0(round(median_attain, 1), "%")),
    colour = "black",
    size = 3.2
  ) +
  facet_wrap(~ regionName, ncol = 4) +
  scale_fill_gradient(
    name = "Median %",
    limits = c(0, 100),
    low = "#FDE725",
    high = "#2C7FB8"
  ) +
  ggtitle("Sex × FSM heatmap by region (median % at expected standard)") +
  xlab("Sex") +
  ylab("FSM status") +
  theme_minimal(base_size = 8) +
  theme(
    strip.text = element_text(face = "bold"),
    panel.grid = element_blank(),
    plot.title = element_text(face = "bold")
  )
# Display the heatmap
print(heatmap)
# Part 1a: overview of phonics attainment by sex
# 1) Rows for time period "202425" that have an attainment value;
#    only sex and the attainment score are kept
sex_data <- data %>%
  filter(timePeriod == "202425", !is.na(percentTarget)) %>%
  select(sex, percentTarget)
# 2) Check the sample size per sex before testing assumptions
table(sex_data$sex)
Boys Girls
4135 4110
# 3) Assumption one: Shapiro-Wilk normality test, run separately for each sex -> violated
by(sex_data$percentTarget, sex_data$sex, shapiro.test)
sex_data$sex: Boys
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.9608, p-value < 2.2e-16
----------------------------------------------------------------------------------------------
sex_data$sex: Girls
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.93387, p-value < 2.2e-16
# 4) Assumption two: Levene's test (homogeneity of variance) -> violated
leveneTest(percentTarget ~ sex, data = sex_data)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 110.63 < 2.2e-16 ***
8243
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 5) Double-check the labels used in "sex"
levels(factor(sex_data[["sex"]]))
[1] "Boys" "Girls"
# 6) Kruskal-Wallis rank sum test (any statistically significant difference between the sexes?) -> yes
kruskal.test(percentTarget ~ sex, data = sex_data)
Kruskal-Wallis rank sum test
data: percentTarget by sex
Kruskal-Wallis chi-squared = 985.76, df = 1, p-value < 2.2e-16
# 7) Post-hoc Dunn's test with Bonferroni adjustment
# NOTE(review): sex has only two levels here, so this presumably repeats the
# single comparison already tested above — confirm it is needed
dunnTest(percentTarget ~ sex, data = sex_data, method = "bonferroni")
# 8) Fill colours for the boxplot, one per sex
pal_fill <- c("Girls" = "#A24BEA", "Boys" = "#1F78B4")
# 9) Boxplot of attainment by sex with the group mean marked and labelled
ggplot(sex_data, aes(x = sex, y = percentTarget, fill = sex)) +
  # One box per sex; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "white",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  # The subtitle is a fixed string, not computed from a test result
  labs(
    title = "England: Phonics Attainment by Sex",
    subtitle = "p value < 2.2e-16",
    x = "Sex",
    y = "Phonics Attainment"
  ) +
  theme_minimal()
# Median attainment for boys and for girls
med_by_sex <- with(sex_data, tapply(percentTarget, sex, median, na.rm = TRUE))
print(med_by_sex)
Boys Girls
75 84
# Part 1b: overview of phonics attainment by FSM status
# 1) Rows for time period "202425" that have an attainment value;
#    only FSM status and the attainment score are kept
fsm_data <- data %>%
  filter(timePeriod == "202425", !is.na(percentTarget)) %>%
  select(fsmStatus, percentTarget)
# 2) List the FSM status categories
levels(fsm_data[["fsmStatus"]])
[1] "FSM eligible" "Not known to be FSM eligible"
# 3) Assumption one: Shapiro-Wilk normality test, run separately for each FSM status -> violated
by(fsm_data$percentTarget, fsm_data$fsmStatus, shapiro.test)
fsm_data$fsmStatus: FSM eligible
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.98263, p-value < 2.2e-16
----------------------------------------------------------------------------------------------
fsm_data$fsmStatus: Not known to be FSM eligible
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.93218, p-value < 2.2e-16
# 4) Assumption two: Levene's test (homogeneity of variance) -> violated
leveneTest(percentTarget ~ fsmStatus, data = fsm_data)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 413.11 < 2.2e-16 ***
8243
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 5) Kruskal-Wallis rank sum test (any statistically significant difference between FSM groups?) -> yes
kruskal.test(percentTarget ~ fsmStatus, data = fsm_data)
Kruskal-Wallis rank sum test
data: percentTarget by fsmStatus
Kruskal-Wallis chi-squared = 1940.4, df = 1, p-value < 2.2e-16
# 6) Post-hoc Dunn's test with Bonferroni adjustment (where is the difference?)
# NOTE(review): fsmStatus has only two levels here, so this presumably repeats
# the single comparison already tested above — confirm it is needed
dunnTest(percentTarget ~ fsmStatus, data = fsm_data, method = "bonferroni")
# 7) Fill colours for the boxplot, one per FSM status
pal_fill <- c("FSM eligible" = "#A24BEA", "Not known to be FSM eligible" = "#1F78B4")
# 8) Boxplot of attainment by FSM status with the group mean marked and labelled
ggplot(fsm_data, aes(x = fsmStatus, y = percentTarget, fill = fsmStatus)) +
  # One box per FSM status; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "white",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  # The subtitle is a fixed string, not computed from a test result
  labs(
    title = "England: Phonics Attainment by FSM-Status",
    subtitle = "p value < 2.2e-16",
    x = "FSM-Status",
    y = "Phonics Attainment"
  ) +
  theme_minimal()
# Part 1c: correlation between percentFSM and phonics attainment
# 1) Correlation test (Pearson's r) between percentTarget and percentFSM
cor.test(data$percentTarget, data$percentFSM, method = "pearson")
Pearson's product-moment correlation
data: data$percentTarget and data$percentFSM
t = 3.8415, df = 8243, p-value = 0.0001232
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.02070673 0.06380083
sample estimates:
cor
0.04227344
# Correlation coefficient on its own, using only rows where both values are present
with(data, cor(percentTarget, percentFSM, use = "complete.obs"))
[1] 0.04227344
#-> revealed: a statistically significant but negligible relationship; percentFSM is not a meaningful predictor
# Scatterplot of percentTarget against percentFSM with a fitted linear trend line
ggplot(data, aes(x = percentFSM, y = percentTarget)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Correlation between FSM eligibility and phonics attainment") +
  xlab("Percentage of pupils eligible for FSM") +
  ylab("Percentage meeting phonics target")
# Part 1d: overview of phonics attainment by major ethnicity
# 1) Rows for time period "202425" that have an attainment value;
#    only major ethnicity and the attainment score are kept
eth_data_ma <- data %>%
  filter(timePeriod == "202425", !is.na(percentTarget)) %>%
  select(ethnicityMajor, percentTarget)
# 2) List the major-ethnicity categories
levels(eth_data_ma[["ethnicityMajor"]])
[1] "Asian / Asian British" "Black / African / Caribbean / Black British"
[3] "Mixed / Multiple ethnic groups" "Other ethnic group"
[5] "White"
# 3) Fill colours for the boxplot, one per major ethnic group
pal_fill <- c(
  "Asian / Asian British" = "#A24BEA",
  "Black / African / Caribbean / Black British" = "#1F78B4",
  "White" = "#FF69B4",
  "Mixed / Multiple ethnic groups" = "#FFFF00",
  "Other ethnic group" = "#008000"
)
# 4) Boxplot of attainment by major ethnicity with the group mean marked and labelled
ggplot(eth_data_ma, aes(x = ethnicityMajor, y = percentTarget, fill = ethnicityMajor)) +
  # One box per group; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "England: Phonics Attainment by major ethnicities",
    x = "Major Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 5.5)
# Table of median attainment per major ethnic group
summarise(
  group_by(eth_data_ma, ethnicityMajor),
  median_value = median(percentTarget)
)
NA
# Part 1e: overview of phonics attainment by minor ethnicity
# 1) Rows for time period "202425" that have an attainment value;
#    only minor ethnicity and the attainment score are kept
eth_data_mi <- data %>%
  filter(timePeriod == "202425", !is.na(percentTarget)) %>%
  select(ethnicityMinor, percentTarget)
# 2) List the minor-ethnicity categories
levels(eth_data_mi[["ethnicityMinor"]])
# Part 1e.1: overview of phonics attainment by minor Asian ethnicities
# 1) Keep only the categories treated as Asian in this analysis
# NOTE(review): "Gypsy" and "White and Asian" are grouped with the Asian
# categories here — confirm this matches the dataset's major-group coding
asian_categories <- c(
  "Bangladeshi", "Chinese", "Pakistani", "Gypsy",
  "Any other Asian background", "Indian", "White and Asian"
)
asian_minor <- eth_data_mi %>%
  filter(ethnicityMinor %in% asian_categories)
# 2) Fill colours for the boxplot, one per category
pal_fill <- c(
  "Bangladeshi" = "#A24BEA",
  "Chinese" = "#1F78B4",
  "Pakistani" = "#FF69B4",
  "Indian" = "#FFFF00",
  "Gypsy" = "#008000",
  "Any other Asian background" = "#FF0000",
  "White and Asian" = "#964B00"
)
# 3) Boxplot with the group mean marked and labelled
ggplot(asian_minor, aes(x = ethnicityMinor, y = percentTarget, fill = ethnicityMinor)) +
  # One box per category; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "England: Phonics Attainment by minor Asian ethnicities",
    x = "Minor Asian Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 7)
# Part 1e.2: overview of phonics attainment by minor White ethnicities
# 1) Keep only the categories treated as White in this analysis
white_categories <- c(
  "English / Welsh / Scottish / Northern Irish / British",
  "Irish",
  "Irish Traveller",
  "Any other White Background"
)
white_minor <- eth_data_mi %>%
  filter(ethnicityMinor %in% white_categories)
# 2) Fill colours for the boxplot, one per category
pal_fill <- c(
  "Irish" = "#A24BEA",
  "Irish Traveller" = "#1F78B4",
  "Any other White Background" = "#FF69B4",
  "English / Welsh / Scottish / Northern Irish / British" = "#FFFF00"
)
# 3) Boxplot with the group mean marked and labelled
ggplot(white_minor, aes(x = ethnicityMinor, y = percentTarget, fill = ethnicityMinor)) +
  # One box per category; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "England: Phonics Attainment by minor White ethnicities",
    x = "Minor White Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 7)
# Part 1e.3: overview of phonics attainment by minor Black ethnicities
# 1) Keep only the categories treated as Black in this analysis
black_categories <- c(
  "Caribbean",
  "African",
  "White and Black African",
  "Any other Black / African / Caribbean background",
  "White and Black Caribbean"
)
black_minor <- eth_data_mi %>%
  filter(ethnicityMinor %in% black_categories)
# 2) Fill colours for the boxplot, one per category
pal_fill <- c(
  "Caribbean" = "#A24BEA",
  "African" = "#1F78B4",
  "White and Black African" = "#FF69B4",
  "White and Black Caribbean" = "#FFFF00",
  "Any other Black / African / Caribbean background" = "#FF0000"
)
# 3) Boxplot with the group mean marked and labelled
ggplot(black_minor, aes(x = ethnicityMinor, y = percentTarget, fill = ethnicityMinor)) +
  # One box per category; outlier points are not drawn
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # White diamond marks the mean of each group
  stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white", color = "black") +
  # Mean printed as text; after_stat(y) replaces the deprecated ..y.. notation
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "England: Phonics Attainment by minor Black ethnicities",
    x = "Minor Black Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 6)
# Part 2a: local authorities
# Keep only the three LAs of interest, reduce to the attainment score
# (percentTarget) and drop rows without a score. droplevels() then removes the
# LA factor levels that no longer occur, so per-LA tables and tests cover only
# these three authorities instead of listing every other LA with zero rows.
dat3 <- data %>%
  filter(laName %in% c("Richmond upon Thames", "West Sussex", "Lewisham")) %>%
  select(laName, percentTarget) %>%
  filter(!is.na(percentTarget)) %>%
  droplevels()
# How many rows for each authority?
table(dat3$laName)
Barking and Dagenham Barnet Barnsley
0 0 0
Bath and North East Somerset Bedford Bexley
0 0 0
Birmingham Blackburn with Darwen Blackpool
0 0 0
Bolton Bournemouth, Christchurch and Poole Bracknell Forest
0 0 0
Bradford Brent Brighton and Hove
0 0 0
Bristol, City of Bromley Buckinghamshire
0 0 0
Bury Calderdale Cambridgeshire
0 0 0
Camden Central Bedfordshire Cheshire East
0 0 0
Cheshire West and Chester City of London Cornwall
0 0 0
County Durham Coventry Croydon
0 0 0
Cumberland Darlington Derby
0 0 0
Derbyshire Devon Doncaster
0 0 0
Dorset Dudley Ealing
0 0 0
East Riding of Yorkshire East Sussex Enfield
0 0 0
Essex Gateshead Gloucestershire
0 0 0
Greenwich Hackney Halton
0 0 0
Hammersmith and Fulham Hampshire Haringey
0 0 0
Harrow Hartlepool Havering
0 0 0
Herefordshire, County of Hertfordshire Hillingdon
0 0 0
Hounslow Isle of Wight Isles of Scilly
0 0 0
Islington Kensington and Chelsea Kent
0 0 0
Kingston upon Hull, City of Kingston upon Thames Kirklees
0 0 0
Knowsley Lambeth Lancashire
0 0 0
Leeds Leicester Leicestershire
0 0 0
Lewisham Lincolnshire Liverpool
67 0 0
Luton Manchester Medway
0 0 0
Merton Middlesbrough Milton Keynes
0 0 0
Newcastle upon Tyne Newham Norfolk
0 0 0
North East Lincolnshire North Lincolnshire North Northamptonshire
0 0 0
North Somerset North Tyneside North Yorkshire
0 0 0
Northumberland Nottingham Nottinghamshire
0 0 0
Oldham Oxfordshire Peterborough
0 0 0
Plymouth Portsmouth Reading
0 0 0
Redbridge Redcar and Cleveland Richmond upon Thames
0 0 49
Rochdale Rotherham Rutland
0 0 0
Salford Sandwell Sefton
0 0 0
Sheffield Shropshire Slough
0 0 0
Solihull Somerset South Gloucestershire
0 0 0
South Tyneside Southampton Southend-on-Sea
0 0 0
Southwark St. Helens Staffordshire
0 0 0
Stockport Stockton-on-Tees Stoke-on-Trent
0 0 0
Suffolk Sunderland Surrey
0 0 0
Sutton Swindon Tameside
0 0 0
Telford and Wrekin Thurrock Torbay
0 0 0
Tower Hamlets Trafford Wakefield
0 0 0
Walsall Waltham Forest Wandsworth
0 0 0
Warrington Warwickshire West Berkshire
0 0 0
West Northamptonshire West Sussex Westminster
0 71 0
Westmorland and Furness Wigan Wiltshire
0 0 0
Windsor and Maidenhead Wirral Wokingham
0 0 0
Wolverhampton Worcestershire York
0 0 0
# Checking assumptions: Shapiro-Wilk (normal distribution) -> violated (Lew & Rich: p<0.05)
# Unused LA levels are dropped first so the test runs only for the LAs that
# have rows, instead of returning NULL for every empty level.
with(droplevels(dat3), by(percentTarget, laName, shapiro.test))
dat3$laName: Barking and Dagenham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Barnet
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Barnsley
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bath and North East Somerset
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bedford
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bexley
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Birmingham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Blackburn with Darwen
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Blackpool
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bolton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bournemouth, Christchurch and Poole
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bracknell Forest
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bradford
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Brent
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Brighton and Hove
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bristol, City of
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bromley
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Buckinghamshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Bury
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Calderdale
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Cambridgeshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Camden
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Central Bedfordshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Cheshire East
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Cheshire West and Chester
NULL
----------------------------------------------------------------------------------------------
dat3$laName: City of London
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Cornwall
NULL
----------------------------------------------------------------------------------------------
dat3$laName: County Durham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Coventry
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Croydon
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Cumberland
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Darlington
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Derby
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Derbyshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Devon
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Doncaster
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Dorset
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Dudley
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Ealing
NULL
----------------------------------------------------------------------------------------------
dat3$laName: East Riding of Yorkshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: East Sussex
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Enfield
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Essex
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Gateshead
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Gloucestershire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Greenwich
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hackney
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Halton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hammersmith and Fulham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hampshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Haringey
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Harrow
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hartlepool
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Havering
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Herefordshire, County of
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hertfordshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hillingdon
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Hounslow
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Isle of Wight
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Isles of Scilly
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Islington
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Kensington and Chelsea
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Kent
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Kingston upon Hull, City of
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Kingston upon Thames
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Kirklees
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Knowsley
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Lambeth
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Lancashire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Leeds
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Leicester
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Leicestershire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Lewisham
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.96219, p-value = 0.03959
----------------------------------------------------------------------------------------------
dat3$laName: Lincolnshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Liverpool
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Luton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Manchester
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Medway
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Merton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Middlesbrough
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Milton Keynes
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Newcastle upon Tyne
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Newham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Norfolk
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North East Lincolnshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North Lincolnshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North Northamptonshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North Somerset
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North Tyneside
NULL
----------------------------------------------------------------------------------------------
dat3$laName: North Yorkshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Northumberland
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Nottingham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Nottinghamshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Oldham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Oxfordshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Peterborough
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Plymouth
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Portsmouth
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Reading
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Redbridge
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Redcar and Cleveland
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Richmond upon Thames
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.90716, p-value = 0.0009496
----------------------------------------------------------------------------------------------
dat3$laName: Rochdale
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Rotherham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Rutland
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Salford
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Sandwell
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Sefton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Sheffield
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Shropshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Slough
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Solihull
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Somerset
NULL
----------------------------------------------------------------------------------------------
dat3$laName: South Gloucestershire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: South Tyneside
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Southampton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Southend-on-Sea
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Southwark
NULL
----------------------------------------------------------------------------------------------
dat3$laName: St. Helens
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Staffordshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Stockport
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Stockton-on-Tees
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Stoke-on-Trent
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Suffolk
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Sunderland
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Surrey
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Sutton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Swindon
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Tameside
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Telford and Wrekin
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Thurrock
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Torbay
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Tower Hamlets
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Trafford
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wakefield
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Walsall
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Waltham Forest
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wandsworth
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Warrington
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Warwickshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: West Berkshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: West Northamptonshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: West Sussex
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.96824, p-value = 0.06892
----------------------------------------------------------------------------------------------
dat3$laName: Westminster
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Westmorland and Furness
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wigan
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wiltshire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Windsor and Maidenhead
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wirral
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wokingham
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Wolverhampton
NULL
----------------------------------------------------------------------------------------------
dat3$laName: Worcestershire
NULL
----------------------------------------------------------------------------------------------
dat3$laName: York
NULL
# Checking assumptions: Levene's test (homogeneity of variance) of percentTarget
# across the local authorities in dat3, centred on the group medians
# -> violated (p < 0.05)
leveneTest(percentTarget ~ laName, data = dat3, center = median)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 3.5192 0.03163 *
184
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Kruskal-Wallis rank sum test: are there any statistically significant
# differences in percentTarget between the local authorities in dat3?
# -> yes, p < 0.05
kruskal.test(percentTarget ~ laName, data = dat3)
Kruskal-Wallis rank sum test
data: percentTarget by laName
Kruskal-Wallis chi-squared = 38.49, df = 2, p-value = 4.386e-09
# Post-hoc Dunn's test (where is the difference?): comparisons of percentTarget
# between the local authorities in dat3, with Bonferroni-adjusted p-values
dunnTest(percentTarget ~ laName, data = dat3, method = "bonferroni")
# Part 2b: lowest ranking local authority
# Mean percentTarget per local authority (missing values ignored), sorted in
# ascending order so the lowest-ranking authority is the first row
# -> WEST SUSSEX
dat3 %>%
  group_by(laName) %>%
  summarise(
    mean_percent = mean(percentTarget, na.rm = TRUE)
  ) %>%
  arrange(mean_percent)
# Part 2b.1: analysis of the factor sex
# 1) West Sussex rows with a recorded percentTarget, reduced to the two
#    columns used in this section
ws_data <- data %>%
  filter(laName == "West Sussex", !is.na(percentTarget)) %>%
  select(sex, percentTarget)
# 2) Checking the sample size per sex before testing assumptions
table(ws_data[["sex"]])
Boys Girls
35 36
# 3) Assumption one: Shapiro-Wilk normality test, run separately for each sex
#    -> confirmed (p > 0.05 for both Boys and Girls)
by(ws_data$percentTarget, ws_data$sex, shapiro.test)
ws_data$sex: Boys
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.9561, p-value = 0.1742
----------------------------------------------------------------------------------------------
ws_data$sex: Girls
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.96213, p-value = 0.2499
# 4) Assumption two: Levene's test (homogeneity of variance) between the sexes
#    -> upheld (p > 0.05)
leveneTest(percentTarget ~ sex, data = ws_data)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 0.8376 0.3633
69
# 5) Double-checking the group labels of sex before the independent t-test
levels(factor(ws_data[["sex"]]))
[1] "Boys" "Girls"
# 6) Independent two-sample t-test of percentTarget between the sexes;
#    var.equal = TRUE requests the pooled-variance form
tt_ws <- t.test(
  percentTarget ~ sex,
  data = ws_data,
  var.equal = TRUE
)
# 7) Printing the stored test result
print(tt_ws)
Two Sample t-test
data: percentTarget by sex
t = -2.136, df = 69, p-value = 0.03623
alternative hypothesis: true difference in means between group Boys and group Girls is not equal to 0
95 percent confidence interval:
-13.5009383 -0.4609665
sample estimates:
mean in group Boys mean in group Girls
66.68571 73.66667
# 8) Colours for the boxplot, keyed by the levels of sex
pal_fill <- c("Girls" = "#A24BEA", "Boys" = "#1F78B4") # fill colours for boxes
pal_point <- c("Girls" = "#7A1FD2", "Boys" = "#0E5D91") # colours for jitter
# 9) Boxplot of phonics attainment by sex in West Sussex
ggplot(ws_data, aes(x = sex, y = percentTarget, fill = sex)) +
  # outlier.shape = NA hides the box's own outlier markers; the individual
  # observations are drawn by geom_jitter()
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  geom_jitter(aes(color = sex), width = 0.15, alpha = 0.6, size = 2) +
  # Group mean drawn as a white diamond (shape 23)
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  # Group mean printed next to the diamond, rounded to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.7,
    color = "white",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  scale_color_manual(values = pal_point) +
  labs(
    title = "West Sussex: Phonics Attainment by Sex",
    subtitle = "*white diamond marker = mean; p = 0.04",
    x = "Sex",
    y = "Phonics Attainment"
  ) +
  theme_minimal()
# 9) Heatmap of FSM status x sex in West Sussex
# One row per sex / FSM-status combination holding the median percentTarget
# (missing values ignored); grouping is dropped after summarising
ws_sex_fsm_map <- data %>%
  filter(laName == "West Sussex") %>%
  group_by(laName, sex, fsmStatus) %>%
  summarise(
    median_attain = median(percentTarget, na.rm = TRUE),
    .groups = "drop"
  )
# Tiles are coloured by the median and labelled with its value rounded to one
# decimal place; the fill scale is fixed to the 0-100 range
ggplot(
  data = ws_sex_fsm_map,
  mapping = aes(x = sex, y = fsmStatus, fill = median_attain)
) +
  geom_tile(colour = "white", linewidth = 0.6) +
  geom_text(
    mapping = aes(label = paste0(round(median_attain, 1), "%")),
    colour = "black",
    size = 3.2
  ) +
  scale_fill_gradient(
    name = "Median %",
    low = "#FDE725",
    high = "#2C7FB8",
    limits = c(0, 100)
  ) +
  labs(
    title = "West Sussex: Sex × FSM heatmap (median % at expected standard)",
    x = "Sex",
    y = "FSM status"
  ) +
  theme_minimal(base_size = 8) +
  theme(
    plot.title = element_text(face = "bold"),
    panel.grid = element_blank(),
    strip.text = element_text(face = "bold")
  )
# Part 2b.2: analysis of the factor FSM status
# 1) West Sussex rows with a recorded percentTarget, reduced to fsmStatus and
#    percentTarget
ws_data_fsm <- data %>%
  filter(laName == "West Sussex", !is.na(percentTarget)) %>%
  select(fsmStatus, percentTarget)
# 2) Checking the sample size per FSM status before testing assumptions
table(ws_data_fsm[["fsmStatus"]])
FSM eligible Not known to be FSM eligible
29 42
# 3) Assumption one: Shapiro-Wilk normality test, run separately for each FSM
#    status -> violated (p < 0.05 for "Not known to be FSM eligible")
by(ws_data_fsm$percentTarget, ws_data_fsm$fsmStatus, shapiro.test)
ws_data_fsm$fsmStatus: FSM eligible
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.95083, p-value = 0.1923
----------------------------------------------------------------------------------------------
ws_data_fsm$fsmStatus: Not known to be FSM eligible
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.89505, p-value = 0.001027
# 4) Assumption two: Levene's test (homogeneity of variance) between the FSM
#    groups -> upheld (p > 0.05)
leveneTest(percentTarget ~ fsmStatus, data = ws_data_fsm)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 0.7892 0.3774
69
# 5) Mann-Whitney U test (Wilcoxon rank sum test), used because the normality
#    assumption was violated; exact = FALSE asks for the normal approximation
#    instead of an exact p-value
wilcox.test(percentTarget ~ fsmStatus, data = ws_data_fsm, exact = FALSE)
Wilcoxon rank sum test with continuity correction
data: percentTarget by fsmStatus
W = 209, p-value = 2.897e-06
alternative hypothesis: true location shift is not equal to 0
# 6) Colours for the boxplot, keyed by the levels of fsmStatus
pal_fill <- c(
  "FSM eligible" = "#A24BEA",
  "Not known to be FSM eligible" = "#1F78B4"
) # fill colours for boxes
pal_point <- c(
  "FSM eligible" = "#7A1FD2",
  "Not known to be FSM eligible" = "#0E5D91"
) # colours for jitter
# 7) Boxplot of phonics attainment by FSM status in West Sussex
ggplot(ws_data_fsm, aes(x = fsmStatus, y = percentTarget, fill = fsmStatus)) +
  # outlier.shape = NA hides the box's own outlier markers; the individual
  # observations are drawn by geom_jitter()
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  geom_jitter(aes(color = fsmStatus), width = 0.15, alpha = 0.6, size = 2) +
  # Group mean drawn as a white diamond (shape 23)
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  # Group mean printed next to the diamond, rounded to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.7,
    color = "white",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  scale_color_manual(values = pal_point) +
  labs(
    title = "West Sussex: Phonics Attainment by FSM Status",
    subtitle = "*white diamond marker = mean; p = 2.897e-06 ",
    x = "FSM Status",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 8)
# Part 2b.2: analysis of the factor ethnicity (major) in WS
# 1) West Sussex rows with a recorded percentTarget, reduced to ethnicityMajor
#    and percentTarget
ws_data_eth <- data %>%
  filter(laName == "West Sussex", !is.na(percentTarget)) %>%
  select(ethnicityMajor, percentTarget)
# 2) Double-checking the levels of ethnicityMajor
levels(ws_data_eth[["ethnicityMajor"]])
[1] "Asian / Asian British" "Black / African / Caribbean / Black British"
[3] "Mixed / Multiple ethnic groups" "Other ethnic group"
[5] "White"
# 3) Number of rows available for each major ethnicity
table(ws_data_eth[["ethnicityMajor"]])
Asian / Asian British Black / African / Caribbean / Black British
18 12
Mixed / Multiple ethnic groups Other ethnic group
19 4
White
18
# Checking assumptions: Shapiro-Wilk normality test, run separately for each
# major ethnicity -> violated (p < 0.05 for "Mixed / Multiple ethnic groups")
by(ws_data_eth$percentTarget, ws_data_eth$ethnicityMajor, shapiro.test)
ws_data_eth$ethnicityMajor: Asian / Asian British
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.94075, p-value = 0.2984
----------------------------------------------------------------------------------------------
ws_data_eth$ethnicityMajor: Black / African / Caribbean / Black British
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.91427, p-value = 0.2419
----------------------------------------------------------------------------------------------
ws_data_eth$ethnicityMajor: Mixed / Multiple ethnic groups
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.89373, p-value = 0.03752
----------------------------------------------------------------------------------------------
ws_data_eth$ethnicityMajor: Other ethnic group
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.87842, p-value = 0.3319
----------------------------------------------------------------------------------------------
ws_data_eth$ethnicityMajor: White
Shapiro-Wilk normality test
data: dd[x, ]
W = 0.92169, p-value = 0.1385
# Checking assumptions: Levene's test (homogeneity of variance) across the
# major ethnicities, centred on the group medians -> upheld (p > 0.05)
leveneTest(percentTarget ~ ethnicityMajor, data = ws_data_eth, center = median)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 4 1.749 0.1498
66
# Kruskal-Wallis rank sum test: are there any statistically significant
# differences in percentTarget between the major ethnicities?
# -> p > 0.05; not significant
kruskal.test(percentTarget ~ ethnicityMajor, data = ws_data_eth)
Kruskal-Wallis rank sum test
data: percentTarget by ethnicityMajor
Kruskal-Wallis chi-squared = 7.1045, df = 4, p-value = 0.1305
# Fill colours for the boxplot, keyed by the levels of ethnicityMajor
pal_fill <- c(
  "Asian / Asian British" = "#A24BEA",
  "Black / African / Caribbean / Black British" = "#1F78B4",
  "White" = "#FF69B4",
  "Mixed / Multiple ethnic groups" = "#FFFF00",
  "Other ethnic group" = "#008000"
)
# 4) Boxplot of phonics attainment by major ethnicity in West Sussex
ggplot(
  ws_data_eth,
  aes(x = ethnicityMajor, y = percentTarget, fill = ethnicityMajor)
) +
  # outlier.shape = NA hides the box's outlier markers
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # Group mean drawn as a white diamond (shape 23)
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  # Group mean printed next to the diamond, rounded to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.6,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "West Sussex: Phonics Attainment by Major Ethnicities",
    x = "Major Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 5.5)
# Table of the median percentTarget for each major ethnicity
# (rows with a missing percentTarget were already removed from ws_data_eth)
ws_data_eth %>%
  group_by(ethnicityMajor) %>%
  summarise(
    median_value = median(percentTarget)
  )
# Part 2b.3: overview of phonics attainment by minor White ethnicities
# 1) West Sussex rows with a recorded percentTarget, reduced to ethnicityMinor
#    and percentTarget
ws_data_eth_min <- data %>%
  filter(laName == "West Sussex", !is.na(percentTarget)) %>%
  select(ethnicityMinor, percentTarget)
# 2) Keeping only the minor ethnicities listed as White
# NOTE(review): %in% matches labels exactly, including case ("Background" is
# capitalised here) - confirm against levels(data$ethnicityMinor)
white_categories <- c(
  "English / Welsh / Scottish / Northern Irish / British",
  "Irish",
  "Irish Traveller",
  "Any other White Background"
)
white_minor_WS <- filter(ws_data_eth_min, ethnicityMinor %in% white_categories)
# 3) Fill colours for the boxplot, keyed by the White minor ethnicity labels
pal_fill <- c(
  "Irish" = "#A24BEA",
  "Irish Traveller" = "#1F78B4",
  "Any other White Background" = "#FF69B4",
  "English / Welsh / Scottish / Northern Irish / British" = "#FFFF00"
)
# 4) Boxplot of phonics attainment by minor White ethnicity in West Sussex
ggplot(
  white_minor_WS,
  aes(x = ethnicityMinor, y = percentTarget, fill = ethnicityMinor)
) +
  # outlier.shape = NA hides the box's outlier markers
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # Group mean drawn as a white diamond (shape 23)
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  # Group mean printed next to the diamond, rounded to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +1.8,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "West Sussex: Phonics Attainment by Minor White Ethnicities",
    x = "Minor White Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 7)
# Part 2b.4: overview of phonics attainment by minor Asian ethnicities
# 1) Keeping only the minor ethnicities listed below
# NOTE(review): "Gypsy" and "White and Asian" are included in this Asian list;
# they look like they belong to the White and Mixed major groups respectively -
# confirm this grouping is intended before interpreting the plot
asian_categories <- c(
  "Bangladeshi",
  "Chinese",
  "Pakistani",
  "Gypsy",
  "Any other Asian background",
  "Indian",
  "White and Asian"
)
asian_minor_WS <- filter(ws_data_eth_min, ethnicityMinor %in% asian_categories)
# 2) Fill colours for the boxplot, keyed by the minor ethnicity labels
pal_fill <- c(
  "Bangladeshi" = "#A24BEA",
  "Chinese" = "#1F78B4",
  "Pakistani" = "#FF69B4",
  "Indian" = "#FFFF00",
  "Gypsy" = "#008000",
  "Any other Asian background" = "#FF0000",
  "White and Asian" = "#964B00"
)
# 3) Boxplot of phonics attainment by minor Asian ethnicity in West Sussex
ggplot(
  asian_minor_WS,
  aes(x = ethnicityMinor, y = percentTarget, fill = ethnicityMinor)
) +
  # outlier.shape = NA hides the box's outlier markers
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  # Group mean drawn as a white diamond (shape 23)
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3,
    fill = "white", color = "black"
  ) +
  # Group mean printed next to the diamond, rounded to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = mean,
    geom = "text",
    aes(label = round(after_stat(y), 1)),
    vjust = +2,
    color = "black",
    fontface = "bold",
    size = 3
  ) +
  scale_fill_manual(values = pal_fill) +
  labs(
    title = "West Sussex: Phonics Attainment by Minor Asian Ethnicities",
    x = "Minor Asian Ethnicity",
    y = "Phonics Attainment"
  ) +
  theme_minimal(base_size = 7)