library(data.table)
#> Warning: package 'data.table' was built under R version 4.4.3
library(data.table)
#> Warning: package 'data.table' was built under R version 4.4.3
# A new teacher does get better with experience, but typically not by enough
# to close the gap with the top performers through experience alone. For
# example, if an average first-year teacher is, hypothetically, at the 40th
# percentile of effectiveness initially, then after 5 years they might move to
# the 50th or 60th percentile with that ~0.1 SD gain.
# Load the cleaned HR extract and derive each record's length of service.
d <- fread("../../TED/MAT3/01_MAT3_hr_clean.csv")

# Service start date; the source column is parsed as day/month/year.
d[, crs_date := as.Date(cont_relevant_service_date, format = "%d/%m/%Y")]

# Reference date for each academic year. Parsing a bare year with "%Y" makes
# as.Date() take the missing month and day from the day the script is run, so
# `date` (and every duration derived from it) would drift between runs.
# Pin the month-day instead; "11-12" is the month-day that appears in the
# dates recorded elsewhere in this file.
# TODO(review): replace with the real census / snapshot date if one exists.
reference_month_day <- "11-12"
d[, date := as.Date(
  paste(academic_year, reference_month_day, sep = "-"),
  format = "%Y-%m-%d"
)]

# Length of service in years at the reference date (365.25 days per year).
# Date - Date yields a difference in days.
d[, duration := as.numeric(date - crs_date) / 365.25]
# distribution of years of experience
hist(d$duration)
summary(d$duration)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>  -1.374   2.198   6.196   8.190  11.794  55.280
# NOTE(review): the recorded minimum is negative, i.e. at least one service
# start date falls after its reference date -- worth checking those records.
# balance of the panel: number of records per academic year
table(d$academic_year)
#> 2023 2024 2025
#> 1954 1953 1853
# How many staff appear once, twice, three times, ... in the panel.
# The second step must use `.(freq = .N)` (a list) to name the count column;
# `(freq = .N)` is a parenthesised assignment that returns an unnamed vector,
# which data.table auto-names V1.
d[, .(rowCount = .N), by = staff_id][
  , .(freq = .N), by = rowCount
][order(-rowCount)]
#>    rowCount  freq
#>       <int> <int>
#> 1:       41     1
#> 2:        3  1219
#> 3:        2   626
#> 4:        1   810
# NOTE(review): one staff_id has 41 rows in a three-year panel -- presumably a
# placeholder or missing identifier; verify before using staff_id as a key.
# data available from first 5 years of teaching
# (rows with duration < 6, counted per reference date; note the filter also
# keeps any rows with a negative duration)
d[duration < 6, .(rowCount = .N), by = date]
#>          date rowCount
#>        <Date>    <int>
#> 1: 2023-11-12      981
#> 2: 2024-11-12      984
#> 3: 2025-11-12      889
# number of teachers who started in first year
# Operationally: staff_ids with exactly three rows that all have duration < 4.
# NOTE(review): this is a proxy -- it assumes one row per staff member per
# academic year; confirm against the panel structure.
d[duration < 4, .(rowCount = .N), by = staff_id][rowCount == 3, .N]
#> [1] 198
# plot of cohort: one horizontal segment per staff member, running from their
# earliest service start date to their latest reference date. Staff are
# stacked in descending order of start date.
dates <- d[, .(date = max(date), crs_date = min(crs_date)), by = staff_id][order(-crs_date)]
staff_rank <- seq_len(nrow(dates))

# Empty canvas. The x value is only a placeholder (nothing is drawn with
# type = "n"), but it must be a Date so that plot() draws a date axis.
plot(
  x = max(dates$date), y = 1, type = "n",
  xlim = c(min(dates$crs_date), max(dates$date)),
  ylim = c(0, nrow(dates)),
  xlab = "Date", ylab = ""
)

# Vectorised replacement for a per-row lines() loop: one segment per staff row.
segments(
  x0 = dates$crs_date, y0 = staff_rank,
  x1 = dates$date, y1 = staff_rank,
  lwd = 0.01, col = "#8D10FF"
)

# Mark each distinct latest-observation date.
abline(v = unique(dates$date), col = "red", lwd = 0.8)
# plot of the observed data, ordered by date starting in teaching
# Per-staff service segments again, but with the x-axis limited to the span of
# the latest-observation dates, so only that window of each segment is visible.
row_index <- seq_len(nrow(dates))

# Empty canvas; the Date-valued placeholder x gives a date axis.
plot(
  x = max(dates$date), y = 1, type = "n",
  xlim = c(min(dates$date), max(dates$date)),
  ylim = c(0, nrow(dates)),
  xlab = "Date", ylab = ""
)

# One horizontal segment per staff row, drawn in a single vectorised call.
segments(
  x0 = dates$crs_date, y0 = row_index,
  x1 = dates$date, y1 = row_index,
  lwd = 0.001, col = "#8D10FF"
)

abline(v = unique(dates$date), col = "red", lwd = 0.8)