library(tidyverse)
library(magrittr)
#library(scales)
library(modelr)
tracker <- read_csv("raw_tracker.csv") #### Read from LMS
#### Folder GoH Testing > Cohort17-21 > AY17-18 Class 9 BL-ML-EL
glimpse(tracker)
Observations: 5,260
Variables: 9
$ `Centre Name` <chr> "AMSSS Agroha, Hisar", "AMSSS Agroha, Hisar", "AMSSS Agroha, Hisar",...
$ `Student ID` <chr> "HI10100001", "HI10100003", "HI10100005", "HI10100007", "HI10100009"...
$ ENG_BL <int> 12, 8, 13, 13, 13, 11, NA, 12, 11, NA, NA, 8, NA, 10, NA, 8, 5, 3, 1...
$ MTH_BL <int> 9, 12, 10, 20, 10, 13, 9, 14, 10, 14, 18, 18, 15, 15, 22, 12, 12, 9,...
$ MTH_EL <int> 29, NA, 23, NA, NA, NA, NA, 35, 19, NA, NA, 38, NA, NA, 23, 12, NA, ...
$ MTH_ML <int> 15, 8, 12, 16, 11, 11, 11, 22, NA, 13, NA, 22, NA, 17, 13, 8, 9, 7, ...
$ SCI_BL <int> 9, 19, 15, 17, 26, 20, 9, 15, 16, 20, 17, 19, 15, 19, 23, 13, 8, 14,...
$ SCI_EL <int> 13, NA, 9, NA, NA, NA, NA, 16, 8, NA, NA, 23, NA, NA, 8, 12, NA, NA,...
$ SCI_ML <int> 12, 14, 15, 15, 11, 10, 8, 15, NA, 11, NA, 19, NA, 12, 11, 12, 12, 1...
df10 <- read_csv("df10_raw.csv") #### Evaluated from OMR data
#### Folder GoH Testing > AY18-19 > Baseline Eng Medium 59 Schools [Oct]
glimpse(df10)
Observations: 3,361
Variables: 13
$ X1 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20...
$ Student.ID <chr> "HI10100051", "HI10100023", "HI10100119", "HI10100131", "HI10100049",...
$ Sheet.ID <chr> "100673", "100643", "100644", "100645", "100646", "100648", "100649",...
$ Set <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",...
$ School.Name <chr> "AMSSS Agroha 10th.xlsx", "AMSSS Agroha 10th.xlsx", "AMSSS Agroha 10t...
$ Grade <int> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1...
$ Total <int> 57, 62, 47, 44, 57, 70, 66, 52, 53, 54, 66, 42, 56, 60, 33, 58, 46, 5...
$ MTH_BL2 <int> 34, 35, 24, 18, 35, 38, 36, 24, 27, 28, 37, 28, 32, 30, 18, 29, 25, 3...
$ SCI_BL2 <int> 23, 27, 23, 26, 22, 32, 30, 28, 26, 26, 29, 14, 24, 30, 15, 29, 21, 2...
$ MTH_BL2_AT9 <int> 15, 13, 9, 8, 14, 16, 15, 9, 10, 10, 16, 9, 11, 10, 6, 11, 8, 12, 6, ...
$ SCI_BL2_AT9 <int> 8, 13, 9, 11, 11, 15, 12, 11, 9, 9, 12, 4, 10, 12, 7, 11, 6, 12, 7, 3...
$ MTH_BL2_BEL9 <int> 19, 22, 15, 10, 21, 22, 21, 15, 17, 18, 21, 19, 21, 20, 12, 18, 17, 1...
$ SCI_BL2_BEL9 <int> 15, 14, 14, 15, 11, 17, 18, 17, 17, 17, 17, 10, 14, 18, 8, 18, 15, 17...
tracker %<>%
inner_join(df10, by = c(`Student ID` = "Student.ID")) #### Notice " " for common variables
(tracker)
tracker %<>%
select(-MTH_ML, -SCI_ML, -MTH_EL, -SCI_EL, -ENG_BL, -X1, -Sheet.ID, -Set, -School.Name, -Grade, -Total)
(tracker)
tracker %<>%
filter(!is.na(MTH_BL)) %>%
filter(!is.na(MTH_BL2))
dim(tracker)
[1] 2701 10
gain <- tracker %>%
mutate(BL1 = (MTH_BL + SCI_BL)) %>%
mutate(BL2_BEL9 = (MTH_BL2_BEL9 + SCI_BL2_BEL9)) %>%
select(`Centre Name`, `Student ID`, BL1, BL2_BEL9)
(gain)
normalize <- function(x) {
return (round(100*(x - min(x)) / (max(x) - min(x))))
}
gain %<>%
bind_cols(as.data.frame(lapply(gain["BL1"], normalize))) %>%
bind_cols(as.data.frame(lapply(gain["BL2_BEL9"], normalize))) %>%
rename(norm_BL1 = BL11) %>%
rename(norm_BL2_BEL9 = BL2_BEL91)
(gain)
school_gain <- gain %>%
group_by(`Centre Name`) %>%
summarise(count = n(),
Avg_Norm_BL1 = round(mean(norm_BL1)),
Avg_Norm_BL2_BEL9 = round(mean(norm_BL2_BEL9)),
Delta_Avg = Avg_Norm_BL2_BEL9 - Avg_Norm_BL1
) %>%
arrange(Delta_Avg)
school_gain
gain %>%
ggplot(aes(x = norm_BL1, y = norm_BL2_BEL9 )) + #### No quotes needed in aes x and y arguement
geom_point(aes(colour = `Centre Name`)) +
geom_smooth(method = "lm", se=FALSE) + #### Removes the 95% confidence interval
theme(legend.position="none") +
xlim(0,100) +
ylim(0,100)
school_gain %>%
ggplot(aes(x = Avg_Norm_BL1, y = Avg_Norm_BL2_BEL9, label = `Centre Name` )) + #### No quotes needed in aes x and y arguement
geom_point() +
#geom_smooth(method = "lm", se=FALSE) + #### Removes the 95% confidence interval
#xlim(0,100) +
#ylim(0,100)
geom_text(check_overlap = TRUE, size = 2) +
theme(legend.position="none")
school_gain %<>%
mutate(Performance = if_else(Avg_Norm_BL1 < 35, "Low",
if_else(Avg_Norm_BL1 < 45, "Mid", "High")
)
)
school_gain %>%
ggplot(aes(x = Avg_Norm_BL1, y = Avg_Norm_BL2_BEL9, label = `Centre Name` )) + #### No quotes needed in aes x and y arguement
geom_point(aes(colour = factor(Performance), fill = factor(Performance))) +
#geom_smooth(method = "lm", se=FALSE) + #### Removes the 95% confidence interval
#xlim(0,100) +
#ylim(0,100)
geom_text(check_overlap = TRUE, size = 2) +
theme(legend.position="none") +
scale_colour_manual(values = c("Low" = "red", "Mid" = "blue", "High" = "green"))
gain %<>%
left_join(school_gain, by = "Centre Name") %>%
select(-count, -Avg_Norm_BL1, -Avg_Norm_BL2_BEL9, -Delta_Avg)
(gain)
low_performing <- gain %>%
filter(Performance == "Low")
#### Getting linear regression fit
low_mod <- lm(norm_BL2_BEL9 ~ norm_BL1, data = low_performing)
low_performing %<>%
add_predictions(low_mod) %>%
add_residuals(low_mod)
low_performing %<>%
mutate(delta = round(100*resid / norm_BL1) )
low_performing %<>%
mutate(movement = if_else(delta > 20, "UP", (if_else(delta > -20, "SAME", "DOWN"))))
#### Count of UP / DOWN and SAME per school for Low performing schools
low_performing %>%
group_by(`Centre Name`, movement) %>%
summarise(countCommonStudents = n() ) %>%
spread(movement, countCommonStudents) %>%
replace(., is.na(.), 0) %>% #### Replace missing values from spread with 0
mutate(NET_perc_UP = round(100*(UP-DOWN) / (UP+DOWN+SAME))) %>%
arrange(NET_perc_UP)
mid_performing <- gain %>%
filter(Performance == "Mid")
#### Getting linear regression fit
mid_mod <- lm(norm_BL2_BEL9 ~ norm_BL1, data = mid_performing)
mid_performing %<>%
add_predictions(mid_mod) %>%
add_residuals(mid_mod)
mid_performing %<>%
mutate(delta = round(100*resid / norm_BL1) ) %>%
mutate(movement = if_else(delta > 20, "UP", (if_else(delta > -20, "SAME", "DOWN"))))
#### Count of UP / DOWN and SAME per school for Low performing schools
mid_performing %>%
group_by(`Centre Name`, movement) %>%
summarise(countCommonStudents = n() ) %>%
spread(movement, countCommonStudents) %>%
replace(., is.na(.), 0) %>%
mutate(NET_perc_UP = round(100*(UP-DOWN) / (UP+DOWN+SAME))) %>%
arrange(NET_perc_UP)
high_performing <- gain %>%
filter(Performance == "High")
#### Getting linear regression fit
high_mod <- lm(norm_BL2_BEL9 ~ norm_BL1, data = high_performing)
high_performing %<>%
add_predictions(high_mod) %>%
add_residuals(high_mod)
high_performing %<>%
mutate(delta = round(100*resid / norm_BL1) ) %>%
mutate(movement = if_else(delta > 20, "UP", (if_else(delta > -20, "SAME", "DOWN"))))
#### Count of UP / DOWN and SAME per school for Low performing schools
high_performing %>%
group_by(`Centre Name`, movement) %>%
summarise(countCommonStudents = n() ) %>%
spread(movement, countCommonStudents) %>%
replace(., is.na(.), 0) %>%
mutate(NET_perc_UP = round(100*(UP-DOWN) / (UP+DOWN+SAME))) %>%
arrange(NET_perc_UP)