Install packages, prepare the two datasets, and merge them
# install.packages("lmtest")
# install.packages("sandwich")
# rm(list=ls())
# #setRepositories()
# # devtools::install_github("r-lib/conflicted")
# install.packages("mirt")
# install.packages("tidyverse")
# install.packages("ggplot2", dependencies = TRUE, update = TRUE)
# install.packages("lavaan")
library(tidyverse)
library(mirt)
library(lavaan)
library(survey)
library(lattice)
library(lmtest)
library(sandwich)
# Load the cleaned ELDS 2023 assessment export.
data_O <- read.csv("E:/Shreya Work in ERO/ECD Study 2023-20230927T042929Z-001/ECD Study 2023/Data/Cleaned data/ELDS2023_final.csv")

# dat1: all columns, restricted to rows with Check == "1" (absentees removed)
# and with no Sampling_error_delete flag (sampling errors removed).
dat1 <- data_O %>%
  filter(Check == "1") %>%
  filter(is.na(Sampling_error_delete))

# dat: the same rows, reduced to the relevant columns (picked by position),
# with the item responses in columns 17:52 recoded as strings:
# "999" and "0" become "0", "1" becomes "0.5", and "2" becomes "1".
dat <- dat1 %>%
  select(1, 5, 7, 8, 19, 22, 24, 25, 27, 40, 41:83) %>%
  mutate(
    across(
      17:52,
      ~ recode(.x, "999" = "0", "0" = "0", "1" = "0.5", "2" = "1")
    )
  )
#### Add age levels based on target age group ####
# Bands are inclusive month ranges. The last band starts at 85 months
# (">= 85"), matching its "85-96 m" label; the previous "> 85" sent children
# aged exactly 85 months to "Above 7 years".
# NOTE(review): ages below 39 months, and any non-integer age that falls
# between two bands, also end up in "Above 7 years" -- confirm this is intended.
dat$Agelevel <- ifelse(
  dat$Age_months >= 39 & dat$Age_months <= 60, "Below 5 years",
  ifelse(
    dat$Age_months >= 61 & dat$Age_months <= 72, "5 years (61-72 m)",
    ifelse(
      dat$Age_months >= 73 & dat$Age_months <= 84, "6 years (73-84 m)",
      ifelse(
        dat$Age_months >= 85 & dat$Age_months <= 96, "7 years (85-96 m)",
        "Above 7 years"
      )
    )
  )
)

# Item columns 17:52 hold recoded strings at this point; make them numeric.
dat <- dplyr::mutate(dat, dplyr::across(17:52, as.numeric))

# Overall score: % of tasks performed correctly & partially correctly.
dat$overall <- rowMeans(dat[, 17:52], na.rm = TRUE)

# Domain scores: row means over each domain's item columns (by position).
dat$Physical <- rowMeans(dat[, 34:39], na.rm = TRUE)
dat$LanguageScore <- rowMeans(dat[, c(22:24, 27:33)], na.rm = TRUE)
dat$Cognitive <- rowMeans(dat[, c(18:21, 25:26, 40:44)], na.rm = TRUE)
dat$SE <- rowMeans(dat[, c(17, 45:52)], na.rm = TRUE)

# Composite: mean of the four domain scores. Selected by name rather than by
# position 56:59 so it keeps working if columns are added or reordered above.
dat$composite <- rowMeans(
  dat[, c("Physical", "LanguageScore", "Cognitive", "SE")],
  na.rm = TRUE
)
# Map a score vector onto three performance bands:
#   score < 0.34          -> "Struggling"
#   0.34 <= score < 0.74  -> "Progressing"
#   score >= 0.74         -> "On Track"
# Missing scores stay missing.
fiveyr_cutoff <- function(score) {
  upper_band <- ifelse(score < 0.74, "Progressing", "On Track")
  ifelse(score < 0.34, "Struggling", upper_band)
}
# Band the composite with the shared cut-off function, then band each domain
# with its own thresholds (below the first: "Struggling", below the second:
# "Progressing", otherwise "On Track").
dat$CompLevel <- fiveyr_cutoff(dat$composite)

dat$PhysLevel <- with(
  dat,
  ifelse(Physical < 0.28, "Struggling",
    ifelse(Physical < 0.74, "Progressing", "On Track"))
)
dat$LangLevel <- with(
  dat,
  ifelse(LanguageScore < 0.41, "Struggling",
    ifelse(LanguageScore < 0.87, "Progressing", "On Track"))
)
dat$CogLevel <- with(
  dat,
  ifelse(Cognitive < 0.29, "Struggling",
    ifelse(Cognitive < 0.66, "Progressing", "On Track"))
)
dat$SELevel <- with(
  dat,
  ifelse(SE < 0.39, "Struggling",
    ifelse(SE < 0.70, "Progressing", "On Track"))
)
Merging the data below
# Location of the ECEDBackground_final.csv export.
file_path <- "E:/Shreya Work in ERO/ECD Study 2023-20230927T042929Z-002/ECD Study 2023/Data/Cleaned data/ECEDBackground_final.csv"
# Read it with readr; show_col_types = FALSE suppresses the column-type message.
# The trailing 249 is presumably the number of rows read -- confirm.
data <- read_csv(file_path, show_col_types = FALSE) #249
New names:
• `` -> `...2`
• `` -> `...3`
# Left join: keep every row of dat and attach the background columns that share
# the same Code. The trailing 2720 is presumably the resulting row count -- confirm.
mergedtest <- merge(dat, data, by="Code", all.x= TRUE) #2720
# view(mergedtest)
#
# Rename "index.y" to "index" so it can be used as the key of the next merge.
# NOTE(review): merge() gives the ".y" suffix to clashing columns that come from
# its second argument, so this "index" appears to be the background file's
# index, not the assessment row's ("index.x"). The merge below matches it
# against the index of dat1 (assessment rows). Confirm this is the intended key;
# if it is not, the dat1 columns are attached to the wrong rows or left NA.
names(mergedtest)[names(mergedtest) == "index.y"] <- "index"
# Left join the full-width assessment rows (dat1) onto the merged data; columns
# present in both inputs come back with ".x" / ".y" suffixes.
ECEDBackground_final <- merge(mergedtest, dat1, by="index", all.x=TRUE)
Standardize the data (mean 0 and SD 1)
As suggested by Kenji
# Standardize each score to mean 0 and SD 1.
# scale() returns a one-column matrix carrying "scaled:center"/"scaled:scale"
# attributes; as.numeric() strips that so each new column is a plain numeric
# vector instead of a matrix column. The standardized values are unchanged.
ECEDBackground_final <- ECEDBackground_final %>%
  mutate(
    SE_standardized = as.numeric(scale(SE)),
    Cognitive_standardized = as.numeric(scale(Cognitive)),
    LanguageScore_standardized = as.numeric(scale(LanguageScore)),
    Physical_standardized = as.numeric(scale(Physical)),
    Composite_standardized = as.numeric(scale(composite))
  )
# Names of the TC6_* learning-area columns.
# NOTE(review): this vector is not referenced in the code visible here, and
# TC6_G has no availability indicator below -- confirm whether it is used elsewhere.
learning_columns <- c("TC6_A", "TC6_B", "TC6_C", "TC6_D", "TC6_E", "TC6_F", "TC6_G")
# view(ECEDBackground_final)
# Derive the regression predictors. Most are 0/1 indicators built with
# ifelse()/if_else(), so a missing input value yields a missing indicator.
ECEDBackground_final <- ECEDBackground_final %>%
mutate(
# Cluster identifier and age are taken from the ".x" side of the merge.
EMISCode = Code.x,
Age_months = Age_months.x, # earlier alternative: ifelse(Age_months.x >= 12 & Age_months.x <= 24, 1, 0)
Gender_Male = ifelse(A8 == 2, 1, 0), # A8 is the gender column; code 2 = boy (per the original note)
Primary_language_instruction = ifelse(TC1_A>1, 1, 0), # TC1_A is the language column (1 = Nepali, 2 = English per the original note); 1 flags any code above 1, i.e. a language other than Nepali
ECED_Attendance_Yes = ifelse(A12.x >=1, 1, 0), # A12 is the ECED attendance column; 1 when the value is at least 1
School_Type_Community = ifelse(PI_Type == "Community", 1, 0), # 1 for community schools, 0 for every other PI_Type
Urbanicity_Urban = ifelse(Urbanicity.x == "Urban", 1, 0), # 1 for "Urban", 0 for every other value
Teachers_Qualification_Grade_10_only = ifelse(Teacher_Qualification_TI5 ==3, 1, 0), # per the original note, code 3 = grade 10 and code 4 = grade 12; only code 3 is flagged
Pre_Service_Training_Received = ifelse(Preservice_TI6_A == 1, 1, 0), # pre-service training: "yes" (code 1) only
In_Service_Training_Received = ifelse(Inservice_Number_TI7_A>0, 1, 0), # 1 when the in-service training count is above 0
# Learning areas: 1 when the matching TC6_* column is non-zero.
Availability_of_Reading_Learning_Areas = if_else(TC6_A!=0,1, 0),
Availability_of_Mathematics_Learning_Areas = if_else(TC6_B!=0,1, 0),
Availability_of_RolePlay_Learning_Areas = if_else(TC6_C!=0,1, 0),
Availability_of_Science_Learning_Areas = if_else(TC6_D!=0,1, 0),
Availability_of_Creativity_Learning_Areas = if_else(TC6_E!=0,1, 0),
Availability_of_Constructions_Learning_Areas = if_else(TC6_F!=0,1, 0),
# 1 when the Aaya_BI11_F* columns sum to more than 0. rowSums() is called
# without na.rm, so a missing value in any of those columns gives NA here.
Availability_of_Caretaker_Yes = ifelse(rowSums(select(.,starts_with("Aaya_BI11_F")))>0, 1, 0),
#Urbanicity.x == "Rural" &
# NOTE(review): the original note says code 98 is deliberately not replaced, so
# a value of 98 counts as "yes" under the > 0 test -- confirm that is intended.
Usage_of_textbook_Yes = ifelse(TC16 >0, 1, 0), # textbook use: everything except "no"
Usage_of_curriculum_Yes = ifelse(TC7 ==1, 1, 0), # using the curriculum: "yes" (code 1) only
Usage_of_ELDS_Yes = ifelse(TC8 ==1, 1, 0)
# homework_week_daily_yes = ifelse(TC9>=1, 1, 0) # removed because the variable has almost no variance (92% said they give homework), so the multiple regression could not estimate it.
# Original note on the TC8 indicator above: "ELDS report cards yes only".
) %>%
# Weighted head-counts: NA counts are treated as 0, each count is multiplied by
# school_weight, and the products are summed across the matching columns.
# NOTE(review): both sums use the same per-row school_weight, so the weight
# appears to cancel in the ratio below whenever it is positive and non-missing
# -- confirm that a weighted ratio is really what is needed here.
mutate(
Weighted_Students = rowSums(across(starts_with("Student_number_BI11_D"), ~ replace_na(.x, 0) * school_weight)),
Weighted_Teachers = rowSums(across(starts_with("Teachers_BI11_E"), ~ replace_na(.x, 0) * school_weight))
) %>%
# Weighted student-to-teacher ratio; rows whose weighted teacher total is not
# above 0 get NA, which avoids division by zero.
mutate(Weighted_Student_Teacher_Ratio = ifelse(Weighted_Teachers > 0, Weighted_Students / Weighted_Teachers, NA))
Function to fit the model
# Fit an OLS model and test its coefficients with cluster-robust standard errors.
#
# Arguments:
#   formula      Model formula, passed to lm().
#   data         Data frame holding the model variables and the cluster column.
#   cluster_var  Column of `data` (name or position) that identifies the clusters.
#
# Returns:
#   The coeftest() result for the fitted model, computed with the clustered
#   variance-covariance matrix from vcovCL().
#
# Errors:
#   Stops when `cluster_var` does not identify a column of `data`.
fit_ols_with_robust_se <- function(formula, data, cluster_var) {
  # data[[name]] is NULL for an unknown column, and vcovCL() accepts
  # cluster = NULL without complaint (it then stops clustering on the requested
  # variable), so a misspelt name would silently produce the wrong standard
  # errors. Fail loudly instead.
  cluster_ids <- data[[cluster_var]]
  if (is.null(cluster_ids)) {
    stop(
      "cluster_var '", cluster_var, "' is not a column of `data`",
      call. = FALSE
    )
  }

  # Fit the OLS model.
  model <- lm(formula, data = data)

  # Clustered variance-covariance matrix of the coefficient estimates.
  clustered_vcov <- vcovCL(model, cluster = cluster_ids)

  # Coefficient table with the robust standard errors.
  coeftest(model, vcov. = clustered_vcov)
}
Building the model
# ECEDBackground_final <- ECEDBackground_final %>% #already done abobve
# mutate(
# SE_standardized = scale(SE),
# Cognitive_standardized = scale(Cognitive),
# LanguageScore_standardized = scale(LanguageScore),
# Physical_standardized = scale(Physical),
# Composite_standardized = scale(composite)
# )
# view(ECEDBackground_final$Code.x)
# All five models share the same predictors, data set and cluster variable and
# differ only in the outcome, so the right-hand side is written once. The term
# order matches the original formulas, which keeps the coefficient tables in
# the same row order.
model_predictors <- c(
  "Age_months",
  "Gender_Male",
  "Primary_language_instruction",
  "ECED_Attendance_Yes",
  "School_Type_Community",
  "Urbanicity_Urban",
  "Teachers_Qualification_Grade_10_only",
  "Pre_Service_Training_Received",
  "In_Service_Training_Received",
  "Availability_of_Reading_Learning_Areas",
  "Availability_of_Mathematics_Learning_Areas",
  "Availability_of_RolePlay_Learning_Areas",
  "Availability_of_Science_Learning_Areas",
  "Availability_of_Creativity_Learning_Areas",
  "Availability_of_Constructions_Learning_Areas",
  "Availability_of_Caretaker_Yes",
  "Weighted_Student_Teacher_Ratio",
  "Usage_of_textbook_Yes",
  "Usage_of_curriculum_Yes",
  "Usage_of_ELDS_Yes"
)

# Fit `outcome ~ <model_predictors>` on ECEDBackground_final with standard
# errors clustered on EMISCode. `outcome` is a column name given as a string.
fit_domain_model <- function(outcome) {
  fit_ols_with_robust_se(
    reformulate(model_predictors, response = outcome),
    data = ECEDBackground_final,
    cluster_var = "EMISCode"
  )
}

model_SE <- fit_domain_model("SE_standardized")
model_Cognitive <- fit_domain_model("Cognitive_standardized")
model_Language <- fit_domain_model("LanguageScore_standardized")
model_Physical <- fit_domain_model("Physical_standardized")
model_composite <- fit_domain_model("Composite_standardized")
A list of the models, for easy reference
# Gather the five coefficient tables into one list, keyed by outcome label.
models <- setNames(
  list(model_SE, model_Cognitive, model_Language, model_Physical, model_composite),
  c("SE", "Cognitive", "Language", "Physical", "composite")
)
Extract coefficients and significance
Raw scores, just to check and confirm
# Print every coefficient table stored in the list, one per outcome.
print(models)
$SE
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.8556416 0.8045903 1.0635 0.288588
Age_months 0.0092047 0.0042623 2.1596 0.031741 *
Gender_Male -0.2122895 0.4828820 -0.4396 0.660579
Primary_language_instruction -0.4800951 0.5279415 -0.9094 0.364016
ECED_Attendance_Yes -0.5652008 0.2442828 -2.3137 0.021482 *
School_Type_Community -0.5728832 0.4622755 -1.2393 0.216390
Urbanicity_Urban 1.0338997 0.3872672 2.6697 0.008081 **
Teachers_Qualification_Grade_10_only -1.5165349 0.6620800 -2.2906 0.022809 *
Pre_Service_Training_Received 0.4247265 0.5079113 0.8362 0.403816
In_Service_Training_Received 0.0026245 0.3608394 0.0073 0.994202
Availability_of_Reading_Learning_Areas -0.5734329 0.7096227 -0.8081 0.419800
Availability_of_Mathematics_Learning_Areas 0.2166686 0.2647358 0.8184 0.413877
Availability_of_RolePlay_Learning_Areas 0.3905955 0.9582187 0.4076 0.683891
Availability_of_Science_Learning_Areas 0.4335475 0.7287614 0.5949 0.552433
Availability_of_Creativity_Learning_Areas -1.6439521 0.5720228 -2.8739 0.004397 **
Availability_of_Constructions_Learning_Areas 1.3723579 0.9659765 1.4207 0.156632
Availability_of_Caretaker_Yes -2.4255139 0.4989687 -4.8611 2.047e-06 ***
Weighted_Student_Teacher_Ratio 0.0142970 0.0101896 1.4031 0.161808
Usage_of_textbook_Yes 0.0749074 0.5047407 0.1484 0.882139
Usage_of_curriculum_Yes -0.0421430 0.5855151 -0.0720 0.942678
Usage_of_ELDS_Yes 0.0629852 0.5171663 0.1218 0.903162
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$Cognitive
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.2246610 0.5261649 -0.4270 0.6697568
Age_months 0.0075503 0.0045007 1.6776 0.0946600 .
Gender_Male -0.7163217 0.3393584 -2.1108 0.0357660 *
Primary_language_instruction -0.1998520 0.3287157 -0.6080 0.5437451
ECED_Attendance_Yes -0.6562482 0.1895019 -3.4630 0.0006267 ***
School_Type_Community -0.2757081 0.2973116 -0.9273 0.3546319
Urbanicity_Urban 0.4849977 0.2379874 2.0379 0.0425948 *
Teachers_Qualification_Grade_10_only -1.1494056 0.5130622 -2.2403 0.0259375 *
Pre_Service_Training_Received 0.5926027 0.3327694 1.7808 0.0761368 .
In_Service_Training_Received -0.1160293 0.2708635 -0.4284 0.6687460
Availability_of_Reading_Learning_Areas 0.0345459 0.4555192 0.0758 0.9396073
Availability_of_Mathematics_Learning_Areas 1.0546702 0.2149260 4.9071 1.653e-06 ***
Availability_of_RolePlay_Learning_Areas 1.5630986 0.7564982 2.0662 0.0398207 *
Availability_of_Science_Learning_Areas 0.3371825 0.4638423 0.7269 0.4679362
Availability_of_Creativity_Learning_Areas -2.0519473 0.4896398 -4.1907 3.841e-05 ***
Availability_of_Constructions_Learning_Areas 0.0031873 0.7381248 0.0043 0.9965580
Availability_of_Caretaker_Yes -2.4904530 0.4244102 -5.8680 1.371e-08 ***
Weighted_Student_Teacher_Ratio 0.0179913 0.0061369 2.9317 0.0036789 **
Usage_of_textbook_Yes 1.2607315 0.3253654 3.8748 0.0001359 ***
Usage_of_curriculum_Yes 0.6496497 0.4365983 1.4880 0.1379965
Usage_of_ELDS_Yes -1.0153391 0.3414475 -2.9736 0.0032260 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$Language
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5160944 0.5395497 0.9565 0.3397150
Age_months 0.0026612 0.0039864 0.6676 0.5050202
Gender_Male -0.5040239 0.3190266 -1.5799 0.1153787
Primary_language_instruction -0.5194648 0.3167547 -1.6400 0.1022515
ECED_Attendance_Yes -0.5474689 0.2536540 -2.1583 0.0318395 *
School_Type_Community -0.5652276 0.3404858 -1.6601 0.0981360 .
Urbanicity_Urban 0.5693176 0.2499387 2.2778 0.0235681 *
Teachers_Qualification_Grade_10_only -1.2241148 0.4609277 -2.6558 0.0084135 **
Pre_Service_Training_Received 1.0639061 0.3346915 3.1788 0.0016624 **
In_Service_Training_Received 0.1790612 0.2920928 0.6130 0.5404059
Availability_of_Reading_Learning_Areas -0.9077014 0.4480241 -2.0260 0.0438090 *
Availability_of_Mathematics_Learning_Areas 1.4398584 0.2820624 5.1048 6.499e-07 ***
Availability_of_RolePlay_Learning_Areas 1.5437337 0.8434321 1.8303 0.0683769 .
Availability_of_Science_Learning_Areas 0.2235743 0.5420240 0.4125 0.6803353
Availability_of_Creativity_Learning_Areas -2.7434522 0.5475718 -5.0102 1.019e-06 ***
Availability_of_Constructions_Learning_Areas 0.3427029 0.7753674 0.4420 0.6588742
Availability_of_Caretaker_Yes -2.6881957 0.4554219 -5.9026 1.140e-08 ***
Weighted_Student_Teacher_Ratio 0.0184642 0.0061905 2.9827 0.0031354 **
Usage_of_textbook_Yes 1.5419127 0.4025172 3.8307 0.0001611 ***
Usage_of_curriculum_Yes -0.0070235 0.4435351 -0.0158 0.9873782
Usage_of_ELDS_Yes -0.9725957 0.3272576 -2.9720 0.0032431 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$Physical
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6042149 0.6840780 -0.8833 0.377934
Age_months 0.0055298 0.0032444 1.7044 0.089530 .
Gender_Male -0.3980297 0.2957904 -1.3456 0.179616
Primary_language_instruction -0.1497218 0.6017656 -0.2488 0.803713
ECED_Attendance_Yes -0.2329371 0.2877486 -0.8095 0.418976
School_Type_Community -1.0517809 0.3169765 -3.3182 0.001039 **
Urbanicity_Urban -0.1948808 0.2998779 -0.6499 0.516365
Teachers_Qualification_Grade_10_only -1.0237623 0.8740336 -1.1713 0.242573
Pre_Service_Training_Received 0.4335251 0.5373026 0.8069 0.420505
In_Service_Training_Received 0.4344775 0.3971790 1.0939 0.275031
Availability_of_Reading_Learning_Areas -0.4025822 0.6897154 -0.5837 0.559945
Availability_of_Mathematics_Learning_Areas 0.6518367 0.2353286 2.7699 0.006021 **
Availability_of_RolePlay_Learning_Areas 0.8714956 1.0095136 0.8633 0.388796
Availability_of_Science_Learning_Areas 0.8359995 0.7003819 1.1936 0.233735
Availability_of_Creativity_Learning_Areas -1.6922555 0.7365518 -2.2975 0.022402 *
Availability_of_Constructions_Learning_Areas 0.0756388 0.9849918 0.0768 0.938850
Availability_of_Caretaker_Yes -1.5055295 0.6859907 -2.1947 0.029092 *
Weighted_Student_Teacher_Ratio 0.0201834 0.0081497 2.4766 0.013917 *
Usage_of_textbook_Yes 0.6893070 0.3315806 2.0789 0.038635 *
Usage_of_curriculum_Yes 0.6853144 0.5452876 1.2568 0.209983
Usage_of_ELDS_Yes -0.7611496 0.5025091 -1.5147 0.131092
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$composite
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1783044 0.6733649 0.2648 0.7913812
Age_months 0.0076858 0.0039965 1.9231 0.0555851 .
Gender_Male -0.5537650 0.3801055 -1.4569 0.1463868
Primary_language_instruction -0.4151065 0.4903338 -0.8466 0.3980265
ECED_Attendance_Yes -0.6118443 0.2696329 -2.2692 0.0240968 *
School_Type_Community -0.7566430 0.3614307 -2.0935 0.0372994 *
Urbanicity_Urban 0.5873556 0.3186960 1.8430 0.0664947 .
Teachers_Qualification_Grade_10_only -1.5083397 0.7157548 -2.1073 0.0360684 *
Pre_Service_Training_Received 0.7640436 0.4888869 1.5628 0.1193393
In_Service_Training_Received 0.1527797 0.3772038 0.4050 0.6857948
Availability_of_Reading_Learning_Areas -0.5691418 0.6394255 -0.8901 0.3742635
Availability_of_Mathematics_Learning_Areas 1.0139969 0.2375310 4.2689 2.776e-05 ***
Availability_of_RolePlay_Learning_Areas 1.3191443 1.0065974 1.3105 0.1912110
Availability_of_Science_Learning_Areas 0.5617631 0.6795773 0.8266 0.4092193
Availability_of_Creativity_Learning_Areas -2.4774163 0.6512524 -3.8041 0.0001784 ***
Availability_of_Constructions_Learning_Areas 0.5674947 0.9656097 0.5877 0.5572517
Availability_of_Caretaker_Yes -2.7850315 0.5978747 -4.6582 5.149e-06 ***
Weighted_Student_Teacher_Ratio 0.0216394 0.0079237 2.7310 0.0067572 **
Usage_of_textbook_Yes 1.0717672 0.3925384 2.7304 0.0067696 **
Usage_of_curriculum_Yes 0.3866621 0.5490799 0.7042 0.4819533
Usage_of_ELDS_Yes -0.8057064 0.4554025 -1.7692 0.0780575 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1