Summary
This notebook analyses IHDS ASER data to determine whether absent students could affect in-school assessments like the NAS. To do this we run a regression of the ASER score on a variable for the number of days absent from school in the previous month, state dummies, and interactions between the state dummies and the absence variable. We then inspect the coefficients for the interaction terms and test their joint significance. Lastly, we compare estimates of state averages on the full sample with estimates of state averages where we take into account possible missingness due to absence.
Load packages and import data.
library(tidyverse)
library(haven)
# Locate the IHDS 2012 data file. The directory defaults to the original
# author's local path, but can be overridden by setting the IHDS_IND_DIR
# environment variable so the notebook runs on other machines unchanged.
ihds_ind_dir <- Sys.getenv(
  "IHDS_IND_DIR",
  unset = "C:/Users/dougj/Documents/Data/IHDS/IHDS 2012/DS0001"
)
ind_file <- file.path(ihds_ind_dir, "36151-0001-Data.dta")

# Fail early with a clear message rather than a low-level read error.
if (!file.exists(ind_file)) {
  stop("IHDS data file not found: ", ind_file, call. = FALSE)
}

# read in just those variables that i need
# this is much faster than reading in everything and then selecting
df <- read_dta(
  ind_file,
  col_select = c(
    STATEID, PSUID, URBAN2011, HHID, HHSPLITID, PERSONID, IDPSU, WT,
    RO3, RO7, RO5,
    starts_with("CS"), starts_with("TA"), starts_with("ED")
  )
)
Inspect data
Inspect the CS13 variable which is self-reported number of days absent from school each month. Only look at observations for which we have ASER score and the child attends a govt or govt aided school.
# Show the metadata (variable label, Stata display format) attached to CS13.
attributes(df[["CS13"]])
$label
[1] "EQ4 2.13 Days/month absent"
$format.stata
[1] "%2.0f"
# Frequency table of CS13 (days absent per month) for children who have an
# ASER score (TA8B) and whose school type CS4 is 2 or 3.
# --> note that the max value is 30.
df %>%
  filter(
    !is.na(TA8B),
    CS4 == 2 | CS4 == 3
  ) %>%
  group_by(CS13) %>%
  count()

# Among all children with an ASER score, count how many have / lack a CS13
# value --> looks like there are only a few NAs
df %>%
  filter(!is.na(TA8B)) %>%
  summarise(
    non_na_count = sum(!is.na(CS13)),
    na_count = sum(is.na(CS13))
  )
Graphical analysis of ASER scores by absence
Compare average ASER score for different levels of absence for kids attending govt schools. First, graph average score vs absence. Note that there are relatively few kids with absence over 10 so the area of this graph with absence > 10 should probably be ignored. Second, create histograms of score for each value of absence up to 10.
# Survey-weighted (WT) mean ASER score (TA8B) at each number of days absent,
# restricted to rows with both values present and school type CS4 of 2 or 3.
temp <- df %>%
  filter(
    !is.na(TA8B) & !is.na(CS13),
    CS4 == 2 | CS4 == 3
  ) %>%
  rename(days_absent = CS13) %>%
  group_by(days_absent) %>%
  summarise(ASER_score = weighted.mean(x = TA8B, w = WT)) %>%
  ungroup()

# Line plot of the weighted mean score against days absent.
ggplot(data = temp, mapping = aes(x = days_absent, y = ASER_score)) +
  geom_line()

# Build the data for the per-absence histograms: keep rows with an ASER score
# and an absence value, restrict to school type CS4 of 2 or 3 (the same
# restriction the line plot and the section text use; it was missing here),
# and cap days absent at 10 so that sparse high-absence values share one panel.
temp <- df %>%
  filter(!is.na(TA8B), !is.na(CS13)) %>%
  filter(CS4 == 2 | CS4 == 3) %>%
  mutate(days_absent_capped = pmin(as.numeric(CS13), 10))

# check that this worked
temp %>%
  group_by(days_absent_capped) %>%
  count()

# the graph below shows the relative frequency of different ASER scores by # days absent
# note that 10 could be 10 or greater as I have replaced all values of CS13>10 with 10
# The bars use a constant gray fill; the previously mapped
# `fill = factor(..x..)` aesthetic was overridden by that constant and had no
# effect, so it is dropped.
# NOTE(review): `..prop..` is the older spelling of `after_stat(prop)`; it is
# kept here for compatibility with older ggplot2 releases.
ggplot(temp, aes(factor(TA8B), group = factor(days_absent_capped))) +
  geom_bar(aes(y = ..prop..), stat = "count", fill = "gray") +
  scale_y_continuous(labels = scales::percent) +
  ylab("relative frequencies") +
  xlab("ASER reading score") +
  facet_grid(~factor(days_absent_capped))

Regression of ASER score on absence and state vars
Regress ASER score on age, absence, state, and state x absence. Then test the joint significance of all the state x absence terms.
# Print the Stata variable label of each regressor used below.
vars <- list(df$CS13, df$RO5)
lapply(vars, function(column) attr(column, "label"))
[[1]]
[1] "EQ4 2.13 Days/month absent"
[[2]]
[1] "HQ4 2.5 Age"
# OLS of ASER score (TA8B) on days absent (CS13), state dummies, their
# interaction, and age (RO5). `CS13 * factor(STATEID)` expands to both main
# effects plus the CS13:factor(STATEID) interaction terms.
# NOTE(review): unlike the descriptive blocks, this fit uses all of df (no CS4
# school-type filter) and does not use the WT survey weights - confirm this is
# intended.
model <- lm(
  formula = TA8B ~ CS13 * factor(STATEID) + RO5,
  data = df
)
library(car)
Loading required package: carData
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
# Joint test that every CS13-by-state interaction coefficient is zero,
# using the "hc1" heteroskedasticity-corrected covariance matrix.
linearHypothesis(
  model = model,
  hypothesis.matrix = matchCoefs(model, pattern = "CS13:"),
  white.adjust = "hc1"
)
Linear hypothesis test
Hypothesis:
CS13:factor(STATEID)2 = 0
CS13:factor(STATEID)3 = 0
CS13:factor(STATEID)4 = 0
CS13:factor(STATEID)5 = 0
CS13:factor(STATEID)6 = 0
CS13:factor(STATEID)7 = 0
CS13:factor(STATEID)8 = 0
CS13:factor(STATEID)9 = 0
CS13:factor(STATEID)10 = 0
CS13:factor(STATEID)11 = 0
CS13:factor(STATEID)12 = 0
CS13:factor(STATEID)13 = 0
CS13:factor(STATEID)14 = 0
CS13:factor(STATEID)15 = 0
CS13:factor(STATEID)16 = 0
CS13:factor(STATEID)17 = 0
CS13:factor(STATEID)18 = 0
CS13:factor(STATEID)19 = 0
CS13:factor(STATEID)20 = 0
CS13:factor(STATEID)21 = 0
CS13:factor(STATEID)22 = 0
CS13:factor(STATEID)23 = 0
CS13:factor(STATEID)24 = 0
CS13:factor(STATEID)25 = 0
CS13:factor(STATEID)26 = 0
CS13:factor(STATEID)27 = 0
CS13:factor(STATEID)28 = 0
CS13:factor(STATEID)29 = 0
CS13:factor(STATEID)30 = 0
CS13:factor(STATEID)32 = 0
CS13:factor(STATEID)33 = 0
Model 1: restricted model
Model 2: TA8B ~ CS13 + factor(STATEID) + CS13 * factor(STATEID) + RO5
Note: Coefficient covariance matrix supplied.
Res.Df Df F Pr(>F)
1 11129
2 11098 31 4.8327 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(broom)
# Collect the fitted model's coefficient table as a data frame and display it.
coefs <- broom::tidy(x = model)
coefs
Compare state estimates assuming some students more likely to be absent
We can also more directly calculate the effect of absence on state scores by comparing estimates of state averages with estimates which take into account potential absence on the day of the assessment. To do this, we create a new weight variable which takes into account the probability that the student would be present on the day of the exam. Our new WT variable is…
\[ \text{new weight} = \text{old weight} \times \frac{30 - \text{days absent per month}}{30} \] As in previous analyses, we only look at students in govt or govt aided schools.
# Per-state mean ASER score computed two ways: with the survey weight WT, and
# with WT scaled by the share of days present, (30 - CS13) / 30. `diff` is the
# full-sample mean minus the absence-weighted mean. Only rows with both a
# score and an absence value, and school type CS4 of 2 or 3, are used.
state_averages <- df %>%
  filter(
    !is.na(TA8B) & !is.na(CS13),
    CS4 == 2 | CS4 == 3
  ) %>%
  mutate(presence_weight = WT * (30 - CS13) / 30) %>%
  group_by(STATEID) %>%
  summarise(
    full_sample = weighted.mean(x = TA8B, w = WT),
    absence_weighted = weighted.mean(x = TA8B, w = presence_weight),
    diff = full_sample - absence_weighted
  )
# Rank the states under each estimate (rank 1 = lowest mean score).
# rank(ties.method = "first", na.last = TRUE) yields the same ranks as the
# earlier `x[order(x)] <- 1:n` idiom, but assigns whole columns, so it no
# longer triggers tibble's "Unknown or uninitialised column" warnings and
# does not depend on `1:nrow()`.
state_averages$full_rank <- rank(
  state_averages$full_sample,
  ties.method = "first",
  na.last = TRUE
)
state_averages$absence_rank <- rank(
  state_averages$absence_weighted,
  ties.method = "first",
  na.last = TRUE
)
state_averages
NA
LS0tDQp0aXRsZTogIkFuYWx5c2UgSUhEUyBBU0VSIGRhdGEiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgaHRtbF9kb2N1bWVudDoNCiAgICBkZl9wcmludDogcGFnZWQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCiMgU3VtbWFyeQ0KVGhpcyBub3RlYm9vayBhbmFseXNlcyBJSERTIEFTRVIgZGF0YSB0byBkZXRlcm1pbmUgd2hldGhlciBhYnNlbnQgc3R1ZGVudHMgY291bGQgYWZmZWN0IGluLXNjaG9vbCBhc3Nlc3NtZW50cyBsaWtlIHRoZSBOQVMuIFRvIGRvIHRoaXMgd2UgcnVuIGEgcmVncmVzc2lvbiBvZiB0aGUgQVNFUiBzY29yZSBvbiBhIHZhcmlhYmxlIGZvciB0aGUgbnVtYmVyIG9mIGRheXMgYWJzZW50IGZyb20gc2Nob29sIGluIHRoZSBwcmV2aW91cyBtb250aCwgc3RhdGUgZHVtbWllcywgYW5kIGludGVyYWN0aW9ucyBiZXR3ZWVuIHRoZSBzdGF0ZSBkdW1taWVzIGFuZCB0aGUgYWJzZW5jZSB2YXJpYWJsZS4gIFdlIHRoZW4gaW5zcGVjdCB0aGUgY29lZmZpY2llbnRzIGZvciB0aGUgaW50ZXJhY3Rpb24gdGVybXMgYW5kIHRlc3QgdGhlaXIgam9pbnQgc2lnbmlmaWNhbmNlLiBMYXN0bHksIHdlIGNvbXBhcmUgZXN0aW1hdGVzIG9mIHN0YXRlIGF2ZXJhZ2VzIG9uIHRoZSBmdWxsIHNhbXBsZSB3aXRoIGVzdGltYXRlcyBvZiBzdGF0ZSBhdmVyYWdlcyB3aGVyZSB3ZSB0YWtlIGludG8gYWNjb3VudCBwb3NzaWJsZSBtaXNzaW5nbmVzcyBkdWUgdG8gYWJzZW5jZS4NCg0KDQojIyMgSW5zdGFsbCBwYWNrYWdlcyBhbmQgaW1wb3J0IGRhdGEuDQpgYGB7ciBzZXR1cH0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShoYXZlbikNCmBgYA0KDQpgYGB7cn0NCmloZHNfaW5kX2RpciA8LSAiQzovVXNlcnMvZG91Z2ovRG9jdW1lbnRzL0RhdGEvSUhEUy9JSERTIDIwMTIvRFMwMDAxIg0KaW5kX2ZpbGUgPC0gZmlsZS5wYXRoKGloZHNfaW5kX2RpciwgIjM2MTUxLTAwMDEtRGF0YS5kdGEiKQ0KIyByZWFkIGluIGp1c3QgdGhvc2UgdmFyaWFibGVzIHRoYXQgaSBuZWVkDQojIHRoaXMgaXMgbXVjaCBmYXN0ZXIgdGhhbiByZWFkaW5nIGluIGV2ZXJ5dGhpbmcgYW5kIHRoZW4gc2VsZWN0aW5nDQpkZiA8LSByZWFkX2R0YShpbmRfZmlsZSwgY29sX3NlbGVjdCA9IGMoU1RBVEVJRCwgUFNVSUQsIFVSQkFOMjAxMSwgSEhJRCwgSEhTUExJVElELCBQRVJTT05JRCwgSURQU1UsIFdULCBSTzMsIFJPNywgUk81LCBzdGFydHNfd2l0aCgiQ1MiKSwgc3RhcnRzX3dpdGgoIlRBIiksIHN0YXJ0c193aXRoKCJFRCIpKSApDQpgYGANCg0KIyMjIEluc3BlY3QgZGF0YQ0KSW5zcGVjdCB0aGUgQ1MxMyB2YXJpYWJsZSB3aGljaCBpcyBzZWxmLXJlcG9ydGVkIG51bWJlciBvZiBkYXlzIGFic2VudCBmcm9tIHNjaG9vbCBlYWNoIG1vbnRoLiBPbmx5IGxvb2sgYXQgb2JzZXJ2YXRpb25zIGZvciB3aGljaCB3ZSBoYXZlIEFTRVIgc2NvcmUgYW5kIHRoZSBjaGlsZCBhdHRlbmRzIGEgZ292dCBvciBnb3Z0IGFpZGVkIHNj
aG9vbC4NCmBgYHtyfQ0KYXR0cmlidXRlcyhkZiRDUzEzKQ0KIyBzaW1wbGUgdGFiIG9mIENTMTMgLS0+IG5vdGUgdGhhdCB0aGUgbWF4IHZhbHVlIGlzIDMwLg0KZGYgJT4lIGZpbHRlcighaXMubmEoVEE4QikpICU+JQ0KICBmaWx0ZXIoQ1M0ID09IDIgfCBDUzQgPT0gMykgJT4lDQogIGdyb3VwX2J5KENTMTMpICU+JSANCiAgY291bnQoKQ0KDQojIGNoZWNrIHRoYXQgdGhpcyBpcyBub3QgbWlzc2luZyBmb3Iga2lkcyB3aXRoIEFTRVIgcmVzdWx0cyAtLT4gbG9va3MgbGlrZSB0aGVyZSBhcmUgb25seSBhIGZldyBOQXMNCmRmICU+JSBmaWx0ZXIoIWlzLm5hKFRBOEIpKSAlPiUgc3VtbWFyaXNlKG5vbl9uYV9jb3VudCA9IHN1bSghaXMubmEoQ1MxMykpLCBuYV9jb3VudCA9IHN1bShpcy5uYShDUzEzKSkgKQ0KYGBgDQoNCiMjIyBHcmFwaGljYWwgYW5hbHlzaXMgb2YgQVNFUiBzY29yZXMgYnkgYWJzZW5jZQ0KQ29tcGFyZSBhdmVyYWdlIEFTRVIgc2NvcmUgZm9yIGRpZmZlcmVudCBsZXZlbHMgb2YgYWJzZW5jZSBmb3Iga2lkcyBhdHRlbmRpbmcgZ292dCBzY2hvb2xzLiBGaXJzdCwgZ3JhcGggYXZlcmFnZSBzY29yZSB2cyBhYnNlbmNlLiAgTm90ZSB0aGF0IHRoZXJlIGFyZSByZWxhdGl2ZWx5IGZldyBraWRzIHdpdGggYWJzZW5jZSBvdmVyIDEwIHNvIHRoZSBhcmVhIG9mIHRoaXMgZ3JhcGggd2l0aCBhYnNlbmNlID4gMTAgc2hvdWxkIHByb2JhYmx5IGJlIGlnbm9yZWQuICBTZWNvbmQsIGNyZWF0ZSBoaXN0b2dyYW1zIG9mIHNjb3JlIGZvciBlYWNoIHZhbHVlIG9mIGFic2VuY2UgdXAgdG8gMTAuIA0KDQoNCmBgYHtyfQ0KdGVtcCA8LSBkZiAlPiUgZmlsdGVyKCFpcy5uYShUQThCKSAmICFpcy5uYShDUzEzKSkgJT4lIA0KICBmaWx0ZXIoQ1M0ID09IDIgfCBDUzQgPT0gMykgJT4lDQogIGdyb3VwX2J5KENTMTMpICU+JSANCiAgc3VtbWFyaXNlKEFTRVJfc2NvcmUgPSB3ZWlnaHRlZC5tZWFuKFRBOEIsIFdUKSkgJT4lIA0KICB1bmdyb3VwKCkgJT4lDQogIHJlbmFtZShkYXlzX2Fic2VudCA9IENTMTMpDQpnZ3Bsb3QodGVtcCwgYWVzKHggPSBkYXlzX2Fic2VudCwgeSA9IEFTRVJfc2NvcmUpKSArIGdlb21fbGluZSgpDQp0ZW1wIDwtIGRmICU+JSBtdXRhdGUoZGF5c19hYnNlbnRfY2FwcGVkID0gaWZlbHNlKENTMTMgPiAxMCwgMTAsIENTMTMpKSAlPiUgDQogIGZpbHRlcighaXMubmEoVEE4QikgJiAhaXMubmEoZGF5c19hYnNlbnRfY2FwcGVkKSkNCiMgY2hlY2sgdGhhdCB0aGlzIHdvcmtlZA0KdGVtcCAlPiUgZ3JvdXBfYnkoZGF5c19hYnNlbnRfY2FwcGVkKSAlPiUgY291bnQoKQ0KDQojIHRoZSBncmFwaCBiZWxvdyBzaG93cyB0aGUgcmVsYXRpdmUgZnJlcXVlbmN5IG9mIGRpZmZlcmVudCBBU0VSIHNjb3JlcyBieSAjIGRheXMgYWJzZW50DQojIG5vdGUgdGhhdCAxMCBjb3VsZCBiZSAxMCBvciBncmVhdGVyIGFzIEkgaGF2ZSByZXBsYWNlZCBhbGwgdmFsdWVzIG9mIENTMTM+MTAgd2l0aCAxMA0KZ2dwbG90KHRlbXAsIGFl
cyhmYWN0b3IoVEE4QiksIGdyb3VwID0gZmFjdG9yKGRheXNfYWJzZW50X2NhcHBlZCkpKSArIA0KICBnZW9tX2JhcihhZXMoeSA9IC4ucHJvcC4uLCBmaWxsID0gZmFjdG9yKC4ueC4uKSksIHN0YXQ9ImNvdW50IiwgZmlsbCA9ICJncmF5IikgKyANCiAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVscz1zY2FsZXM6OnBlcmNlbnQpICsNCiAgeWxhYigicmVsYXRpdmUgZnJlcXVlbmNpZXMiKSArDQogIHhsYWIoIkFTRVIgcmVhZGluZyBzY29yZSIpICsNCiAgZmFjZXRfZ3JpZCh+ZmFjdG9yKGRheXNfYWJzZW50X2NhcHBlZCkpDQpgYGANCg0KIyMjIFJlZ3Jlc3Npb24gb2YgQVNFUiBzY29yZSBvbiBhYnNlbmNlIGFuZCBzdGF0ZSB2YXJzDQpSZWdyZXNzIEFTRVIgc2NvcmUgb24gYWdlLCBhYnNlbmNlLCBzdGF0ZSwgYW5kIHN0YXRlIHggYWJzZW5jZS4gIFRoZW4gdGVzdCB0aGUgam9pbnQgc2lnbmlmaWNhbmNlIG9mIGFsbCB0aGUgc3RhdGUgeCBhYnNlbmNlIHRlcm1zLg0KDQpgYGB7cn0NCiMgZGlzcGxheSB0aGUgbGFiZWwgZm9yIGVhY2ggb2YgdmFyaWFibGVzDQp2YXJzIDwtIGxpc3QoZGYkQ1MxMywgZGYkUk81KQ0KbGFwcGx5KHZhcnMsIEZVTiA9IGZ1bmN0aW9uKHgpIGF0dHJpYnV0ZXMoeCkkbGFiZWwpDQptb2RlbCA8LSBsbShUQThCIH4gQ1MxMyArIGZhY3RvcihTVEFURUlEKSArIENTMTMqZmFjdG9yKFNUQVRFSUQpICsgUk81LCBkYXRhID0gZGYpDQpsaWJyYXJ5KGNhcikNCmxpbmVhckh5cG90aGVzaXMobW9kZWwsIG1hdGNoQ29lZnMobW9kZWwsICJDUzEzOiIpLCB3aGl0ZS5hZGp1c3QgPSAiaGMxIikNCmxpYnJhcnkoYnJvb20pDQpjb2VmcyA8LSB0aWR5KG1vZGVsKQ0KY29lZnMNCmBgYA0KDQojIyMgQ29tcGFyZSBzdGF0ZSBlc3RpbWF0ZXMgYXNzdW1pbmcgc29tZSBzdHVkZW50cyBtb3JlIGxpa2VseSB0byBiZSBhYnNlbnQNCldlIGNhbiBhbHNvIG1vcmUgZGlyZWN0bHkgY2FsY3VsYXRlIHRoZSBlZmZlY3Qgb2YgYWJzZW5jZSBvbiBzdGF0ZSBzY29yZXMgYnkgY29tcGFyaW5nIGVzdGltYXRlcyBvZiBzdGF0ZSBhdmVyYWdlcyB3aXRoIGVzdGltYXRlcyB3aGljaCB0YWtlIGludG8gYWNjb3VudCBwb3RlbnRpYWwgYWJzZW5jZSBvbiB0aGUgZGF5IG9mIHRoZSBhc3Nlc3NtZW50LiBUbyBkbyB0aGlzLCB3ZSBjcmVhdGUgYSBuZXcgd2VpZ2h0IHZhcmlhYmxlIHdoaWNoIHRha2VzIGludG8gYWNjb3VudCB0aGUgcHJvYmFiaWxpdHkgdGhhdCB0aGUgc3R1ZGVudCB3b3VsZCBiZSBwcmVzZW50IG9uIHRoZSBkYXkgb2YgdGhlIGV4YW0uIE91ciBuZXcgV1QgdmFyaWFibGUgaXMuLi4NCg0KJCQgbmV3d2VpZ2h0ID0gb2xkd2VpZ2h0KlxmcmFjezMwLWRheXNhYnNlbnRwZXJtb250aH17MzB9ICQkIA0KQXMgaW4gcHJldmlvdXMgYW5hbHlzZXMsIHdlIG9ubHkgbG9vayBhdCBzdHVkZW50cyBpbiBnb3Z0IG9yIHByaXZhdGUgYWlkZWQgc2Nob29scy4NCmBgYHtyfQ0Kc3RhdGVfYXZlcmFnZXMgPC0gZGYgJT4lIGZpbHRlcighaXMu
bmEoVEE4QikgJiAhaXMubmEoQ1MxMykpICU+JSANCiAgZmlsdGVyKENTNCA9PSAyIHwgQ1M0ID09IDMpICU+JQ0KICBtdXRhdGUobmV3LndlaWdodCA9IFdUKigzMC1DUzEzKS8zMCkgJT4lDQogIGdyb3VwX2J5KFNUQVRFSUQpICU+JSANCiAgc3VtbWFyaXNlKGZ1bGxfc2FtcGxlID0gd2VpZ2h0ZWQubWVhbihUQThCLCBXVCksIGFic2VuY2Vfd2VpZ2h0ZWQgPSB3ZWlnaHRlZC5tZWFuKFRBOEIsIG5ldy53ZWlnaHQpLCANCiAgICAgICAgICAgIGRpZmYgPSBmdWxsX3NhbXBsZS1hYnNlbmNlX3dlaWdodGVkKQ0KDQpzdGF0ZV9hdmVyYWdlcyRmdWxsX3Jhbmtbb3JkZXIoc3RhdGVfYXZlcmFnZXMkZnVsbF9zYW1wbGUpXSA8LSAxOm5yb3coc3RhdGVfYXZlcmFnZXMpDQpzdGF0ZV9hdmVyYWdlcyRhYnNlbmNlX3Jhbmtbb3JkZXIoc3RhdGVfYXZlcmFnZXMkYWJzZW5jZV93ZWlnaHRlZCldIDwtIDE6bnJvdyhzdGF0ZV9hdmVyYWdlcykNCnN0YXRlX2F2ZXJhZ2VzDQoNCmBgYA0KDQoNCg0KDQoNCg==