# Libraries
pacman::p_load(haven, estimatr, extrafont, texreg, janitor, hrbrthemes, xtable, papeR, tidyverse, compareGroups, Hmisc, skimr)
# Student Data
df_stu <-
read_dta(here::here("data", "stu_admin_all_latest.dta")) %>%
left_join(read_dta(here::here("data/reservation_new.dta")), by = "stdid") %>%
mutate(
reservation = dplyr::recode(reservation, "Non-reservation" = 0L, "Reservation" = 1L, .default = NA_integer_),
)
df <-
read_dta(here::here("data", "all_appended.dta")) %>%
distinct(stdid, course_name, facid, .keep_all = T) %>%
left_join(
df_stu %>%
select(stdid, classid, department_id, reservation, caste, ea_stud_group_criteria, grade),
by = "stdid"
) %>%
filter(ea_stud_group_criteria == 2, grade == 2) %>%
left_join(read_dta(here::here("data/fac_covariates.dta")), by = "facid")
# fixing for course cleaning done by sk in sep 2021
df <-
df %>%
left_join(
read_dta(here::here("data/students_clean.dta")),
by = c("department_id", "course_name")
) %>%
mutate(
course_name = if_else(!is.na(course_clean_sk), course_clean_sk, course_name)
) %>%
distinct(stdid, course_name, facid, .keep_all = T)
# grade data after veda's work
df_grades <-
read_dta(here::here("data/30jan2022/df_gradefac.dta")) %>%
filter(facid != "") %>%
left_join(df_stu %>% select(stdid, department_id, reservation, caste, b_i_jeemain_score, b_math_g1_score, b_physics_g1_score, b_ses_1, b_ses_2, b_ses_3, b_ses_4, b_ses_5, b_ses_6, b_i_ses_1, b_i_ses_2, b_i_ses_3, b_ct_score, b_ql_score, b_i_enter_engineering_exam, elite, e_talkprof_research, e_workprof_research, you_believe_math = e_belief_1, mathprofs_believe_math = e_belief_2, most_classmates_believe_math = e_belief_3, you_believe_physics = e_belief_4, physprofs_believe_physics = e_belief_5, most_classmates_believe_physics = e_belief_6, b_mathstudy_30, e_mathstudy_30, b_physicsstudy_30, e_physicsstudy_30), by = "stdid") %>%
left_join(read_dta(here::here("data/controls.dta")), by = "stdid") %>%
left_join(read_dta(here::here("data/fac_covariates.dta")), by = "facid") %>%
arrange(department_id, course_name, course_grade_clean) %>%
group_by(department_id, course_name) %>%
mutate(
course_rank = (scale(course_grade_clean) %>% pnorm() * 100) %>% as.vector
) %>%
ungroup() %>%
mutate(
univcode = str_sub(department_id, 1, 5),
father_college = if_else(father_ed_ind > 3 & !is.na(father_ed_ind), 1L, 0L),
father_college = if_else(is.na(father_ed_ind), NA_integer_, father_college),
mother_college = if_else(mother_ed_ind > 3 & !is.na(mother_ed_ind), 1L, 0L),
mother_college = if_else(is.na(mother_ed_ind), NA_integer_, mother_college),
female = female %>% as.integer(),
age = age %>% as.integer(),
) %>%
fastDummies::dummy_cols(select_columns = c("b_i_enter_engineering_exam", "hstype", "area", "father_ed_ind", "mother_ed_ind"), remove_first_dummy = T, ignore_na = F) %>%
relocate(department_id, course_name, stdid, course_rank) %>%
rename(reservation_stu = reservation) %>%
filter(univcode != "IR036") %>%
mutate(
# student variables
# miss_age = if_else(is.na(age), 1L, 0L),
# age = if_else(is.na(age), 0L, age),
#
# miss_reservation_stu = if_else(is.na(reservation_stu), 1L, 0L),
# reservation_stu = if_else(is.na(reservation_stu), 0L, reservation_stu),
#
# miss_father_college = if_else(is.na(father_college), 1L, 0L),
# father_college = if_else(is.na(father_college), 0L, father_college),
#
# miss_mother_college = if_else(is.na(mother_college), 1L, 0L),
# mother_college = if_else(is.na(mother_college), 0L, mother_college),
#
# miss_b_i_jeemain_score = if_else(is.na(b_i_jeemain_score), 1L, 0L),
# b_i_jeemain_score = if_else(is.na(b_i_jeemain_score), 0, b_i_jeemain_score),
#
# miss_b_math_g1_score = if_else(is.na(b_math_g1_score), 1L, 0L),
# b_math_g1_score = if_else(is.na(b_math_g1_score), 0, b_math_g1_score),
#
# miss_b_physics_g1_score = if_else(is.na(b_physics_g1_score), 1L, 0L),
# b_physics_g1_score = if_else(is.na(b_physics_g1_score), 0, b_physics_g1_score),
#
# miss_b_ct_score = if_else(is.na(b_ct_score), 1L, 0L),
# b_ct_score = if_else(is.na(b_ct_score), 0, b_ct_score),
#
# miss_b_ql_score = if_else(is.na(b_ql_score), 1L, 0L),
# b_ql_score = if_else(is.na(b_ql_score), 0, b_ql_score),
#
# # faculty variables
# miss_fac_associate_professor = if_else(is.na(fac_associate_professor), 1L, 0L),
# fac_associate_professor = if_else(is.na(fac_associate_professor), 0, fac_associate_professor),
#
# miss_fac_professor = if_else(is.na(fac_professor), 1L, 0L),
# fac_professor = if_else(is.na(fac_professor), 0, fac_professor),
#
# miss_fac_yearsinhighed = if_else(is.na(fac_yearsinhighed), 1L, 0L),
# fac_yearsinhighed = if_else(is.na(fac_yearsinhighed), 0, fac_yearsinhighed),
#
# miss_fac_highest_degree_phd = if_else(is.na(fac_highest_degree_phd), 1L, 0L),
# fac_highest_degree_phd = if_else(is.na(fac_highest_degree_phd), 0, fac_highest_degree_phd),
#
# miss_fac_highest_degree_phd_in_prog = if_else(is.na(fac_highest_degree_phd_in_prog), 1L, 0L),
# fac_highest_degree_phd_in_prog = if_else(is.na(fac_highest_degree_phd_in_prog), 0, fac_highest_degree_phd_in_prog),
#
# miss_fac_degree_college_elite = if_else(is.na(fac_degree_college_elite), 1L, 0L),
# fac_degree_college_elite = if_else(is.na(fac_degree_college_elite), 0, fac_degree_college_elite),
#
# miss_reservation_fac = if_else(is.na(reservation_fac), 1L, 0L),
# reservation_fac = if_else(is.na(reservation_fac), 0, reservation_fac)
) %>%
mutate_at(vars(b_ses_1, b_ses_2, b_ses_3, b_ses_4, b_ses_5, b_ses_6, b_i_ses_1, b_i_ses_2, b_i_ses_3), ~ as.integer(.)) %>%
mutate(
b_ses_1 = if_else(b_ses_1 == 1L, 1L, 0L) %>% replace_na(0L),
b_ses_2 = if_else(b_ses_2 == 1L, 1L, 0L) %>% replace_na(0L),
b_ses_3 = if_else(b_ses_3 == 1L, 1L, 0L) %>% replace_na(0L),
b_ses_4 = if_else(b_ses_4 == 1L, 1L, 0L) %>% replace_na(0L),
b_ses_5 = if_else(b_ses_5 == 1L, 1L, 0L) %>% replace_na(0L),
b_ses_6 = if_else(b_ses_6 == 1L, 1L, 0L) %>% replace_na(0L),
b_i_ses_1 = if_else(b_i_ses_1 == 1L, 1L, 0L) %>% replace_na(0L),
b_i_ses_2 = if_else(b_i_ses_2 == 1L, 1L, 0L) %>% replace_na(0L),
b_i_ses_3 = if_else(b_i_ses_3 == 1L, 1L, 0L) %>% replace_na(0L),
) %>%
mutate(
old_sib_factor = case_when(
old_sib == 0 ~ "0",
old_sib == 1 ~ "1",
old_sib == 2 ~ "2",
old_sib == 3 ~ "3",
old_sib == 4 ~ "4",
old_sib > 4 ~ "more",
TRUE ~ NA_character_
) %>% as_factor(),
young_sib_factor = case_when(
young_sib == 0 ~ "0",
young_sib == 1 ~ "1",
young_sib == 2 ~ "2",
young_sib == 3 ~ "3",
young_sib == 4 ~ "4",
young_sib > 4 ~ "more",
TRUE ~ NA_character_
) %>% as_factor(),
school_years_english_factor = case_when(
school_years_english == 0 ~ "0",
school_years_english >= 1 & school_years_english <= 9 ~ "1-9",
school_years_english >= 10 & school_years_english <= 12 ~ "10-12",
school_years_english >= 13 & school_years_english <= 16 ~ "13-16",
TRUE ~ NA_character_
) %>% as_factor(),
) %>%
fastDummies::dummy_cols(select_columns = c("old_sib_factor", "young_sib_factor", "school_years_english_factor"), remove_first_dummy = T, ignore_na = F, remove_selected_columns = T) %>%
mutate_at(vars(b_i_enter_engineering_exam_2, b_i_enter_engineering_exam_3, area_2, area_3, hstype_2, hstype_3, hstype_4, hstype_5, hstype_6, hstype_7, father_ed_ind_2, father_ed_ind_3, father_ed_ind_4, father_ed_ind_5, father_ed_ind_6, father_ed_ind_7, father_ed_ind_8, mother_ed_ind_2, mother_ed_ind_3, mother_ed_ind_4, mother_ed_ind_5, mother_ed_ind_6, mother_ed_ind_7, mother_ed_ind_8), ~ replace_na(., 0L)) %>%
mutate_at(vars(contains("_sib_factor_")), ~ replace_na(., 0L)) %>%
mutate_at(vars(contains("school_years_english_factor")), ~ replace_na(., 0L))
rm(df)
# excluding students facing no gender variation
df_grades_sfix <-
df_grades %>%
left_join(read_dta(here::here("data/cities.dta")) %>% select(stdid, district)) %>%
mutate(district = str_squish(district) %>% str_to_title()) %>%
left_join(read_dta(here::here("data/districts_clean.dta"))) %>%
arrange(stdid, fac_female) %>%
group_by(stdid) %>%
mutate(prop_female = mean(fac_female, na.rm = T)) %>%
ungroup() %>%
relocate(stdid, fac_female, prop_female) %>%
filter(prop_female > 0, prop_female < 1)
univcodes <- df_grades_sfix %>% count(univcode) %>% pull(univcode)
depids <- df_grades_sfix %>% count(department_id) %>% pull(department_id)
stdids <- df_grades_sfix %>% distinct(stdid) %>% pull(stdid)
# filtering to all faculty surveyed during the endline phase who were also mentioned by students
facdata_mentions <-
read_dta(here::here("data/all_appended.dta")) %>%
filter(tag > 1) %>% # endline only
left_join(df_stu %>% select(stdid, department_id), by = "stdid") %>% # get dep_id
select(facid, department_id) %>%
count(facid, department_id) %>%
arrange(facid, desc(n)) %>%
distinct(facid, .keep_all = T) %>%
select(-n) %>%
left_join(read_dta(here::here("data/endline_faculty_data_final.dta")), by = "facid")
# fac_stereotype <-
# read_dta("~/Downloads/faculty_data.dta") %>%
# select(facid, views_gender_math_core) %>%
# separate(views_gender_math_core, into = c("math_science_better_start", "math_science_better_after2yrs", "major_better_after2yrs"), sep = "::") %>%
# mutate_at(vars(contains("better")), ~ str_replace(., ".*&&", "") %>% str_squish() %>% na_if(""))
Baseline math confidence:
df_stu %>%
select(b_subjective_4, b_subjective_6, b_subjective_7, b_subjective_9) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.769911
Baseline physics confidence:
df_stu %>%
select(b_subjective_14, b_subjective_16, b_subjective_17, b_subjective_19) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.799303
Endline math confidence:
df_stu %>%
select(e_subjective_4, e_subjective_6, e_subjective_7, e_subjective_9) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.7827924
Endline physics confidence:
df_stu %>%
select(e_subjective_14, e_subjective_16, e_subjective_17, e_subjective_19) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.8005205
Endline math anxiety:
df_stu %>%
select(e_subjective_1, e_subjective_2, e_subjective_3, e_subjective_5, e_subjective_8, e_subjective_10) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.8190887
Endline physics anxiety:
df_stu %>%
select(e_subjective_11, e_subjective_12, e_subjective_13, e_subjective_15, e_subjective_18, e_subjective_20) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.8239434
Baseline math anxiety:
df_stu %>%
select(b_subjective_1, b_subjective_2, b_subjective_3, b_subjective_5, b_subjective_8, b_subjective_10) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.8072705
Baseline physics anxiety:
df_stu %>%
select(b_subjective_11, b_subjective_12, b_subjective_13, b_subjective_15, b_subjective_18, b_subjective_20) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::alpha() %>%
pluck("total", "raw_alpha")
## [1] 0.8126959
# endline math anxiety
df_stu <-
df_stu %>%
mutate(
e_math_anxiety = select(., e_subjective_1, e_subjective_2, e_subjective_3, e_subjective_5, e_subjective_8, e_subjective_10) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# endline physics anxiety
df_stu <-
df_stu %>%
mutate(
e_physics_anxiety = select(., e_subjective_11, e_subjective_12, e_subjective_13, e_subjective_15, e_subjective_18, e_subjective_20) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# baseline math anxiety
df_stu <-
df_stu %>%
mutate(
b_math_anxiety = select(., b_subjective_1, b_subjective_2, b_subjective_3, b_subjective_5, b_subjective_8, b_subjective_10) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# baseline physics anxiety
df_stu <-
df_stu %>%
mutate(
b_physics_anxiety = select(., b_subjective_11, b_subjective_12, b_subjective_13, b_subjective_15, b_subjective_18, b_subjective_20) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(5 - .)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# baseline math confidence
df_stu <-
df_stu %>%
mutate(
b_math_confidence = select(., b_subjective_4, b_subjective_6, b_subjective_7, b_subjective_9) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# baseline physics confidence
df_stu <-
df_stu %>%
mutate(
b_physics_confidence = select(., b_subjective_14, b_subjective_16, b_subjective_17, b_subjective_19) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# endline math confidence
df_stu <-
df_stu %>%
mutate(
e_math_confidence = select(., e_subjective_4, e_subjective_6, e_subjective_7, e_subjective_9) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
# endline physics confidence
df_stu <-
df_stu %>%
mutate(
e_physics_confidence = select(., e_subjective_14, e_subjective_16, e_subjective_17, e_subjective_19) %>%
mutate(across(contains("subjective"), ~ as.integer(.))) %>%
mutate(across(contains("subjective"), ~scale(.)[,1])) %>%
psych::principal(nfactors = 1, rotate = "none") %>%
pluck("scores") %>%
as.vector()
)
df_grades_sfix <-
df_grades_sfix %>%
left_join(
df_stu %>%
select(stdid, e_math_anxiety, e_physics_anxiety, b_math_anxiety, b_physics_anxiety, e_math_confidence, e_physics_confidence, b_math_confidence, b_physics_confidence),
by = "stdid"
)
Table 2:
df_temp1 <-
df_grades_sfix %>%
filter(subject == "math") %>%
group_by(stdid, subject, female, reservation_stu, father_college, mother_college, age, department_id, b_math_anxiety, e_math_anxiety, b_math_confidence, e_math_confidence) %>%
summarise(
reservation_fac = mean(reservation_fac, na.rm = T) * 10,
fac_associate_professor = mean(fac_associate_professor, na.rm = T) * 10,
fac_professor = mean(fac_professor, na.rm = T) * 10,
fac_yearsinhighed = mean(fac_yearsinhighed, na.rm = T) * 10,
fac_highest_degree_phd = mean(fac_highest_degree_phd, na.rm = T) * 10,
fac_highest_degree_phd_in_prog = mean(fac_highest_degree_phd_in_prog, na.rm = T) * 10,
fac_degree_college_elite = mean(fac_degree_college_elite, na.rm = T) * 10,
fac_female = mean(fac_female, na.rm = T) * 10,
course_rank = mean(course_rank, na.rm = T)
) %>%
ungroup() %>%
mutate(
b_anxiety = scale(b_math_anxiety) %>% as.vector,
e_anxiety = scale(e_math_anxiety) %>% as.vector,
b_confidence = scale(b_math_confidence) %>% as.vector,
e_confidence = scale(e_math_confidence) %>% as.vector,
# handle age missingness
miss_age = if_else(is.na(age), 1L, 0L),
age = if_else(is.na(age), 0L, age),
course_rank = scale(course_rank) %>% as.vector,
female_x_fac_female = female * fac_female
)
df_temp2 <-
df_grades_sfix %>%
filter(subject == "physics") %>%
group_by(stdid, subject, female, reservation_stu, father_college, mother_college, age, department_id, b_physics_anxiety, e_physics_anxiety, b_physics_confidence, e_physics_confidence) %>%
summarise(
reservation_fac = mean(reservation_fac, na.rm = T) * 10,
fac_associate_professor = mean(fac_associate_professor, na.rm = T) * 10,
fac_professor = mean(fac_professor, na.rm = T) * 10,
fac_yearsinhighed = mean(fac_yearsinhighed, na.rm = T) * 10,
fac_highest_degree_phd = mean(fac_highest_degree_phd, na.rm = T) * 10,
fac_highest_degree_phd_in_prog = mean(fac_highest_degree_phd_in_prog, na.rm = T) * 10,
fac_degree_college_elite = mean(fac_degree_college_elite, na.rm = T) * 10,
fac_female = mean(fac_female, na.rm = T) * 10,
course_rank = mean(course_rank, na.rm = T)
) %>%
ungroup() %>%
mutate(
b_anxiety = scale(b_physics_anxiety) %>% as.vector,
e_anxiety = scale(e_physics_anxiety) %>% as.vector,
b_confidence = scale(b_physics_confidence) %>% as.vector,
e_confidence = scale(e_physics_confidence) %>% as.vector,
# handle age missingness
miss_age = if_else(is.na(age), 1L, 0L),
age = if_else(is.na(age), 0L, age),
course_rank = scale(course_rank) %>% as.vector,
female_x_fac_female = female * fac_female
)
df_temp <-
bind_rows(df_temp1, df_temp2) %>%
arrange(stdid, subject) %>%
relocate(stdid, subject)
rm(df_temp1, df_temp2)
lm1 <-
lm_robust(e_confidence ~ female * fac_female + subject + b_confidence + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp, se_type = "stata", fixed_effects = ~ department_id)
lm2 <-
lm_robust(e_anxiety ~ female * fac_female + subject + b_anxiety + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp, se_type = "stata", fixed_effects = ~ department_id)
knitreg(list(lm1, lm2), custom.note = "%stars", stars = c(0.01, 0.05, 0.1), include.ci = F, dcolumn = T, booktabs = T, float.pos = "H", caption = "", digits = 3, include.nclust = F, include.rsquared = F, include.adjrs = F, include.rmse = F, custom.model.names = c("Confidence", "Anxiety"), custom.coef.map = list("female:fac_female" = "Female student x Female faculty share", "fac_female" = "Female faculty share", "female" = "Female student"))
| Â | Confidence | Anxiety |
|---|---|---|
| Female student x Female faculty share | -0.011 | -0.057*** |
| Â | (0.015) | (0.015) |
| Female faculty share | -0.005 | 0.006 |
| Â | (0.013) | (0.014) |
| Female student | 0.014 | 0.062 |
| Â | (0.064) | (0.068) |
| Num. obs. | 1568 | 1556 |
| ***p < 0.01; **p < 0.05; *p < 0.1 | ||
All students, female stu x female fac interaction as treatment:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_anxiety ~ female_x_fac_female + female + fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% drop_na(e_anxiety))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ female_x_fac_female + e_anxiety + female + fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp)
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "female_x_fac_female",
mediator = "e_anxiety",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.0119667 0.0056353 0.0186000 <2e-16 ***
## ADE -0.0380642 -0.0693565 -0.0027793 0.038 *
## Total Effect -0.0260975 -0.0587958 0.0090127 0.140
## Prop. Mediated -0.4585366 -3.4086674 3.6496338 0.140
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 1729
##
##
## Simulations: 1000
plot(results)
Female students only:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_anxiety ~ fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 1) %>% drop_na(e_anxiety))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ fac_female + e_anxiety + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 1))
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "fac_female",
mediator = "e_anxiety",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.0110069 0.0028527 0.0202437 0.010 **
## ADE -0.0186328 -0.0524821 0.0126291 0.232
## Total Effect -0.0076259 -0.0416317 0.0261877 0.622
## Prop. Mediated -1.4433456 -11.3378671 8.0638307 0.628
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 562
##
##
## Simulations: 1000
plot(results)
Male students only:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_anxiety ~ fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 0) %>% drop_na(e_anxiety))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ fac_female + e_anxiety + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 0))
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "fac_female",
mediator = "e_anxiety",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME -0.00366624 -0.00779264 0.00011921 0.054 .
## ADE 0.00036366 -0.01832029 0.02022600 0.992
## Total Effect -0.00330257 -0.02215784 0.01716742 0.732
## Prop. Mediated 1.11011536 -7.59067974 4.79610806 0.714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 1167
##
##
## Simulations: 1000
plot(results)
All students, female stu x female fac interaction as treatment:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_confidence ~ female_x_fac_female + female + fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% drop_na(e_confidence))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ female_x_fac_female + e_confidence + female + fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp)
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "female_x_fac_female",
mediator = "e_confidence",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.00433474 -0.00045205 0.01014912 0.076 .
## ADE -0.03054338 -0.06291720 0.00143577 0.060 .
## Total Effect -0.02620863 -0.05923856 0.00592994 0.116
## Prop. Mediated -0.16539376 -2.04360994 1.08352045 0.188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 1735
##
##
## Simulations: 1000
plot(results)
Female students only:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_confidence ~ fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 1) %>% drop_na(e_confidence))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ fac_female + e_confidence + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 1))
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "fac_female",
mediator = "e_confidence",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.0041146 -0.0039791 0.0126946 0.298
## ADE -0.0122971 -0.0443945 0.0174192 0.436
## Total Effect -0.0081825 -0.0420999 0.0225491 0.630
## Prop. Mediated -0.5028480 -4.2266439 3.2607702 0.848
##
## Sample Size Used: 560
##
##
## Simulations: 1000
plot(results)
Male students only:
# Step 1: Model M ~ X (mediator model)
model.m <- lm(e_confidence ~ fac_female + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 0) %>% drop_na(e_confidence))
# Step 2: Model Y ~ X + M (outcome model)
model.y <- lm(course_rank ~ fac_female + e_confidence + reservation_stu + age + miss_age + father_college + mother_college + fac_associate_professor + fac_professor + fac_yearsinhighed + fac_highest_degree_phd + fac_highest_degree_phd_in_prog + fac_degree_college_elite + reservation_fac, data = df_temp %>% filter(female == 0))
# Step 3: Mediation analysis
results <- mediation::mediate(model.m, model.y,
treat = "fac_female",
mediator = "e_confidence",
boot = TRUE,
sims = 1000)
summary(results)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME -0.00099019 -0.00406790 0.00169239 0.462
## ADE -0.00175747 -0.02145989 0.01779428 0.920
## Total Effect -0.00274767 -0.02218984 0.01742164 0.836
## Prop. Mediated 0.36037635 -2.10213813 3.23889059 0.918
##
## Sample Size Used: 1175
##
##
## Simulations: 1000
plot(results)