This code is part of the second assignment of Causal Inference. The analysis includes a sample of 332 institutions that had received the Hispanic-Serving Institution (HSI) designation as of 2017. Of these, 137 are recipients of an HSI-related grant, while 195 have not received any such grant but remain eligible.
Description of project
# Load the three CSV inputs, join them on the institution id (`unitid`), and
# keep only rows with no missing values.
#
# stringsAsFactors = TRUE is passed explicitly: later steps relabel
# levels() of TYPE and rely on factor codes in the summary tables, and
# read.csv() stopped creating factors by default in R 4.0.0.
data_dir <- "C:/Users/PCMcC/Documents/Causal Inference/Final Project/Data"
ipeds1 <- read.csv(file.path(data_dir, "other.csv"), stringsAsFactors = TRUE)
ipeds2 <- read.csv(file.path(data_dir, "staff.csv"), stringsAsFactors = TRUE)
hsi <- read.csv(
  file.path(data_dir, "ipedsdata2017.csv"),
  stringsAsFactors = TRUE
)
#merging files with various information
# merge() keeps only institutions present in both inputs (inner join).
ipeds3 <- merge(ipeds1, ipeds2, by = "unitid")
ipeds4 <- merge(ipeds3, hsi, by = "unitid")
# Drop every institution that has a missing value in any column.
ipeds4 <- ipeds4 %>%
  filter(complete.cases(.))
#removing unused databases
rm(ipeds1, ipeds2)
#names(ipeds4)
There are 137 institutions who have received an HSI grant included in this sample and 195 who are eligible but have not received a grant.
#Summary Statistics for HSI Institutions with Grant (treatment group)
library(stargazer)
ipedsHSIGrant <- filter(ipeds4, HSI_GRANT == 1)
cols <- c(
  "HSI_GRANT", "PELL_PERC", "FT_UG", "HISP_UG_TOTAL_FT",
  "STUDENTFACULTYRATIO", "TOTAL_ENROLLMENT", "HISPGRADRATE", "HISPSTAFF",
  "DISABILITIES", "WHITEGRADRATE", "TYPE"
)
# Select the columns directly from the subset instead of attach()-ing it, so
# no columns are left on the search path to mask later lookups.
# data.matrix() replaces non-numeric columns by their integer factor codes,
# which is the same coercion cbind() applied to the attached vectors.
vars <- data.matrix(ipedsHSIGrant[, cols])
df <- data.frame(vars)
stargazer(
  df[, cols],
  type = "text",
  summary.stat = c("N", "min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## =================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## ---------------------------------------------------------------------------------
## HSI_GRANT 137 1 1 1 1 1 1.000 0.000
## PELL_PERC 137 9 30 37 47 86 39.416 14.614
## FT_UG 137 7 122 184 282 382 198.489 103.391
## HISP_UG_TOTAL_FT 137 5 83 191 279 377 186.226 110.490
## STUDENTFACULTYRATIO 137 6 17 22 26 35 21.380 5.920
## TOTAL_ENROLLMENT 137 389 3,995 9,652 19,000 71,551 13,515.280 12,178.210
## HISPGRADRATE 137 9 20 27 41 77 31.693 15.233
## HISPSTAFF 137 11 99 170 370 5,191 358.686 588.807
## DISABILITIES 137 2 2 2 3 3 2.482 0.502
## WHITEGRADRATE 137 0 24 34 45 84 35.708 16.561
## TYPE 137 2 3 3 4 4 3.204 0.655
## ---------------------------------------------------------------------------------
#Summary Statistics for HSI Institutions with NO Grant (control group)
library(stargazer)
ipedsHSIGrant2 <- filter(ipeds4, HSI_GRANT == 0)
cols <- c(
  "HSI_GRANT", "PELL_PERC", "FT_UG", "HISP_UG_TOTAL_FT",
  "STUDENTFACULTYRATIO", "TOTAL_ENROLLMENT", "HISPGRADRATE", "HISPSTAFF",
  "DISABILITIES", "WHITEGRADRATE", "TYPE"
)
# Select the columns directly from the subset instead of attach()-ing it, so
# no columns are left on the search path to mask later lookups.
# data.matrix() replaces non-numeric columns by their integer factor codes,
# which is the same coercion cbind() applied to the attached vectors.
vars2 <- data.matrix(ipedsHSIGrant2[, cols])
df <- data.frame(vars2)
stargazer(
  df[, cols],
  type = "text",
  summary.stat = c("N", "min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## ===============================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## -------------------------------------------------------------------------------
## HSI_GRANT 195 0 0 0 0 0 0.000 0.000
## PELL_PERC 195 5 32.5 43 53 93 43.497 16.240
## FT_UG 195 1 80.5 198 298 387 190.585 118.406
## HISP_UG_TOTAL_FT 195 2 112 195 291.5 375 196.236 107.291
## STUDENTFACULTYRATIO 195 6 14 18 23.5 38 19.241 6.363
## TOTAL_ENROLLMENT 195 44 2,004 6,925 12,571 57,032 9,245.405 9,829.791
## HISPGRADRATE 195 0 20.5 30 46.5 88 34.138 19.216
## HISPSTAFF 195 0 46.5 120 228.5 2,887 216.210 346.315
## DISABILITIES 195 2 2 2 3 3 2.390 0.489
## WHITEGRADRATE 195 0 22.5 35 53 100 38.549 20.571
## TYPE 195 1 2 3 3 4 2.908 0.747
## -------------------------------------------------------------------------------
Below is a histogram of our outcome (dependent) variable, the Hispanic graduation rate, followed by histograms of selected covariates.
# Distribution of the Hispanic graduation rate across all sampled institutions
ggplot(data = ipeds4, mapping = aes(x = HISPGRADRATE)) +
  geom_histogram(bins = 50, fill = "purple", color = "black") +
  labs(
    title = "Graduation Rate of Hispanics in HSI Institutions for the Year 2017",
    x = "Graduation Rate",
    y = "Number of Institutions in 2017"
  )

# Distribution of total enrollment
ggplot(data = ipeds4, mapping = aes(x = TOTAL_ENROLLMENT)) +
  geom_histogram(bins = 50, fill = "blue", color = "black", alpha = .7) +
  labs(
    title = "Total Student Enrollment of HSI Institutions in 2017",
    x = "Total Student Enrollment",
    y = "Number of Institutions"
  )

# Distribution of the student-faculty ratio
ggplot(data = ipeds4, mapping = aes(x = STUDENTFACULTYRATIO)) +
  geom_histogram(bins = 50, fill = "blue", color = "black", alpha = .7) +
  labs(
    title = "Student Faculty Ratio of HSI Institutions in 2017",
    x = "Student Faculty Ratio",
    y = "Number of Institutions"
  )
#Histogram of Hispanic Graduation Rate by Type of All HSI Institutions
i <- ipeds4
# Map each TYPE code to its display label explicitly. Assigning a bare vector
# to levels() relabels by position, which silently mislabels panels if the
# level order differs and does nothing useful if TYPE is not a factor.
i$TYPE <- factor(
  as.character(i$TYPE),
  levels = c("Pri 2yr", "Pri 4yr", "Pub 2yr", "Pub 4yr"),
  labels = c("2 Year Private", "4 Year Private", "2 Year Public", "4 Year Public")
)
# One panel per institution type; bins is left at the geom default.
ggplot(i, aes(x = HISPGRADRATE)) +
  geom_histogram(color = "black", fill = "orange", alpha = .7) +
  facet_wrap(TYPE ~ .) +
  labs(
    title = "Hispanic Graduation Rate by Type of HSI Institutions",
    y = "Number of Institutions in 2017",
    x = "Graduation Rate"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Hispanic Graduation Rate by Institutions Recipients of HSI Grant vs. Eligible Institutions
i <- ipeds4
# HSI_GRANT is a 0/1 indicator; convert it to a labelled factor so the facet
# strips read "No Award"/"Awardee". Setting levels() on a numeric vector only
# attaches an attribute and leaves the 0/1 values as the facet labels.
i$HSI_GRANT <- factor(
  i$HSI_GRANT,
  levels = c(0, 1),
  labels = c("No Award", "Awardee")
)
# A plot can carry only one facet specification, so the earlier
# facet_wrap(HSI_GRANT) (which also referenced a bare global rather than the
# plot data) is dropped in favour of the facet_grid that actually took effect.
# margins = TRUE adds an extra panel pooling both groups.
ggplot(i, aes(x = HISPGRADRATE)) +
  geom_histogram(color = "black", fill = "green", alpha = .7, bins = 50) +
  facet_grid(HSI_GRANT ~ ., margins = TRUE) +
  labs(
    title = "Hispanic Graduation Rate by Awardees of HSI Grant vs HSI Grant Eligible",
    y = "Number of Institutions in 2017",
    x = "Graduation Rate"
  )
# Disabilities indicator: 1 when DISABILITIES is "More than 3 percent",
# 0 for every other category.
ipeds4[["disabilities"]] <- ifelse(
  test = ipeds4[["DISABILITIES"]] == "More than 3 percent",
  yes = 1,
  no = 0
)

# High Hispanic graduation rate: 1 when the rate is at least 46, else 0.
# The cut-off was chosen by the author from the upper quartile of the rate.
ipeds4[["highhispgradrate"]] <- ifelse(
  test = ipeds4[["HISPGRADRATE"]] >= 46,
  yes = 1,
  no = 0
)

# High White graduation rate: 1 when the rate is at least 50.2, else 0.
# The cut-off was chosen by the author from the upper quartile of the rate.
ipeds4[["highwhitegradrate"]] <- ifelse(
  test = ipeds4[["WHITEGRADRATE"]] >= 50.2,
  yes = 1,
  no = 0
)
#HSI Grant already coded prior to upload, 1=grant awarded, 0=eligible without grant awarded
#Hispanic Staff (using quantiles, I selected universities with a Hispanic staff higher than 75%, coded as 1=high)
# Hispanic staff as a percentage of all staff.
ipeds4$hispstaffrate <- ipeds4$HISPSTAFF / ipeds4$TOTALSTAFF * 100
# Single cut-off shared by both indicators so that every institution falls in
# exactly one group. The previous ">= 27" / "<= 26" pair left rates strictly
# between 26 and 27 coded 0 in both the high and the low indicator.
hisp_staff_cutoff <- 27
ipeds4$hispstaffratehigh <- ifelse(ipeds4$hispstaffrate >= hisp_staff_cutoff, 1, 0)
ipeds4$hispstaffratelow <- ifelse(ipeds4$hispstaffrate < hisp_staff_cutoff, 1, 0)
# One 0/1 indicator column per institution type (public/private by
# four-year/two-year). Names are the new columns, values the TYPE codes they
# flag; the order matches the order in which the columns are appended.
type_dummies <- c(
  pubfouryear = "Pub 4yr",
  privfouryear = "Pri 4yr",
  privtwoyear = "Pri 2yr",
  pubtwoyear = "Pub 2yr"
)
for (dummy in names(type_dummies)) {
  ipeds4[[dummy]] <- ifelse(ipeds4$TYPE == type_dummies[[dummy]], 1, 0)
}
# High Pell share: 1 when the percentage of Pell recipients is at least 50.25,
# else 0 (cut-off taken by the author from the upper quartile).
ipeds4[["highPellPerc"]] <- ifelse(
  test = ipeds4[["PELL_PERC"]] >= 50.25,
  yes = 1,
  no = 0
)

# High student-faculty ratio: 1 when there are at least 25 students per
# faculty member, else 0 (cut-off taken by the author from the upper quartile).
ipeds4[["highsfratio"]] <- ifelse(
  test = ipeds4[["STUDENTFACULTYRATIO"]] >= 25,
  yes = 1,
  no = 0
)

# High Hispanic share of full-time undergraduates: 1 when the percentage is at
# least 56.6, else 0 (cut-off taken by the author from the upper quartile).
ipeds4[["highhispftstudentsperc"]] <- ifelse(
  test = ipeds4[["HISP_UG_TOTAL_PERC_FT"]] >= 56.6,
  yes = 1,
  no = 0
)
The following tests show that there is a significant difference between institutions that received a grant and those that have not. On each of the covariates we observe a significant p-value: Hispanic graduation rate (which is our outcome), type of institution, a high student-faculty ratio, a high Pell recipient percentage, total enrollment, and a high percentage of White students graduating from HSI institutions.
# Baseline differences between grant recipients (HSI_GRANT == 1) and eligible
# non-recipients (HSI_GRANT == 0).
#
# These comparisons were previously written as t.test(table(HSI_GRANT, x)).
# That form runs a ONE-sample t-test on the cell counts of the
# cross-tabulation (null hypothesis: the mean cell count is 0), so it comes
# out "significant" for almost any data and does not compare the two groups.
# Numeric covariates are now compared with Welch two-sample t-tests and
# categorical covariates with chi-squared tests of independence.
# The printed results that used to follow each call came from the invalid
# tests and were removed; re-run this section to regenerate the output.
# If chisq.test() warns about small expected counts, fisher.test() on the
# same table is the usual alternative.

#Institution Recipient of HSI Grant and Hispanic Graduation Rate
t.test(HISPGRADRATE ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and Type of School
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$TYPE))

#Institution Recipient of HSI Grant and Specific Type of School
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$privfouryear))
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$privtwoyear))
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$pubfouryear))
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$pubtwoyear))

#Institution Recipient of HSI Grant and Student Faculty Ratio
t.test(STUDENTFACULTYRATIO ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and a High Percentage Hispanic Staff
# (this slot previously repeated the HISPGRADRATE test by mistake)
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$hispstaffratehigh))

#Institution Recipient of HSI Grant and Student Pell Recipient Percentage
t.test(PELL_PERC ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and Student White Graduation Rate
t.test(WHITEGRADRATE ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and Percentage of Black Students Enrolled
t.test(BLACK_PERC ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and Percentage of Total Minorities
t.test(TOTAL_MIN_NOASIAN_PERC ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and Total Enrollment
t.test(TOTAL_ENROLLMENT ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and high number of students with disabilities
# Uses the 0/1 `disabilities` indicator so that empty DISABILITIES categories
# cannot produce all-zero columns in the table.
chisq.test(table(ipeds4$HSI_GRANT, ipeds4$disabilities))

#Institution Recipient of HSI Grant and percentage of hispanic undergraduate full time students enrolled
t.test(HISP_UG_TOTAL_PERC_FT ~ HSI_GRANT, data = ipeds4)

#Institution Recipient of HSI Grant and number of hispanic staff
t.test(HISPSTAFF ~ HSI_GRANT, data = ipeds4)
The following section tests the association between each covariate and the outcome variable, which in this case is the Hispanic graduation rate.
# Association between the outcome (HISPGRADRATE) and each covariate.
#
# These checks were previously written as t.test(table(HISPGRADRATE, x)).
# That form runs a ONE-sample t-test on the cell counts of the
# cross-tabulation (null hypothesis: the mean cell count is 0) and therefore
# says nothing about how the outcome relates to x; the first call even
# cross-tabulated the outcome with itself. The outcome is now compared across
# groups with Welch t-tests (two groups) or a Welch one-way test (institution
# type), and related to numeric covariates with Pearson correlation tests.
# The printed results that used to follow each call came from the invalid
# tests and were removed; re-run this section to regenerate the output.

#Hispanic Graduation Rate and Institution Recipient of HSI Grant
t.test(HISPGRADRATE ~ HSI_GRANT, data = ipeds4)

#Hispanic Graduation Rate and Type of School
oneway.test(HISPGRADRATE ~ TYPE, data = ipeds4)

#Hispanic Graduation Rate and Specific Type of School
t.test(HISPGRADRATE ~ privfouryear, data = ipeds4)
t.test(HISPGRADRATE ~ privtwoyear, data = ipeds4)
t.test(HISPGRADRATE ~ pubfouryear, data = ipeds4)
t.test(HISPGRADRATE ~ pubtwoyear, data = ipeds4)

#Hispanic Graduation Rate and Student Faculty Ratio
cor.test(~ HISPGRADRATE + STUDENTFACULTYRATIO, data = ipeds4)

#Hispanic Graduation Rate and number of Hispanic Staff
cor.test(~ HISPGRADRATE + HISPSTAFF, data = ipeds4)

#Hispanic Graduation Rate and Student Pell Recipient Percentage
cor.test(~ HISPGRADRATE + PELL_PERC, data = ipeds4)

#Hispanic Graduation Rate and Student White Graduation Rate
cor.test(~ HISPGRADRATE + WHITEGRADRATE, data = ipeds4)

#Hispanic Graduation Rate and Percentage of Black Students Enrolled
cor.test(~ HISPGRADRATE + BLACK_PERC, data = ipeds4)

#Hispanic Graduation Rate and Percentage of Total Minorities
cor.test(~ HISPGRADRATE + TOTAL_MIN_NOASIAN_PERC, data = ipeds4)

#Hispanic Graduation Rate and Total Enrollment
cor.test(~ HISPGRADRATE + TOTAL_ENROLLMENT, data = ipeds4)

#Hispanic Graduation Rate and number of students with disabilities
# Uses the 0/1 `disabilities` indicator so the comparison has exactly two
# groups.
t.test(HISPGRADRATE ~ disabilities, data = ipeds4)

#Hispanic Graduation Rate and percentage of hispanic undergraduate full time students enrolled
cor.test(~ HISPGRADRATE + HISP_UG_TOTAL_PERC_FT, data = ipeds4)
# Propensity-score model: logistic regression of grant receipt (HSI_GRANT) on
# the binary institution-level indicators built above.
# NOTE(review): highhispgradrate is also the outcome compared between groups
# on the matched data further down; using it as a predictor here means
# institutions are matched partly on the outcome itself - confirm intended.
hsi_ps <- glm(
  formula = HSI_GRANT ~ highhispgradrate + highwhitegradrate + highPellPerc +
    highhispftstudentsperc + pubtwoyear + pubfouryear + disabilities +
    highsfratio + hispstaffratehigh,
  family = binomial(),
  data = ipeds4
)
summary(hsi_ps)
##
## Call:
## glm(formula = HSI_GRANT ~ highhispgradrate + highwhitegradrate +
## highPellPerc + highhispftstudentsperc + pubtwoyear + pubfouryear +
## disabilities + highsfratio + hispstaffratehigh, family = binomial(),
## data = ipeds4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6069 -1.0156 -0.6609 1.1874 2.0469
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.85974 0.36619 -2.348 0.01889 *
## highhispgradrate 0.11922 0.36868 0.323 0.74642
## highwhitegradrate -0.67245 0.38042 -1.768 0.07712 .
## highPellPerc -0.55068 0.31368 -1.756 0.07916 .
## highhispftstudentsperc 0.58994 0.39581 1.490 0.13610
## pubtwoyear 0.24754 0.38016 0.651 0.51495
## pubfouryear 0.99796 0.35901 2.780 0.00544 **
## disabilities 0.32674 0.24093 1.356 0.17506
## highsfratio 0.21901 0.28596 0.766 0.44375
## hispstaffratehigh 0.02242 0.37415 0.060 0.95222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 450.07 on 331 degrees of freedom
## Residual deviance: 420.77 on 322 degrees of freedom
## AIC: 440.77
##
## Number of Fisher Scoring iterations: 4
# Predicted probability of receiving the HSI grant (the propensity score) for
# every institution used to fit hsi_ps, alongside its observed grant status.
prs_df <- data.frame(
  pr_score = predict(hsi_ps, type = "response"),
  grant_recipient = hsi_ps$model$HSI_GRANT
)
# Peek at the first rows.
head(prs_df)
## pr_score grant_recipient
## 1 0.3657449 0
## 2 0.4291149 0
## 3 0.3515578 1
## 4 0.3515578 1
## 5 0.3515578 1
## 6 0.4889933 1
# Facet titles: labs[1] is shown for grant recipients, labs[2] for
# eligible non-recipients.
labs <- paste("HSI Grant Status:", c("Recipient", "Eligible"))
# Attach the facet title to every row, then draw one propensity-score
# histogram per group.
prs_plot_df <- mutate(
  prs_df,
  Recipient = ifelse(grant_recipient == 1, labs[1], labs[2])
)
ggplot(prs_plot_df, aes(x = pr_score)) +
  geom_histogram(color = "white", bins = 50) +
  facet_wrap(~Recipient) +
  xlab("Probability of Receiving HSI Grant") +
  theme_bw()
## Executing a matching algorithm
# Nearest-neighbour matching of grant recipients to eligible non-recipients,
# using the same covariates as the propensity-score model.
mod_match <- matchit(
  HSI_GRANT ~ highhispgradrate + highwhitegradrate + highPellPerc +
    highhispftstudentsperc + pubtwoyear + pubfouryear + disabilities +
    highsfratio + hispstaffratehigh,
  method = "nearest",
  data = ipeds4
)
#tested optimal method but was unable to produce output
#mod_match2 <- matchit(HSI_GRANT ~ highhispgradrate + highwhitegradrate + highPellPerc + highhispftstudentsperc + pubtwoyear+ pubfouryear + disabilities + highsfratio + hispstaffratehigh, data = ipeds4, method = "optimal", ratio = 2)
# Extract the matched observations into their own data frame; its dimensions
# show 274 rows, i.e. 137 treated institutions and their 137 matched controls.
dta_m <- match.data(mod_match)
dim(dta_m)
## [1] 274 52
library("cobalt")
## Warning: package 'cobalt' was built under R version 3.5.3
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
library("MatchIt")
library("ggplot2")
# Covariate balance before (un = TRUE) and after matching; mean differences
# are flagged against a threshold of 0.1.
bal.tab(
  mod_match,
  un = TRUE,
  m.threshold = 0.1
)
## Call
## matchit(formula = HSI_GRANT ~ highhispgradrate + highwhitegradrate +
## highPellPerc + highhispftstudentsperc + pubtwoyear + pubfouryear +
## disabilities + highsfratio + hispstaffratehigh, data = ipeds4,
## method = "nearest")
##
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 0.6616 0.1519
## highhispgradrate Binary -0.0593 0.0146 Balanced, <0.1
## highwhitegradrate Binary -0.1274 0.0000 Balanced, <0.1
## highPellPerc Binary -0.0901 0.0511 Balanced, <0.1
## highhispftstudentsperc Binary 0.1212 0.0803 Balanced, <0.1
## pubtwoyear Binary 0.0508 -0.0584 Balanced, <0.1
## pubfouryear Binary 0.1153 0.0584 Balanced, <0.1
## disabilities Binary 0.0920 0.0073 Balanced, <0.1
## highsfratio Binary 0.1233 0.0438 Balanced, <0.1
## hispstaffratehigh Binary 0.0736 0.0584 Balanced, <0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 9
## Not Balanced, >0.1 0
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## highhispftstudentsperc 0.0803 Balanced, <0.1
##
## Sample sizes
## Control Treated
## All 195 137
## Matched 137 137
## Unmatched 58 0
# Distribution of the propensity score ("distance") in each group:
# first with the default plot type, then as mirrored histograms.
bal.plot(
  mod_match,
  var.name = "distance"
)
bal.plot(
  mod_match,
  var.name = "distance",
  type = "histogram",
  mirror = TRUE
)
## Testing Matching
# Welch t-test of every matching covariate by grant status within the matched
# sample. Non-significant differences are the desired result; a significant
# one would mean the matching failed to balance that covariate.
ecls_cov <- c(
  "highhispgradrate", "highwhitegradrate", "highPellPerc",
  "highhispftstudentsperc", "pubtwoyear", "pubfouryear", "disabilities",
  "highsfratio", "hispstaffratehigh"
)
# Compare one covariate (column name `v` of dta_m) between the two groups.
balance_test <- function(v) {
  t.test(dta_m[, v] ~ dta_m$HSI_GRANT)
}
lapply(ecls_cov, balance_test)
## [[1]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -0.30705, df = 271.76, p-value = 0.759
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1082004 0.0790033
## sample estimates:
## mean in group 0 mean in group 1
## 0.1824818 0.1970803
##
##
## [[2]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = 0, df = 272, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09075167 0.09075167
## sample estimates:
## mean in group 0 mean in group 1
## 0.1751825 0.1751825
##
##
## [[3]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -1.1203, df = 268.22, p-value = 0.2636
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.14089345 0.03870367
## sample estimates:
## mean in group 0 mean in group 1
## 0.1459854 0.1970803
##
##
## [[4]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -1.4789, df = 269.92, p-value = 0.1403
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1871812 0.0265973
## sample estimates:
## mean in group 0 mean in group 1
## 0.2408759 0.3211679
##
##
## [[5]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = 0.97225, df = 271.94, p-value = 0.3318
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0598495 0.1766378
## sample estimates:
## mean in group 0 mean in group 1
## 0.5912409 0.5328467
##
##
## [[6]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -1.0465, df = 271.23, p-value = 0.2963
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.16825154 0.05146322
## sample estimates:
## mean in group 0 mean in group 1
## 0.2773723 0.3357664
##
##
## [[7]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -0.1205, df = 272, p-value = 0.9042
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1265530 0.1119544
## sample estimates:
## mean in group 0 mean in group 1
## 0.4744526 0.4817518
##
##
## [[8]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -0.78417, df = 271.57, p-value = 0.4336
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.15374910 0.06615786
## sample estimates:
## mean in group 0 mean in group 1
## 0.2846715 0.3284672
##
##
## [[9]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$HSI_GRANT
## t = -1.0869, df = 270.73, p-value = 0.2781
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.16416810 0.04737978
## sample estimates:
## mean in group 0 mean in group 1
## 0.2408759 0.2992701
# Outcome model on the matched sample: high Hispanic graduation rate regressed
# on grant receipt plus the matching covariates.
# The response (highhispgradrate) used to be listed among the predictors as
# well; R dropped that term with a warning, so removing it leaves the fitted
# coefficients unchanged and only silences the warnings (the echoed Call in
# any previously captured output will show the old formula).
# NOTE(review): no `family` is given, so this is a gaussian (linear
# probability) fit rather than a logistic regression.
glm_treat <- glm(
  highhispgradrate ~ HSI_GRANT + highwhitegradrate + highPellPerc +
    highhispftstudentsperc + pubtwoyear + pubfouryear + disabilities +
    highsfratio + hispstaffratehigh,
  data = dta_m
)
summary(glm_treat)
##
## Call:
## glm(formula = highhispgradrate ~ HSI_GRANT + highhispgradrate +
## highwhitegradrate + highPellPerc + highhispftstudentsperc +
## pubtwoyear + pubfouryear + disabilities + highsfratio + hispstaffratehigh,
## data = dta_m)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.77480 -0.14820 -0.03895 0.03295 0.98518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.155301 0.068718 2.260 0.0246 *
## HSI_GRANT 0.006455 0.038634 0.167 0.8674
## highwhitegradrate 0.457773 0.058761 7.790 1.53e-13 ***
## highPellPerc 0.063380 0.058468 1.084 0.2793
## highhispftstudentsperc -0.045953 0.064970 -0.707 0.4800
## pubtwoyear -0.140480 0.067974 -2.067 0.0397 *
## pubfouryear 0.020804 0.064378 0.323 0.7468
## disabilities 0.077538 0.039515 1.962 0.0508 .
## highsfratio -0.047771 0.045358 -1.053 0.2932
## hispstaffratehigh 0.012044 0.061703 0.195 0.8454
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1006892)
##
## Null deviance: 42.131 on 273 degrees of freedom
## Residual deviance: 26.582 on 264 degrees of freedom
## AIC: 160.36
##
## Number of Fisher Scoring iterations: 2
# Estimating treatment effects
# Welch two-sample t-test of the outcome indicator between matched grant
# recipients and matched non-recipients.
t.test(highhispgradrate ~ HSI_GRANT, data = dta_m)
##
## Welch Two Sample t-test
##
## data: highhispgradrate by HSI_GRANT
## t = -0.30705, df = 271.76, p-value = 0.759
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1082004 0.0790033
## sample estimates:
## mean in group 0 mean in group 1
## 0.1824818 0.1970803
# Same comparison as a regression: the HSI_GRANT coefficient is the difference
# in the share of high-graduation-rate institutions between the matched groups.
lm_treat1 <- lm(
  formula = highhispgradrate ~ HSI_GRANT,
  data = dta_m
)
summary(lm_treat1)
##
## Call:
## lm(formula = highhispgradrate ~ HSI_GRANT, data = dta_m)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1971 -0.1971 -0.1825 -0.1825 0.8175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.18248 0.03362 5.428 1.26e-07 ***
## HSI_GRANT 0.01460 0.04754 0.307 0.759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3935 on 272 degrees of freedom
## Multiple R-squared: 0.0003465, Adjusted R-squared: -0.003329
## F-statistic: 0.09428 on 1 and 272 DF, p-value: 0.759
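This analysis shows the estimated effect of the HSI grant among the matched HSI institutions, where matching was based on the propensity scores from the logistic regression model. We find no statistically significant evidence that receiving an HSI grant affects whether an institution has a high Hispanic graduation rate (estimated difference of about 0.015, p = 0.759).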