This code identifies the influence of HSI grants on the graduation rates of HSI institutions.
If wanting to download data from google drive:
id1 <- “1ehlTI0MPTCQLyVzyVcReZ3BOntI0_tdN” hsi <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id1))
id2 <- “1kub_6nc8RH2CRaDrW5yzG-8-Vy0Ez47l” gradRates <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id2))
id3 <- “15dwlS8X8BNNQDujLKd22urXqN04p06Dv” finaid <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id3))
id4 <- “1cNDfkcG0l6Zawb3itHmi83YMaAeZYF9q” staff<- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id4))
id5 <- “1-1rBS6xvgFPU-UlADk6zbUNfnTfF473H” part_time <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id5))
id6 <- “12fi8dgndOp_GNjuJzbfolFRpbM3zkSu6” AllStaff <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id6))
id7 <- “1X-HcLq4MDF1Ezr6wN_dUQKYnjxb_O20i” statefund <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id7))
removes or clean data sets from environment rm(id1, id2, id3, id4, id5, id6, id7)
#upload HSI data from Dept. Education Eligibility Matrix
hsi<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/HSI_granteeData2018.csv")
hsi$unitid<-as.character(hsi$unitid)
hsi$institution.name<-NULL
#upload percent of graduation rates and also availability of distanceEdu education
gradRates<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017to2018_gradratesdistance.csv")
gradRates$unitid<-as.character(gradRates$unitid)
gradRates$name<-NULL
gradRates$year<-NULL
gradRates$DISTANCE_EDU<-ifelse(gradRates$distance == "Yes", 1, 0)
#upload Percent of UG students receiving financial aid
finaid<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017PercentUGAwardedFinAid.csv")
finaid$unitid<-as.character( finaid$unitid)
#upload percent of Hispanic instructional staff
staff<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017HispFaculty.csv")
staff$unitid<-as.character(staff$unitid)
staff$HISP_INST_STAFFPERC<-staff$Hisp_InstructionalStaff/staff$total_instructionalstaff*100
staff$S2017_IS.Instructional.staff.category<-NULL
staff$institution.name<-NULL
staff$year<-NULL
#upload percent of students enrolled part-time
part_time<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017PartTime.csv")
part_time$year<-NULL
part_time$unitid<-as.character(part_time$unitid)
part_time$institution.name<-NULL
#upload percent of ALL Hispanic Staff
AllStaff<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017AllStaff.csv")
AllStaff$unitid<-as.character(AllStaff$unitid)
AllStaff$AllSTAFFHISP_PERC<-AllStaff$HISPSTAFF/AllStaff$TOTALSTAFF*100
#add information on state funds
statefund<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/SHEEO2017.csv")
#merging files with various information
merge1<-left_join(hsi, gradRates, by="unitid")
merge2<-left_join(merge1, staff, by="unitid")
merge3<-left_join(merge2, finaid, by="unitid")
merge4<-left_join(merge3,part_time, by="unitid")
merge5<-left_join(merge4,AllStaff, by="unitid")
merge6<<-left_join(merge5,statefund, by='STATE')
## Warning: Column `STATE` joining factors with different levels, coercing to
## character vector
ipeds2<-merge6%>% select (unitid, name,STATE, HSI_GRANT, TYPE, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, AllSTAFFHISP_PERC, HISP_INST_STAFFPERC, TOTAL_ENROLLMENT, StateSupport)
#remove Puerto Rico
#ipeds2<-subset(ipeds2, STATE!="PR")
#filter complete cases
ipeds2<-ipeds2 %>% filter(complete.cases(.))
#removing unused databases
rm(gradRates, hsi, part_time, staff, merge1, merge2, merge3, merge4, merge5,AllStaff, finaid)
#ipeds2<-ipeds2%>% mutate_if(is.integer, as.numeric)
There are 133 institutions who have received an HSI grant included in this sample and 188 who are eligible but have not received a grant (updated as of November, 2019)
#Summary Statistics for HSI Institutions with Grant (treatment group)
ipedsHSIGrant<-filter(ipeds2, HSI_GRANT==1)
attach(ipedsHSIGrant)
vars<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT )
library(stargazer)
df <- data.frame(vars)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT', 'AllSTAFFHISP_PERC', 'HISP_INST_STAFFPERC','TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## =====================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## -------------------------------------------------------------------------------------
## HSI_GRANT 133 1 1 1 1 1 1.000 0.000
## TYPE 133 2 3 3 4 4 3.211 0.640
## HISP_INST_STAFFPERC 133 0.000 8.190 12.537 18.462 59.618 15.381 10.631
## GRADRATE_HISP 133 9 19 27 41 77 31.361 15.254
## GRADRATE_NHWHITE 133 0 25 34 45 84 36.549 15.893
## DISTANCE_EDU 133 0 1 1 1 1 0.977 0.149
## PERC_PARTTIMEENROLL 133 2.360 30.000 60.270 71.730 84.750 52.609 22.902
## STU_FINAID 133 23 47 59 69 99 59.271 17.095
## PELL_PERC 133 9 30 37 46 72 38.218 13.039
## POST_GRAD_OFFER 133 0 0 0 1 1 0.338 0.475
## HISP_UG_TOTAL_PERC_FT 133 19 36.3 47.1 60.7 94 49.409 16.960
## AllSTAFFHISP_PERC 133 5.445 15.056 20.090 27.479 81.078 24.350 14.527
## HISP_INST_STAFFPERC.1 133 0.000 8.190 12.537 18.462 59.618 15.381 10.631
## TOTAL_ENROLLMENT 133 389 4,101 9,831 19,220 71,551 13,819.140 12,230.010
## -------------------------------------------------------------------------------------
#Summary Statistics for HSI Institutions with NO Grant (control group)
ipedsHSIGrant2<-filter(ipeds2, HSI_GRANT==0)
attach(ipedsHSIGrant2)
vars2<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT )
library(stargazer)
df <- data.frame(vars2)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT','AllSTAFFHISP_PERC','HISP_INST_STAFFPERC', 'TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## ====================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## ------------------------------------------------------------------------------------
## HSI_GRANT 188 0 0 0 0 0 0.000 0.000
## TYPE 188 1 2 3 3 4 2.910 0.714
## HISP_INST_STAFFPERC 188 0 6.0 9.4 16.4 80 13.132 13.170
## GRADRATE_HISP 188 0 19.8 30 45.2 88 33.755 19.250
## GRADRATE_NHWHITE 188 0 23 35 53 87 38.420 19.419
## DISTANCE_EDU 188 0 1 1 1 1 0.926 0.263
## PERC_PARTTIMEENROLL 188 0.000 25.912 54.680 69.045 88.220 47.843 25.096
## STU_FINAID 188 21 48 61.5 77 100 62.356 18.582
## PELL_PERC 188 5 31.8 43 52 93 42.537 15.648
## POST_GRAD_OFFER 188 0 0 0 1 1 0.383 0.487
## HISP_UG_TOTAL_PERC_FT 188 25.000 30.050 37.650 49.375 97.700 41.841 15.599
## AllSTAFFHISP_PERC 188 1.852 11.167 16.185 22.797 91.141 19.977 14.889
## HISP_INST_STAFFPERC.1 188 0 6.0 9.4 16.4 80 13.132 13.170
## TOTAL_ENROLLMENT 188 164 2,041.8 7,098 12,728.2 57,032 9,422.750 9,904.428
## ------------------------------------------------------------------------------------
#Summary Statistics for both groups combined
ipedsHSIGrant3<-ipeds2
attach(ipedsHSIGrant3)
vars3<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT )
library(stargazer)
df <- data.frame(vars3)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT', 'AllSTAFFHISP_PERC', 'HISP_INST_STAFFPERC','TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## ======================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## --------------------------------------------------------------------------------------
## HSI_GRANT 321 0 0 0 1 1 0.414 0.493
## TYPE 321 1 3 3 4 4 3.034 0.700
## HISP_INST_STAFFPERC 321 0 6.9 10.5 17.5 80 14.064 12.215
## GRADRATE_HISP 321 0 19 29 44 88 32.763 17.718
## GRADRATE_NHWHITE 321 0 24 35 50 87 37.645 18.039
## DISTANCE_EDU 321 0 1 1 1 1 0.947 0.224
## PERC_PARTTIMEENROLL 321 0.000 28.010 57.520 70.560 88.220 49.818 24.289
## STU_FINAID 321 21 47 60 73 100 61.078 18.018
## PELL_PERC 321 5 31 41 49 93 40.748 14.757
## POST_GRAD_OFFER 321 0 0 0 1 1 0.364 0.482
## HISP_UG_TOTAL_PERC_FT 321 19.200 31.400 40.800 54.900 97.700 44.977 16.577
## AllSTAFFHISP_PERC 321 1.852 12.367 17.836 25.831 91.141 21.789 14.874
## HISP_INST_STAFFPERC.1 321 0 6.9 10.5 17.5 80 14.064 12.215
## TOTAL_ENROLLMENT 321 164 3,270 8,503 15,178 71,551 11,244.310 11,123.380
## --------------------------------------------------------------------------------------
Below are a series of histograms to show the relationship between variables.
#Histogram of Hispanic Graduation Rate
ggplot(ipeds2, aes(x=GRADRATE_HISP))+
geom_histogram(color="gray", fill="blue", bins = 50) + labs(title="Figure 1. Graduation Rate of Hispanics in HSI Institutions for the Year 2017",x="Graduation Rate", y = "Number of Institutions")
#Histogram of Total Enrollment
ggplot(ipeds2, aes(x=PELL_PERC))+
geom_histogram(color="gray", fill="darkgreen", alpha = .7, bins = 50) + labs(title="Percent of Students Receiving Pell Grants at HSI Institutions in 2017",x="Percent of Students with Pell Grants", y = "Number of Institutions")
#Histogram of instructional Hispanic staff
ggplot(ipeds2, aes(x=HISP_INST_STAFFPERC))+
geom_histogram(color="gray", fill="darkblue", alpha = .7, bins = 50) + labs(title="Percent of Hispanic Instructional Staff at HSI Institutions in 2017",x="Percent of Hispanic Instructional Staff", y = "Number of Institutions")
#Histogram of Hispanic Graduation Rate by Type of All HSI Institutions
i <- ipeds2
levels(i$TYPE) <- c("Pri 2yr", "Pri 4yr", "Pub 2yr", "Pub 4yr" )
ggplot (i, aes(x = GRADRATE_HISP)) +
geom_histogram(color="black", fill="#66CC00", alpha = .7) + facet_wrap( TYPE ~.) + labs(title= "Figure 2. Hispanic Graduation Rate by Type of HSI Institutions", y="Number of Institutions in 2017", x="Graduation Rate")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Hispanic Graduation Rate by Institutions Recipients of HSI Grant vs. Eligible Institutions
library(stringr)
levels(i$HSI_GRANT) <- c("0", "1")
ggplot (i, aes(x = GRADRATE_HISP)) +
geom_histogram(color="black", fill="#228B22", alpha = .7, bins = 50) + facet_wrap(HSI_GRANT~.) +labs(title= "Figure 1. Hispanic Graduation Rate by Awardees of HSI Grant vs HSI Grant Eligible", y="Number of Institutions in 2018", x="Graduation Rate")
#Separe type of institution
#Private Institutions
ipeds2$privtwoyear<-as.numeric(ifelse(ipeds2$TYPE=="Pri 2yr", 1,0))
ipeds2$privfouryear<-as.numeric(ifelse(ipeds2$TYPE=="Pri 4yr", 1,0))
#Public Institutions
ipeds2$pubtwoyear<-as.numeric(ifelse(ipeds2$TYPE=="Pub 2yr", 1,0))
ipeds2$pubfouryear<-as.numeric(ifelse(ipeds2$TYPE=="Pub 4yr", 1,0))
The following test shows that there is a significant difference betweenn institutions who received a grant vs. those that have not.
#make copy of df
ipeds4<-ipeds2
#Institution Recipent of HSI Grant and State Support
t.test(ipeds4$HSI_GRANT, ipeds4$StateSupport)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$StateSupport
## t = -28.394, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8375591812 -7290107578
## sample estimates:
## mean of x mean of y
## 4.143302e-01 7.832850e+09
#Institution Recipent of HSI Grant and percent of students receiving financial aid
t.test(ipeds4$HSI_GRANT, ipeds4$STU_FINAID)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$STU_FINAID
## t = -60.299, df = 320.48, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -62.64283 -58.68427
## sample estimates:
## mean of x mean of y
## 0.4143302 61.0778816
#Institution Recipent of HSI Grant and Hispanic Graduation Rate
t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_HISP)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$GRADRATE_HISP
## t = -32.699, df = 320.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -34.29524 -30.40258
## sample estimates:
## mean of x mean of y
## 0.4143302 32.7632399
#Institution Recipent of HSI Grant and Spefici Type of School
t.test(ipeds4$HSI_GRANT, ipeds4$privfouryear)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$privfouryear
## t = 5.4619, df = 621.06, p-value = 6.822e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1256970 0.2668264
## sample estimates:
## mean of x mean of y
## 0.4143302 0.2180685
t.test(ipeds4$HSI_GRANT, ipeds4$privtwoyear)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$privtwoyear
## t = 14.838, df = 328.19, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3566971 0.4657328
## sample estimates:
## mean of x mean of y
## 0.414330218 0.003115265
t.test(ipeds4$HSI_GRANT, ipeds4$pubfouryear)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$pubfouryear
## t = 4.2278, df = 631.31, p-value = 2.709e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.08341353 0.22811295
## sample estimates:
## mean of x mean of y
## 0.4143302 0.2585670
t.test(ipeds4$HSI_GRANT, ipeds4$pubtwoyear)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$pubtwoyear
## t = -2.7006, df = 639.87, p-value = 0.007105
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.18293634 -0.02890167
## sample estimates:
## mean of x mean of y
## 0.4143302 0.5202492
#Institution Recipent of HSI Grant and distanceEdu education program
t.test(ipeds4$HSI_GRANT, ipeds4$DISTANCE_EDU)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$DISTANCE_EDU
## t = -17.61, df = 446.86, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.5921598 -0.4732608
## sample estimates:
## mean of x mean of y
## 0.4143302 0.9470405
#Institution Recipent of HSI Grant and a High Percentage Hispanic Staff
t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_HISP)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$GRADRATE_HISP
## t = -32.699, df = 320.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -34.29524 -30.40258
## sample estimates:
## mean of x mean of y
## 0.4143302 32.7632399
#Institution Recipent of HSI Grant and Student Pell Recipient Percentage
t.test(ipeds4$HSI_GRANT, ipeds4$PELL_PERC)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$PELL_PERC
## t = -48.942, df = 320.72, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -41.95467 -38.71200
## sample estimates:
## mean of x mean of y
## 0.4143302 40.7476636
#Institution Recipent of HSI Grant and Student White Graduation Rate
t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_NHWHITE)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$GRADRATE_NHWHITE
## t = -36.964, df = 320.48, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -39.21212 -35.24894
## sample estimates:
## mean of x mean of y
## 0.4143302 37.6448598
#Institution Recipent of HSI Grant and institution offering post-baccalareaute programs
t.test(ipeds4$HSI_GRANT, ipeds4$POST_GRAD_OFFER)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$POST_GRAD_OFFER
## t = 1.2947, df = 639.65, p-value = 0.1959
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02575556 0.12544404
## sample estimates:
## mean of x mean of y
## 0.4143302 0.3644860
#Institution Recipent of HSI Grant and and percentage of hispanic staff
t.test(ipeds4$HSI_GRANT, ipeds4$HISP_INST_STAFFPERC)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$HISP_INST_STAFFPERC
## t = -20.004, df = 321.04, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.99182 -12.30701
## sample estimates:
## mean of x mean of y
## 0.4143302 14.0637418
#Institution Recipent of HSI Grant and and percentage of hispanic staff
t.test(ipeds4$HSI_GRANT, ipeds4$AllSTAFFHISP_PERC)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$AllSTAFFHISP_PERC
## t = -25.732, df = 320.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -23.00868 -19.74019
## sample estimates:
## mean of x mean of y
## 0.4143302 21.7887673
#Institution Recipent of HSI Grant and Total Enrollment of Hispanic Students
t.test(ipeds4$HSI_GRANT, ipeds4$HISP_UG_TOTAL_PERC_FT)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$HISP_UG_TOTAL_PERC_FT
## t = -48.142, df = 320.57, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -46.38341 -42.74120
## sample estimates:
## mean of x mean of y
## 0.4143302 44.9766355
#Institution Recipent of HSI Grant and percentage of students enrolled part-time
t.test(ipeds4$HSI_GRANT, ipeds4$PERC_PARTTIMEENROLL)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$PERC_PARTTIMEENROLL
## t = -36.435, df = 320.26, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -52.07132 -46.73597
## sample estimates:
## mean of x mean of y
## 0.4143302 49.8179751
#Institution Recipent of HSI Grant and total enrollment
t.test(ipeds4$HSI_GRANT, ipeds4$TOTAL_ENROLLMENT)
##
## Welch Two Sample t-test
##
## data: ipeds4$HSI_GRANT and ipeds4$TOTAL_ENROLLMENT
## t = -18.111, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -12465.35 -10022.44
## sample estimates:
## mean of x mean of y
## 4.143302e-01 1.124431e+04
The following includes a T-Test of significance test and the outcome varialbe which in this case is Hispanic Graduation Rate
#Institution Recipent of HSI Grant and Speficic Type of School
t.test(ipeds4$GRADRATE_HISP, ipeds4$privfouryear)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$privfouryear
## t = 32.901, df = 320.35, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.59907 34.49128
## sample estimates:
## mean of x mean of y
## 32.7632399 0.2180685
t.test(ipeds4$GRADRATE_HISP, ipeds4$privtwoyear)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$privtwoyear
## t = 33.127, df = 320.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.81453 34.70572
## sample estimates:
## mean of x mean of y
## 32.763239875 0.003115265
t.test(ipeds4$GRADRATE_HISP, ipeds4$pubfouryear)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$pubfouryear
## t = 32.859, df = 320.39, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.55850 34.45084
## sample estimates:
## mean of x mean of y
## 32.763240 0.258567
t.test(ipeds4$GRADRATE_HISP, ipeds4$pubtwoyear)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$pubtwoyear
## t = 32.592, df = 320.51, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.29664 34.18934
## sample estimates:
## mean of x mean of y
## 32.7632399 0.5202492
#Institution Recipent of HSI Grant and distanceEdu Learning Programs
t.test(ipeds4$GRADRATE_HISP, ipeds4$DISTANCE_EDU)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$DISTANCE_EDU
## t = 32.17, df = 320.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 29.87046 33.76194
## sample estimates:
## mean of x mean of y
## 32.7632399 0.9470405
#Institution Recipent of HSI Grant and Percentage Hispanic Staff
t.test(ipeds4$GRADRATE_HISP, ipeds4$HISPSTAFF)
##
## One Sample t-test
##
## data: ipeds4$GRADRATE_HISP
## t = 33.131, df = 320, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 30.81766 34.70882
## sample estimates:
## mean of x
## 32.76324
#Institution Recipent of HSI Grant and Percentage Hispanic Staff
t.test(ipeds4$GRADRATE_HISP, ipeds4$AllSTAFFHISP_PERC)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$AllSTAFFHISP_PERC
## t = 8.4995, df = 621.37, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 8.438834 13.510112
## sample estimates:
## mean of x mean of y
## 32.76324 21.78877
#Institution Recipent of HSI Grant and Student Pell Recipient Percentage
t.test(ipeds4$GRADRATE_HISP, ipeds4$PELL_PERC)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$PELL_PERC
## t = -6.204, df = 619.73, p-value = 1.007e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.511806 -5.457041
## sample estimates:
## mean of x mean of y
## 32.76324 40.74766
#Institution Recipent of HSI Grant and Student White Graduation Rate
t.test(ipeds4$GRADRATE_HISP, ipeds4$GRADRATE_NHWHITE)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$GRADRATE_NHWHITE
## t = -3.459, df = 639.79, p-value = 0.0005781
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.652886 -2.110354
## sample estimates:
## mean of x mean of y
## 32.76324 37.64486
#Institution Recipent of HSI Grant and the institutions offering programs post-bacc
t.test(ipeds4$GRADRATE_HISP, ipeds4$POST_GRAD_OFFER)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$POST_GRAD_OFFER
## t = 32.75, df = 320.47, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 30.45246 34.34505
## sample estimates:
## mean of x mean of y
## 32.763240 0.364486
#Institution Recipent of HSI Grant and Total Enrollment of Hispanic Students
t.test(ipeds4$GRADRATE_HISP, ipeds4$HISP_UG_TOTAL_PERC_FT)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$HISP_UG_TOTAL_PERC_FT
## t = -9.0185, df = 637.19, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.872734 -9.554057
## sample estimates:
## mean of x mean of y
## 32.76324 44.97664
#Institution Recipent of HSI Grant and percentage of students enrolled part-time
t.test(ipeds4$GRADRATE_HISP, ipeds4$PERC_PARTTIMEENROLL)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$PERC_PARTTIMEENROLL
## t = -10.164, df = 585.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.35041 -13.75906
## sample estimates:
## mean of x mean of y
## 32.76324 49.81798
#Institution Recipent of HSI Grant and total enrollment
t.test(ipeds4$GRADRATE_HISP, ipeds4$TOTAL_ENROLLMENT)
##
## Welch Two Sample t-test
##
## data: ipeds4$GRADRATE_HISP and ipeds4$TOTAL_ENROLLMENT
## t = -18.058, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -12433.003 -9990.087
## sample estimates:
## mean of x mean of y
## 32.76324 11244.30841
Please note that two of the variables are not scaled but rather, I use a log function for total enrollment of undergraduate students and also for total enrollment of hispanic students.
#DEFINITIONS OF VARIABLES
#Type of institutions remains as categorical: Pri2yr, Pri4yr, Pubyr, Pub4yr
#Post baccalaurate offerings remains categorical as 0=no and 1=yes
#HSI grant remains categorical as 0=no and 1=yes if institution is currently recipient or not
#distanceEdu Education PROGRAMS offerings remains categorical as 0=no and 1=yes
#Scale Percent of Students Receiving Pell Grant
ipeds4$STU_FINAIDz<-as.numeric(scale(ipeds4$STU_FINAID, scale=T, center=T))
#Scale Percent of Students Receiving Pell Grant
ipeds4$PELL_PERCz<-as.numeric(scale(ipeds4$PELL_PERC, scale=T, center=T))
#Scale Percent of Hispanic Students Enrolled in Undergraduate Programs
ipeds4$HISP_UG_TOTAL_PERC_FTz<-as.numeric(log(ipeds4$HISP_UG_TOTAL_PERC_FT, base = exp(1)))
#Scale Percent of Hispanic Faculty Staff
ipeds4$HISP_INST_STAFFPERCz<-as.numeric(scale(ipeds4$HISP_INST_STAFFPERC, scale=T, center=T))
#Scale Percent of Graduation Rate of Hispanic Students
ipeds4$GRADRATE_HISPz<-as.numeric(scale(ipeds4$GRADRATE_HISP, scale=T, center=T))
#Scale Percent of Graduation Rate of Non-Hispanic White Students
ipeds4$GRADRATE_NHWHITEz<-as.numeric(scale(ipeds4$GRADRATE_NHWHITE, scale=T, center=T))
#Scale Percent of Part-Time Students Enrolled
ipeds4$PERC_PARTTIMEENROLLz<-as.numeric(scale(ipeds4$PERC_PARTTIMEENROLL, scale=T, center=T))
#Scale Percent of Part-Time Students Enrolled
ipeds4$TOTAL_ENROLLMENTz<-as.numeric(log(ipeds4$TOTAL_ENROLLMENT, base = exp(1)))
#Scale State Support
ipeds4$StateSupportz<-as.numeric(scale(ipeds4$StateSupport, scale=T, center=T))
Considering that there are so few 2 year institutions, I decided to removed them.
ipeds4<-subset(ipeds4, TYPE!="Pri 2yr")
hsi_ps <- glm(HSI_GRANT~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + AllSTAFFHISP_PERC + TOTAL_ENROLLMENTz + StateSupportz, family = binomial(), data = ipeds4)
summary(hsi_ps)
##
## Call:
## glm(formula = HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz +
## PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear +
## STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER +
## AllSTAFFHISP_PERC + TOTAL_ENROLLMENTz + StateSupportz, family = binomial(),
## data = ipeds4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6661 -1.0239 -0.6543 1.1431 2.0051
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.39664 1.36825 -3.213 0.00131 **
## GRADRATE_HISPz -0.09285 0.24342 -0.381 0.70289
## GRADRATE_NHWHITEz -0.08776 0.23491 -0.374 0.70872
## PELL_PERCz -0.44850 0.20308 -2.209 0.02721 *
## PERC_PARTTIMEENROLLz -0.23184 0.26026 -0.891 0.37304
## pubtwoyear 0.42881 0.60232 0.712 0.47651
## pubfouryear 0.92368 0.49853 1.853 0.06391 .
## STU_FINAIDz 0.27026 0.21077 1.282 0.19976
## HISP_INST_STAFFPERCz -0.42367 0.27789 -1.525 0.12736
## DISTANCE_EDU 0.62314 0.71397 0.873 0.38279
## POST_GRAD_OFFER -0.33620 0.52035 -0.646 0.51821
## AllSTAFFHISP_PERC 0.04839 0.01810 2.673 0.00751 **
## TOTAL_ENROLLMENTz 0.22836 0.14624 1.562 0.11838
## StateSupportz 0.11167 0.15169 0.736 0.46164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 434.46 on 319 degrees of freedom
## Residual deviance: 397.32 on 306 degrees of freedom
## AIC: 425.32
##
## Number of Fisher Scoring iterations: 4
#Calculation of the likelihood of receiving treatment or in this care receiving HSI grant.
prs_df <- data.frame(pr_score = predict(hsi_ps, type = "response"),
grant_recipient= hsi_ps$model$HSI_GRANT)
head(prs_df)
## pr_score grant_recipient
## 1 0.04827105 0
## 2 0.27771679 0
## 3 0.50940258 0
## 4 0.36462495 0
## 5 0.48040860 0
## 6 0.22770268 0
exp(coef(hsi_ps))
## (Intercept) GRADRATE_HISPz GRADRATE_NHWHITEz
## 0.01231871 0.91133441 0.91598457
## PELL_PERCz PERC_PARTTIMEENROLLz pubtwoyear
## 0.63858278 0.79307671 1.53542471
## pubfouryear STU_FINAIDz HISP_INST_STAFFPERCz
## 2.51854548 1.31030400 0.65463893
## DISTANCE_EDU POST_GRAD_OFFER AllSTAFFHISP_PERC
## 1.86477231 0.71447787 1.04957599
## TOTAL_ENROLLMENTz StateSupportz
## 1.25654059 1.11813876
labs <- paste("HSI Grant Status:", c("Recipient", "Eligible"))
prs_df %>%
mutate(Recipient = ifelse(grant_recipient == 1, labs[1], labs[2])) %>%
ggplot(aes(x = pr_score)) +
geom_histogram(color = "white", bins=50) +
facet_wrap(~Recipient) +
xlab("Probability of Receiving HSI Grant") +
theme_bw()
## Step 4. Execute Matching Algorith
#install.packages("optmatch")
library(optmatch)
## Warning: package 'optmatch' was built under R version 3.6.1
## The optmatch package has an academic license. Enter relaxinfo() for more information.
library(rgenoud)
## Warning: package 'rgenoud' was built under R version 3.6.1
## ## rgenoud (Version 5.8-3.0, Build Date: 2019-01-22)
## ## See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## ## Please cite software as:
## ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## ## Journal of Statistical Software, 42(11): 1-26.
## ##
library(cem)
## Warning: package 'cem' was built under R version 3.6.1
## Loading required package: tcltk
## Loading required package: lattice
##
## How to use CEM? Type vignette("cem")
##
## Attaching package: 'cem'
## The following object is masked from 'package:optmatch':
##
## pair
library(cobalt)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
#ipeds4$HISP_UG_TOTAL_PERC_FTz2<-log(ipeds4$HISP_UG_TOTAL_PERC_FTz)
mod_match2 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz +StateSupportz, method = "nearest",data = ipeds4)
#mod_match3 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + HISP_UG_TOTAL_PERC_FTz +TOTAL_ENROLLMENTz+ HISP_UG_TOTAL_PERC_FTz , method = "genetic",data = ipeds4)
#mod_match4 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + HISP_UG_TOTAL_PERC_FTz + TOTAL_ENROLLMENTz+ HISP_UG_TOTAL_PERC_FTz , method = "cem",data = ipeds4)
dta_m2 <- match.data(mod_match2)
dim(dta_m2)
## [1] 266 32
# Checking balance before and after matching:
bal.tab(mod_match2, m.threshold = 0.1, un = TRUE)
## Call
## matchit(formula = HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz +
## PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear +
## STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER +
## TOTAL_ENROLLMENTz + StateSupportz, data = ipeds4, method = "nearest")
##
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 0.6640 0.2054
## GRADRATE_HISPz Contin. -0.1387 0.0015 Balanced, <0.1
## GRADRATE_NHWHITEz Contin. -0.1014 0.0142 Balanced, <0.1
## PELL_PERCz Contin. -0.3249 -0.0288 Balanced, <0.1
## PERC_PARTTIMEENROLLz Contin. 0.1969 -0.0018 Balanced, <0.1
## pubtwoyear Binary 0.0462 -0.0376 Balanced, <0.1
## pubfouryear Binary 0.1223 0.0526 Balanced, <0.1
## STU_FINAIDz Contin. -0.1778 0.0501 Balanced, <0.1
## HISP_INST_STAFFPERCz Contin. 0.2123 0.1161 Not Balanced, >0.1
## DISTANCE_EDU Binary 0.0470 0.0150 Balanced, <0.1
## POST_GRAD_OFFER Binary -0.0467 0.0226 Balanced, <0.1
## TOTAL_ENROLLMENTz Contin. 0.5168 0.1061 Not Balanced, >0.1
## StateSupportz Contin. 0.2345 0.1660 Not Balanced, >0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 9
## Not Balanced, >0.1 3
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## StateSupportz 0.166 Not Balanced, >0.1
##
## Sample sizes
## Control Treated
## All 187 133
## Matched 133 133
## Unmatched 54 0
bal.plot(mod_match2, var.name = "distance")
bal.plot(mod_match2, var.name = "distance", mirror = TRUE, type = "histogram")
#GENETIC METHOD (not included)
#I create a dataframe containing the matched observations. The summary of it shows that there are 133 pairs of treated and control observations.
#dta_m3 <- match.data(mod_match3)
#dim(dta_m3)
# Checking balance before and after matching:
#bal.tab(mod_match3, m.threshold = 0.1, un = TRUE)
#bal.plot(mod_match3, var.name = "distance")
#bal.plot(mod_match3, var.name = "distance", mirror = TRUE, type = "histogram")
#EXACT METHOD (not included)
#I create a dataframe containing the matched observations. The summary of it shows that there are 137 pairs of treated and control observations.
#dta_m4 <- match.data(mod_match4)
#dim(dta_m4)
# Checking balance before and after matching:
#bal.tab(mod_match4, m.threshold = 0.1, un = TRUE)
#bal.plot(mod_match4, var.name = "distance")
#bal.plot(mod_match4, var.name = "distance", mirror = TRUE, type = "histogram")
The following t-tests need to show a NON significant pvalue, otherwise the matching failed. Please note the following code runs all covariates through a loop and will number each accordingly.
ecls_cov <- c('GRADRATE_HISPz' , 'GRADRATE_NHWHITEz' , 'PELL_PERCz' , 'PERC_PARTTIMEENROLLz', 'HISP_INST_STAFFPERCz', 'DISTANCE_EDU', 'pubtwoyear', 'pubfouryear' , 'POST_GRAD_OFFER', 'TOTAL_ENROLLMENTz', 'STU_FINAIDz', "StateSupportz" )
lapply(ecls_cov, function(v) {
t.test(dta_m2[, v] ~ dta_m2$HSI_GRANT)
})
## [[1]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.011144, df = 258.49, p-value = 0.9911
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2262382 0.2236920
## sample estimates:
## mean in group 0 mean in group 1
## -0.08042182 -0.07914873
##
##
## [[2]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.10629, df = 257.69, p-value = 0.9154
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2441588 0.2191503
## sample estimates:
## mean in group 0 mean in group 1
## -0.07326089 -0.06075664
##
##
## [[3]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.23329, df = 263.94, p-value = 0.8157
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1895384 0.2404897
## sample estimates:
## mean in group 0 mean in group 1
## -0.1459445 -0.1714201
##
##
## [[4]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.015018, df = 263.97, p-value = 0.988
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2247455 0.2282002
## sample estimates:
## mean in group 0 mean in group 1
## 0.1166513 0.1149239
##
##
## [[5]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.84515, df = 253.59, p-value = 0.3988
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3363786 0.1343613
## sample estimates:
## mean in group 0 mean in group 1
## 0.006827784 0.107836452
##
##
## [[6]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.71598, df = 249.31, p-value = 0.4747
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05640340 0.02632821
## sample estimates:
## mean in group 0 mean in group 1
## 0.9624060 0.9774436
##
##
## [[7]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.61695, df = 263.97, p-value = 0.5378
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08238765 0.15757562
## sample estimates:
## mean in group 0 mean in group 1
## 0.5864662 0.5488722
##
##
## [[8]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.93064, df = 263.37, p-value = 0.3529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.16398714 0.05872398
## sample estimates:
## mean in group 0 mean in group 1
## 0.2781955 0.3308271
##
##
## [[9]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.39072, df = 263.92, p-value = 0.6963
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.13622780 0.09111502
## sample estimates:
## mean in group 0 mean in group 1
## 0.3157895 0.3383459
##
##
## [[10]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.88745, df = 263.28, p-value = 0.3756
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3519283 0.1332527
## sample estimates:
## mean in group 0 mean in group 1
## 8.994589 9.103926
##
##
## [[11]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.41252, df = 263.92, p-value = 0.6803
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2746337 0.1794905
## sample estimates:
## mean in group 0 mean in group 1
## -0.1478717 -0.1003002
##
##
## [[12]]
##
## Welch Two Sample t-test
##
## data: dta_m2[, v] by dta_m2$HSI_GRANT
## t = -1.3662, df = 263.91, p-value = 0.173
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.40498501 0.07320078
## sample estimates:
## mean in group 0 mean in group 1
## -0.0252103 0.1406818
#This code can be used to identify the names institutions excluded from the matching dataset (dta_m2)
#ipeds4$included_a1 <- TRUE
#dta_m2$included_a2 <- TRUE
#res <- merge(ipeds4, dta_m2, all=TRUE)
Note, this includes an interaction term betweeen graduation rates of non-hispanic white students and the percent of students receiving financial aid.
glm_treat <- glm(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz), data = dta_m2)
summary(glm_treat)
##
## Call:
## glm(formula = GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz +
## PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz +
## DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz +
## StateSupportz + (GRADRATE_NHWHITEz * STU_FINAIDz), data = dta_m2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.81746 -0.23307 -0.04086 0.20902 1.64475
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.56922 0.31367 1.815 0.070752 .
## HSI_GRANT -0.02986 0.05372 -0.556 0.578821
## GRADRATE_NHWHITEz 0.55883 0.04348 12.854 < 2e-16 ***
## PERC_PARTTIMEENROLLz -0.22379 0.06128 -3.652 0.000317 ***
## TYPEPub 2yr -0.13878 0.13574 -1.022 0.307591
## TYPEPub 4yr -0.07706 0.11896 -0.648 0.517698
## PELL_PERCz 0.01769 0.04838 0.366 0.714852
## STU_FINAIDz -0.05775 0.04775 -1.209 0.227622
## DISTANCE_EDU -0.19891 0.16559 -1.201 0.230799
## POST_GRAD_OFFER 0.24249 0.11830 2.050 0.041422 *
## TOTAL_ENROLLMENTz -0.04409 0.03391 -1.300 0.194684
## HISP_INST_STAFFPERCz 0.04609 0.02933 1.571 0.117414
## StateSupportz 0.11109 0.03481 3.191 0.001595 **
## GRADRATE_NHWHITEz:STU_FINAIDz 0.11262 0.03013 3.738 0.000230 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1887539)
##
## Null deviance: 229.131 on 265 degrees of freedom
## Residual deviance: 47.566 on 252 degrees of freedom
## AIC: 326.99
##
## Number of Fisher Scoring iterations: 2
# Estimating treatment effects
with(dta_m2, t.test(GRADRATE_HISPz~ HSI_GRANT))
##
## Welch Two Sample t-test
##
## data: GRADRATE_HISPz by HSI_GRANT
## t = -0.011144, df = 258.49, p-value = 0.9911
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2262382 0.2236920
## sample estimates:
## mean in group 0 mean in group 1
## -0.08042182 -0.07914873
lm_treat1 <- lm(GRADRATE_HISPz~ HSI_GRANT, data = dta_m2)
summary(lm_treat1)
##
## Call:
## lm(formula = GRADRATE_HISPz ~ HSI_GRANT, data = dta_m2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7688 -0.6977 -0.2461 0.5440 3.1980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.080422 0.080782 -0.996 0.320
## HSI_GRANT 0.001273 0.114243 0.011 0.991
##
## Residual standard error: 0.9316 on 264 degrees of freedom
## Multiple R-squared: 4.704e-07, Adjusted R-squared: -0.003787
## F-statistic: 0.0001242 on 1 and 264 DF, p-value: 0.9911
exp(coef(glm_treat))
## (Intercept) HSI_GRANT
## 1.7668961 0.9705803
## GRADRATE_NHWHITEz PERC_PARTTIMEENROLLz
## 1.7486338 0.7994849
## TYPEPub 2yr TYPEPub 4yr
## 0.8704223 0.9258322
## PELL_PERCz STU_FINAIDz
## 1.0178515 0.9438851
## DISTANCE_EDU POST_GRAD_OFFER
## 0.8196253 1.2744168
## TOTAL_ENROLLMENTz HISP_INST_STAFFPERCz
## 0.9568698 1.0471648
## StateSupportz GRADRATE_NHWHITEz:STU_FINAIDz
## 1.1174938 1.1192112
I generate the treatment effects with the original dataset that contains all institutions for comparison.
#add a column with a number for all matched institutions
dta_m2$matched<- 1
#subset only the column of the identifier and the new "included" column
comparison_data<-select(dta_m2, unitid, matched)
#bring in the original data set "ipeds4" and combine with the "comparison_data" data frame
comparison_data2 <- left_join(ipeds4, comparison_data, by="unitid")
#replace NA for 0
comparison_data2$matched[is.na(comparison_data2$matched)] <- 0
#remove unused dataset
rm(comparison_data)
glm_compare<-glm(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz+ (GRADRATE_NHWHITEz*STU_FINAIDz) + matched, data =comparison_data2)
summary(glm_compare)
##
## Call:
## glm(formula = GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz +
## PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz +
## DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz +
## StateSupportz + (GRADRATE_NHWHITEz * STU_FINAIDz) + matched,
## data = comparison_data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5829 -0.2582 -0.0255 0.2553 2.5607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.27974 0.30636 0.913 0.36191
## HSI_GRANT -0.02563 0.06890 -0.372 0.71012
## GRADRATE_NHWHITEz 0.58072 0.04599 12.626 < 2e-16 ***
## PERC_PARTTIMEENROLLz -0.19969 0.06162 -3.241 0.00132 **
## TYPEPub 2yr -0.10339 0.15137 -0.683 0.49513
## TYPEPub 4yr -0.10903 0.13051 -0.835 0.40416
## PELL_PERCz 0.10555 0.05008 2.108 0.03587 *
## STU_FINAIDz -0.11393 0.05335 -2.135 0.03352 *
## DISTANCE_EDU -0.14788 0.15573 -0.950 0.34307
## POST_GRAD_OFFER 0.29280 0.12106 2.419 0.01616 *
## TOTAL_ENROLLMENTz -0.02407 0.03763 -0.640 0.52290
## HISP_INST_STAFFPERCz 0.09799 0.03416 2.868 0.00441 **
## StateSupportz 0.10153 0.03753 2.705 0.00721 **
## matched 0.05916 0.10979 0.539 0.59035
## GRADRATE_NHWHITEz:STU_FINAIDz 0.01714 0.03324 0.516 0.60651
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3109252)
##
## Null deviance: 310.943 on 319 degrees of freedom
## Residual deviance: 94.832 on 305 degrees of freedom
## AIC: 550.93
##
## Number of Fisher Scoring iterations: 2
# Estimating treatment effects with original dataset
with(comparison_data2, t.test(GRADRATE_HISPz~ HSI_GRANT))
##
## Welch Two Sample t-test
##
## data: GRADRATE_HISPz by HSI_GRANT
## t = 1.1051, df = 312.96, p-value = 0.27
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09315982 0.33190720
## sample estimates:
## mean in group 0 mean in group 1
## 0.04022496 -0.07914873
lm_treat2 <- lm(GRADRATE_HISPz~ HSI_GRANT, data = comparison_data2)
summary(lm_treat2)
##
## Call:
## lm(formula = GRADRATE_HISPz ~ HSI_GRANT, data = comparison_data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8894 -0.7042 -0.2461 0.5956 3.0774
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04022 0.07218 0.557 0.578
## HSI_GRANT -0.11937 0.11196 -1.066 0.287
##
## Residual standard error: 0.9871 on 318 degrees of freedom
## Multiple R-squared: 0.003562, Adjusted R-squared: 0.0004284
## F-statistic: 1.137 on 1 and 318 DF, p-value: 0.2872
exp(coef(glm_compare))
## (Intercept) HSI_GRANT
## 1.3227870 0.9746936
## GRADRATE_NHWHITEz PERC_PARTTIMEENROLLz
## 1.7873231 0.8189857
## TYPEPub 2yr TYPEPub 4yr
## 0.9017774 0.8967072
## PELL_PERCz STU_FINAIDz
## 1.1113185 0.8923198
## DISTANCE_EDU POST_GRAD_OFFER
## 0.8625387 1.3401736
## TOTAL_ENROLLMENTz HISP_INST_STAFFPERCz
## 0.9762163 1.1029566
## StateSupportz matched
## 1.1068606 1.0609492
## GRADRATE_NHWHITEz:STU_FINAIDz
## 1.0172858
This analysis shows the marginal effects of all HSI institutions as observed in the logistic regression model. We see that the treatment of having an HSI grant as an institution does not impact the graduation rates of Hispanics.
library(lme4)
#Test for variation of outcome accross states ALWAYS THE FIRST STEP. F value appears to be higher than 1.
fit0<-lm(GRADRATE_HISPz ~ factor(STATE), data=dta_m2)
anova(fit0)
## Analysis of Variance Table
##
## Response: GRADRATE_HISPz
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(STATE) 15 24.501 1.63343 1.9956 0.0161 *
## Residuals 250 204.629 0.81852
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Simple model no covariates using matched dataset
fit1<-lmer(GRADRATE_HISPz ~ 1 + (1|STATE), data=dta_m2)
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: GRADRATE_HISPz ~ 1 + (1 | STATE)
## Data: dta_m2
##
## REML criterion at convergence: 717.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7867 -0.7561 -0.2921 0.5511 3.4149
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATE (Intercept) 0.05734 0.2395
## Residual 0.83448 0.9135
## Number of obs: 266, groups: STATE, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.1824 0.1000 -1.823
#Model with Covariates
fit2<-lmer(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz) + (1|STATE), data=dta_m2)
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz +
## TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER +
## TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz +
## (GRADRATE_NHWHITEz * STU_FINAIDz) + (1 | STATE)
## Data: dta_m2
##
## REML criterion at convergence: 347.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1823 -0.5511 -0.0296 0.4905 4.1317
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATE (Intercept) 0.03854 0.1963
## Residual 0.17335 0.4164
## Number of obs: 266, groups: STATE, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.66005 0.32026 2.061
## HSI_GRANT -0.04002 0.05217 -0.767
## GRADRATE_NHWHITEz 0.53547 0.04329 12.368
## PERC_PARTTIMEENROLLz -0.27993 0.06202 -4.514
## TYPEPub 2yr 0.02550 0.14650 0.174
## TYPEPub 4yr -0.01226 0.12423 -0.099
## PELL_PERCz 0.01257 0.04846 0.259
## STU_FINAIDz -0.04874 0.04936 -0.988
## DISTANCE_EDU -0.31002 0.17287 -1.793
## POST_GRAD_OFFER 0.26907 0.12020 2.239
## TOTAL_ENROLLMENTz -0.05446 0.03472 -1.569
## HISP_INST_STAFFPERCz 0.02728 0.03029 0.901
## StateSupportz 0.12884 0.08370 1.539
## GRADRATE_NHWHITEz:STU_FINAIDz 0.11888 0.03038 3.914
##
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
#Test for variation with the original dataset of the outcome accross states. F value appears to be higher than 1.
fit3<-lm(GRADRATE_HISPz ~ factor(STATE), data=comparison_data2)
anova(fit3)
## Analysis of Variance Table
##
## Response: GRADRATE_HISPz
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(STATE) 17 26.907 1.58278 1.6829 0.04489 *
## Residuals 302 284.036 0.94052
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Simple model no covariates with the original dataset
fit4<-lmer(GRADRATE_HISPz ~ 1 + (1|STATE), data=comparison_data2)
summary(fit4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: GRADRATE_HISPz ~ 1 + (1 | STATE)
## Data: comparison_data2
##
## REML criterion at convergence: 901.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9575 -0.7699 -0.2857 0.6354 3.1476
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATE (Intercept) 0.05044 0.2246
## Residual 0.94656 0.9729
## Number of obs: 320, groups: STATE, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.10966 0.09334 -1.175
#Model with Covariates
fit5<-lmer(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz) + matched + (1|STATE), data=comparison_data2)
summary(fit5)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz +
## TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER +
## TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz +
## (GRADRATE_NHWHITEz * STU_FINAIDz) + matched + (1 | STATE)
## Data: comparison_data2
##
## REML criterion at convergence: 577.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.5324 -0.4707 -0.0402 0.4574 4.6958
##
## Random effects:
## Groups Name Variance Std.Dev.
## STATE (Intercept) 0.01733 0.1317
## Residual 0.30315 0.5506
## Number of obs: 320, groups: STATE, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.26485 0.31561 0.839
## HSI_GRANT -0.03014 0.06841 -0.441
## GRADRATE_NHWHITEz 0.57494 0.04647 12.371
## PERC_PARTTIMEENROLLz -0.22424 0.06333 -3.541
## TYPEPub 2yr -0.03088 0.15857 -0.195
## TYPEPub 4yr -0.08880 0.13518 -0.657
## PELL_PERCz 0.10373 0.05102 2.033
## STU_FINAIDz -0.10699 0.05447 -1.964
## DISTANCE_EDU -0.17808 0.16171 -1.101
## POST_GRAD_OFFER 0.30196 0.12206 2.474
## TOTAL_ENROLLMENTz -0.02343 0.03908 -0.600
## HISP_INST_STAFFPERCz 0.08492 0.03509 2.420
## StateSupportz 0.11169 0.06512 1.715
## matched 0.05578 0.10945 0.510
## GRADRATE_NHWHITEz:STU_FINAIDz 0.01338 0.03367 0.397
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
#GENERATE MAP WITH LOCATIONS OF HSI SCHOOLS AND PERCENTAGE OF HISPANICS BY STATE
#latlong<-read.csv("C:/Users/PCMcC/Documents/Causal Inference/Final Project/Data/lisofallschoolswithlatlong.csv")
#latlong2<-left_join(hsi, latlong, by="unitid")
#write.csv(latlong2,"C:/Users/PCMcC/Documents/Causal Inference/Final Project/map/locations")
#library(dplyr)
##census_api_key(key="95a91ddea073de2c8d473e622e3cb99007afdfee", install=T)
#MAPPING FOR PRESENTATION: OVERLAP OF DISTRIBUTION OF HSIs and DISTRIBUTION OF HISPANIC POPULATION IN BY STATE IN THE US Extract from ACS summary file data profile variables from 2015 for all states The data profile tables are very useful because they contain lots of pre-calculated variables.
#allstates_acs<-get_acs(geography = "state", year = 2017,
# variables=c( "DP05_0071P"),
# summary_var = "B01001_001",
# geometry = T, output = "wide")
#create a state FIPS code - 5 digit
#allstates_acs$state<-substr(allstates_acs$GEOID, 1, 5)
#SAVE AS SHAPEFILE
#library(sf)
#st_write(allstates_acs,dsn = "C:/Users/PCMcC/Documents/Causal Inference/Final Project/map", layer = "hsi_institutions_location", driver = "ESRI Shapefile", delete_layer = FALSE )