This code identifies the influence of HSI grants on the graduation rates of HSI institutions.

Download Data from Google Drive (If Needed)

If wanting to download data from google drive:

id1 <- “1ehlTI0MPTCQLyVzyVcReZ3BOntI0_tdN” hsi <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id1))

id2 <- “1kub_6nc8RH2CRaDrW5yzG-8-Vy0Ez47l” gradRates <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id2))

id3 <- “15dwlS8X8BNNQDujLKd22urXqN04p06Dv” finaid <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id3))

id4 <- “1cNDfkcG0l6Zawb3itHmi83YMaAeZYF9q” staff<- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id4))

id5 <- “1-1rBS6xvgFPU-UlADk6zbUNfnTfF473H” part_time <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id5))

id6 <- “12fi8dgndOp_GNjuJzbfolFRpbM3zkSu6” AllStaff <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id6))

id7 <- “1X-HcLq4MDF1Ezr6wN_dUQKYnjxb_O20i” statefund <- read.csv(sprintf(“https://docs.google.com/uc?id=%s&export=download”, id7))

removes or clean data sets from environment rm(id1, id2, id3, id4, id5, id6, id7)

Download Data Files from Hard Drive

#upload HSI data from Dept. Education Eligibility Matrix
hsi<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/HSI_granteeData2018.csv")
    hsi$unitid<-as.character(hsi$unitid)
    hsi$institution.name<-NULL
    
  #upload percent of graduation rates and also availability of distanceEdu education
gradRates<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017to2018_gradratesdistance.csv")  
  gradRates$unitid<-as.character(gradRates$unitid)
  gradRates$name<-NULL
  gradRates$year<-NULL  
  gradRates$DISTANCE_EDU<-ifelse(gradRates$distance == "Yes", 1, 0)
  
  
#upload Percent of UG students receiving financial aid
finaid<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017PercentUGAwardedFinAid.csv")
  finaid$unitid<-as.character( finaid$unitid)  

#upload percent of Hispanic instructional staff
staff<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017HispFaculty.csv")
    staff$unitid<-as.character(staff$unitid)
  staff$HISP_INST_STAFFPERC<-staff$Hisp_InstructionalStaff/staff$total_instructionalstaff*100
  staff$S2017_IS.Instructional.staff.category<-NULL
  staff$institution.name<-NULL
  staff$year<-NULL
  
#upload percent of students enrolled part-time   
part_time<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017PartTime.csv")
  part_time$year<-NULL
  part_time$unitid<-as.character(part_time$unitid)
  part_time$institution.name<-NULL
  
#upload percent of ALL Hispanic Staff 
AllStaff<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/Ipeds2017AllStaff.csv")
 AllStaff$unitid<-as.character(AllStaff$unitid)
  AllStaff$AllSTAFFHISP_PERC<-AllStaff$HISPSTAFF/AllStaff$TOTALSTAFF*100

#add information on state funds
statefund<-read.csv("C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 3 HSIs/Data2/working/SHEEO2017.csv")


#merging files with various information
merge1<-left_join(hsi, gradRates, by="unitid")
merge2<-left_join(merge1, staff, by="unitid")                   
merge3<-left_join(merge2, finaid, by="unitid")    
merge4<-left_join(merge3,part_time, by="unitid")   
merge5<-left_join(merge4,AllStaff, by="unitid")  
merge6<<-left_join(merge5,statefund, by='STATE')
## Warning: Column `STATE` joining factors with different levels, coercing to
## character vector
ipeds2<-merge6%>% select (unitid, name,STATE, HSI_GRANT, TYPE, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, AllSTAFFHISP_PERC, HISP_INST_STAFFPERC, TOTAL_ENROLLMENT, StateSupport)

#remove Puerto Rico
#ipeds2<-subset(ipeds2, STATE!="PR")

#filter complete cases
ipeds2<-ipeds2 %>% filter(complete.cases(.))

#removing unused databases
rm(gradRates, hsi, part_time, staff, merge1, merge2, merge3, merge4, merge5,AllStaff, finaid)

#ipeds2<-ipeds2%>% mutate_if(is.integer, as.numeric)

Descriptive Statististics

There are 133 institutions who have received an HSI grant included in this sample and 188 who are eligible but have not received a grant (updated as of November, 2019)

#Summary Statistics for HSI Institutions with Grant (treatment group)
ipedsHSIGrant<-filter(ipeds2, HSI_GRANT==1)
attach(ipedsHSIGrant)
vars<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT ) 
library(stargazer)
df <- data.frame(vars)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT', 'AllSTAFFHISP_PERC', 'HISP_INST_STAFFPERC','TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
## 
## =====================================================================================
## Statistic              N   Min  Pctl(25) Median Pctl(75)  Max      Mean     St. Dev. 
## -------------------------------------------------------------------------------------
## HSI_GRANT             133   1      1       1       1       1      1.000      0.000   
## TYPE                  133   2      3       3       4       4      3.211      0.640   
## HISP_INST_STAFFPERC   133 0.000  8.190   12.537  18.462  59.618   15.381     10.631  
## GRADRATE_HISP         133   9      19      27      41      77     31.361     15.254  
## GRADRATE_NHWHITE      133   0      25      34      45      84     36.549     15.893  
## DISTANCE_EDU          133   0      1       1       1       1      0.977      0.149   
## PERC_PARTTIMEENROLL   133 2.360  30.000  60.270  71.730  84.750   52.609     22.902  
## STU_FINAID            133  23      47      59      69      99     59.271     17.095  
## PELL_PERC             133   9      30      37      46      72     38.218     13.039  
## POST_GRAD_OFFER       133   0      0       0       1       1      0.338      0.475   
## HISP_UG_TOTAL_PERC_FT 133  19     36.3    47.1    60.7     94     49.409     16.960  
## AllSTAFFHISP_PERC     133 5.445  15.056  20.090  27.479  81.078   24.350     14.527  
## HISP_INST_STAFFPERC.1 133 0.000  8.190   12.537  18.462  59.618   15.381     10.631  
## TOTAL_ENROLLMENT      133  389   4,101   9,831   19,220  71,551 13,819.140 12,230.010
## -------------------------------------------------------------------------------------
#Summary Statistics for HSI Institutions with NO Grant (control group)
ipedsHSIGrant2<-filter(ipeds2, HSI_GRANT==0)
attach(ipedsHSIGrant2)
vars2<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT ) 
library(stargazer)
df <- data.frame(vars2)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT','AllSTAFFHISP_PERC','HISP_INST_STAFFPERC', 'TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
## 
## ====================================================================================
## Statistic              N   Min   Pctl(25) Median Pctl(75)  Max     Mean    St. Dev. 
## ------------------------------------------------------------------------------------
## HSI_GRANT             188   0       0       0       0       0      0.000     0.000  
## TYPE                  188   1       2       3       3       4      2.910     0.714  
## HISP_INST_STAFFPERC   188   0      6.0     9.4     16.4     80    13.132    13.170  
## GRADRATE_HISP         188   0      19.8     30     45.2     88    33.755    19.250  
## GRADRATE_NHWHITE      188   0       23      35      53      87    38.420    19.419  
## DISTANCE_EDU          188   0       1       1       1       1      0.926     0.263  
## PERC_PARTTIMEENROLL   188 0.000   25.912  54.680  69.045  88.220  47.843    25.096  
## STU_FINAID            188   21      48     61.5     77     100    62.356    18.582  
## PELL_PERC             188   5      31.8     43      52      93    42.537    15.648  
## POST_GRAD_OFFER       188   0       0       0       1       1      0.383     0.487  
## HISP_UG_TOTAL_PERC_FT 188 25.000  30.050  37.650  49.375  97.700  41.841    15.599  
## AllSTAFFHISP_PERC     188 1.852   11.167  16.185  22.797  91.141  19.977    14.889  
## HISP_INST_STAFFPERC.1 188   0      6.0     9.4     16.4     80    13.132    13.170  
## TOTAL_ENROLLMENT      188  164   2,041.8  7,098  12,728.2 57,032 9,422.750 9,904.428
## ------------------------------------------------------------------------------------
#Summary Statistics for both groups combined
ipedsHSIGrant3<-ipeds2
attach(ipedsHSIGrant3)
vars3<-cbind(HSI_GRANT, TYPE, HISP_INST_STAFFPERC, GRADRATE_HISP, GRADRATE_NHWHITE, DISTANCE_EDU, PERC_PARTTIMEENROLL, STU_FINAID,PELL_PERC, POST_GRAD_OFFER , HISP_UG_TOTAL_PERC_FT, HISP_INST_STAFFPERC, AllSTAFFHISP_PERC, TOTAL_ENROLLMENT ) 
library(stargazer)
df <- data.frame(vars3)
cols <- c('HSI_GRANT', 'TYPE', 'HISP_INST_STAFFPERC', 'GRADRATE_HISP', 'GRADRATE_NHWHITE', 'DISTANCE_EDU', 'PERC_PARTTIMEENROLL', 'STU_FINAID','PELL_PERC', 'POST_GRAD_OFFER' , 'HISP_UG_TOTAL_PERC_FT',  'AllSTAFFHISP_PERC', 'HISP_INST_STAFFPERC','TOTAL_ENROLLMENT')
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
## 
## ======================================================================================
## Statistic              N   Min   Pctl(25) Median Pctl(75)  Max      Mean     St. Dev. 
## --------------------------------------------------------------------------------------
## HSI_GRANT             321   0       0       0       1       1      0.414      0.493   
## TYPE                  321   1       3       3       4       4      3.034      0.700   
## HISP_INST_STAFFPERC   321   0      6.9     10.5    17.5     80     14.064     12.215  
## GRADRATE_HISP         321   0       19      29      44      88     32.763     17.718  
## GRADRATE_NHWHITE      321   0       24      35      50      87     37.645     18.039  
## DISTANCE_EDU          321   0       1       1       1       1      0.947      0.224   
## PERC_PARTTIMEENROLL   321 0.000   28.010  57.520  70.560  88.220   49.818     24.289  
## STU_FINAID            321   21      47      60      73     100     61.078     18.018  
## PELL_PERC             321   5       31      41      49      93     40.748     14.757  
## POST_GRAD_OFFER       321   0       0       0       1       1      0.364      0.482   
## HISP_UG_TOTAL_PERC_FT 321 19.200  31.400  40.800  54.900  97.700   44.977     16.577  
## AllSTAFFHISP_PERC     321 1.852   12.367  17.836  25.831  91.141   21.789     14.874  
## HISP_INST_STAFFPERC.1 321   0      6.9     10.5    17.5     80     14.064     12.215  
## TOTAL_ENROLLMENT      321  164    3,270   8,503   15,178  71,551 11,244.310 11,123.380
## --------------------------------------------------------------------------------------

Continuation of Descriptive Statistics

Below are a series of histograms to show the relationship between variables.

#Histogram of Hispanic Graduation Rate 
ggplot(ipeds2, aes(x=GRADRATE_HISP))+
  geom_histogram(color="gray", fill="blue", bins =  50) + labs(title="Figure 1. Graduation Rate of Hispanics in HSI Institutions for the Year 2017",x="Graduation Rate", y = "Number of Institutions")

#Histogram of Total Enrollment 
ggplot(ipeds2, aes(x=PELL_PERC))+ 
  geom_histogram(color="gray", fill="darkgreen", alpha = .7, bins = 50) + labs(title="Percent of Students Receiving Pell Grants at HSI Institutions in 2017",x="Percent of Students with Pell Grants", y = "Number of Institutions")

#Histogram of instructional Hispanic staff
ggplot(ipeds2, aes(x=HISP_INST_STAFFPERC))+ 
  geom_histogram(color="gray", fill="darkblue", alpha = .7, bins = 50) + labs(title="Percent of Hispanic Instructional Staff at HSI Institutions in 2017",x="Percent of Hispanic Instructional Staff", y = "Number of Institutions")

#Histogram of Hispanic Graduation Rate by Type of All HSI Institutions
 i <- ipeds2
 levels(i$TYPE) <- c("Pri 2yr", "Pri 4yr", "Pub 2yr", "Pub 4yr" )

 
 ggplot (i, aes(x = GRADRATE_HISP)) +
geom_histogram(color="black", fill="#66CC00", alpha = .7) + facet_wrap( TYPE ~.) + labs(title= "Figure 2. Hispanic Graduation Rate by Type of HSI Institutions", y="Number of Institutions in 2017", x="Graduation Rate")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Hispanic Graduation Rate by Institutions Recipients of HSI Grant vs. Eligible Institutions
 library(stringr)
levels(i$HSI_GRANT) <- c("0", "1")

ggplot (i, aes(x = GRADRATE_HISP)) +
  geom_histogram(color="black", fill="#228B22", alpha = .7, bins = 50) + facet_wrap(HSI_GRANT~.) +labs(title= "Figure 1. Hispanic Graduation Rate by Awardees of HSI Grant vs HSI Grant Eligible", y="Number of Institutions in 2018", x="Graduation Rate") 

#Separe type of institution
 #Private Institutions
ipeds2$privtwoyear<-as.numeric(ifelse(ipeds2$TYPE=="Pri 2yr", 1,0))
ipeds2$privfouryear<-as.numeric(ifelse(ipeds2$TYPE=="Pri 4yr", 1,0))
 #Public Institutions
ipeds2$pubtwoyear<-as.numeric(ifelse(ipeds2$TYPE=="Pub 2yr", 1,0))
ipeds2$pubfouryear<-as.numeric(ifelse(ipeds2$TYPE=="Pub 4yr", 1,0))

Propensity Score Matching Steps

Step 1. Basic T-Test of Independence between Receiving an HSI Grant and each covariate

The following test shows that there is a significant difference betweenn institutions who received a grant vs. those that have not.

#make copy of df
ipeds4<-ipeds2
#Institution Recipent of HSI Grant and State Support
t.test(ipeds4$HSI_GRANT, ipeds4$StateSupport)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$StateSupport
## t = -28.394, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8375591812 -7290107578
## sample estimates:
##    mean of x    mean of y 
## 4.143302e-01 7.832850e+09
#Institution Recipent of HSI Grant and percent of students receiving financial aid
t.test(ipeds4$HSI_GRANT, ipeds4$STU_FINAID)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$STU_FINAID
## t = -60.299, df = 320.48, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -62.64283 -58.68427
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 61.0778816
#Institution Recipent of HSI Grant and Hispanic Graduation Rate

t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_HISP)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$GRADRATE_HISP
## t = -32.699, df = 320.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -34.29524 -30.40258
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 32.7632399
#Institution Recipent of HSI Grant and Spefici Type of School
t.test(ipeds4$HSI_GRANT, ipeds4$privfouryear) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$privfouryear
## t = 5.4619, df = 621.06, p-value = 6.822e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1256970 0.2668264
## sample estimates:
## mean of x mean of y 
## 0.4143302 0.2180685
t.test(ipeds4$HSI_GRANT, ipeds4$privtwoyear)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$privtwoyear
## t = 14.838, df = 328.19, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.3566971 0.4657328
## sample estimates:
##   mean of x   mean of y 
## 0.414330218 0.003115265
t.test(ipeds4$HSI_GRANT, ipeds4$pubfouryear) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$pubfouryear
## t = 4.2278, df = 631.31, p-value = 2.709e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.08341353 0.22811295
## sample estimates:
## mean of x mean of y 
## 0.4143302 0.2585670
t.test(ipeds4$HSI_GRANT, ipeds4$pubtwoyear) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$pubtwoyear
## t = -2.7006, df = 639.87, p-value = 0.007105
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.18293634 -0.02890167
## sample estimates:
## mean of x mean of y 
## 0.4143302 0.5202492
#Institution Recipent of HSI Grant and distanceEdu education program
t.test(ipeds4$HSI_GRANT, ipeds4$DISTANCE_EDU) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$DISTANCE_EDU
## t = -17.61, df = 446.86, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5921598 -0.4732608
## sample estimates:
## mean of x mean of y 
## 0.4143302 0.9470405
#Institution Recipent of HSI Grant and a High Percentage Hispanic Staff
t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_HISP) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$GRADRATE_HISP
## t = -32.699, df = 320.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -34.29524 -30.40258
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 32.7632399
#Institution Recipent of HSI Grant and Student Pell Recipient Percentage
t.test(ipeds4$HSI_GRANT, ipeds4$PELL_PERC) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$PELL_PERC
## t = -48.942, df = 320.72, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -41.95467 -38.71200
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 40.7476636
#Institution Recipent of HSI Grant and Student White Graduation Rate
t.test(ipeds4$HSI_GRANT, ipeds4$GRADRATE_NHWHITE) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$GRADRATE_NHWHITE
## t = -36.964, df = 320.48, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -39.21212 -35.24894
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 37.6448598
#Institution Recipent of HSI Grant and institution offering post-baccalareaute programs
t.test(ipeds4$HSI_GRANT, ipeds4$POST_GRAD_OFFER) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$POST_GRAD_OFFER
## t = 1.2947, df = 639.65, p-value = 0.1959
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02575556  0.12544404
## sample estimates:
## mean of x mean of y 
## 0.4143302 0.3644860
#Institution Recipent of HSI Grant and and percentage of hispanic staff
t.test(ipeds4$HSI_GRANT, ipeds4$HISP_INST_STAFFPERC) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$HISP_INST_STAFFPERC
## t = -20.004, df = 321.04, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.99182 -12.30701
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 14.0637418
#Institution Recipent of HSI Grant and and percentage of hispanic staff
t.test(ipeds4$HSI_GRANT, ipeds4$AllSTAFFHISP_PERC) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$AllSTAFFHISP_PERC
## t = -25.732, df = 320.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.00868 -19.74019
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 21.7887673
#Institution Recipent of HSI Grant and Total Enrollment of Hispanic Students
t.test(ipeds4$HSI_GRANT, ipeds4$HISP_UG_TOTAL_PERC_FT) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$HISP_UG_TOTAL_PERC_FT
## t = -48.142, df = 320.57, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -46.38341 -42.74120
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 44.9766355
#Institution Recipent of HSI Grant and percentage of students enrolled part-time
t.test(ipeds4$HSI_GRANT, ipeds4$PERC_PARTTIMEENROLL) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$PERC_PARTTIMEENROLL
## t = -36.435, df = 320.26, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -52.07132 -46.73597
## sample estimates:
##  mean of x  mean of y 
##  0.4143302 49.8179751
#Institution Recipent of HSI Grant and total enrollment
t.test(ipeds4$HSI_GRANT, ipeds4$TOTAL_ENROLLMENT) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$HSI_GRANT and ipeds4$TOTAL_ENROLLMENT
## t = -18.111, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12465.35 -10022.44
## sample estimates:
##    mean of x    mean of y 
## 4.143302e-01 1.124431e+04

Step 2. Basic T-Test of Independence between Graduation Rates of Hispanics and each covariate

The following includes a T-Test of significance test and the outcome varialbe which in this case is Hispanic Graduation Rate

#Institution Recipent of HSI Grant and Speficic Type of School
t.test(ipeds4$GRADRATE_HISP, ipeds4$privfouryear)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$privfouryear
## t = 32.901, df = 320.35, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  30.59907 34.49128
## sample estimates:
##  mean of x  mean of y 
## 32.7632399  0.2180685
t.test(ipeds4$GRADRATE_HISP, ipeds4$privtwoyear)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$privtwoyear
## t = 33.127, df = 320.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  30.81453 34.70572
## sample estimates:
##    mean of x    mean of y 
## 32.763239875  0.003115265
t.test(ipeds4$GRADRATE_HISP, ipeds4$pubfouryear)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$pubfouryear
## t = 32.859, df = 320.39, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  30.55850 34.45084
## sample estimates:
## mean of x mean of y 
## 32.763240  0.258567
t.test(ipeds4$GRADRATE_HISP, ipeds4$pubtwoyear)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$pubtwoyear
## t = 32.592, df = 320.51, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  30.29664 34.18934
## sample estimates:
##  mean of x  mean of y 
## 32.7632399  0.5202492
#Institution Recipent of HSI Grant and distanceEdu Learning Programs
t.test(ipeds4$GRADRATE_HISP, ipeds4$DISTANCE_EDU)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$DISTANCE_EDU
## t = 32.17, df = 320.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  29.87046 33.76194
## sample estimates:
##  mean of x  mean of y 
## 32.7632399  0.9470405
#Institution Recipent of HSI Grant and  Percentage Hispanic Staff
t.test(ipeds4$GRADRATE_HISP, ipeds4$HISPSTAFF)
## 
##  One Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP
## t = 33.131, df = 320, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  30.81766 34.70882
## sample estimates:
## mean of x 
##  32.76324
#Institution Recipent of HSI Grant and  Percentage Hispanic Staff
t.test(ipeds4$GRADRATE_HISP, ipeds4$AllSTAFFHISP_PERC)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$AllSTAFFHISP_PERC
## t = 8.4995, df = 621.37, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   8.438834 13.510112
## sample estimates:
## mean of x mean of y 
##  32.76324  21.78877
#Institution Recipent of HSI Grant and Student Pell Recipient Percentage
t.test(ipeds4$GRADRATE_HISP, ipeds4$PELL_PERC)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$PELL_PERC
## t = -6.204, df = 619.73, p-value = 1.007e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.511806  -5.457041
## sample estimates:
## mean of x mean of y 
##  32.76324  40.74766
#Institution Recipent of HSI Grant and Student White Graduation Rate
t.test(ipeds4$GRADRATE_HISP, ipeds4$GRADRATE_NHWHITE)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$GRADRATE_NHWHITE
## t = -3.459, df = 639.79, p-value = 0.0005781
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.652886 -2.110354
## sample estimates:
## mean of x mean of y 
##  32.76324  37.64486
#Institution Recipent of HSI Grant and the institutions offering programs post-bacc
t.test(ipeds4$GRADRATE_HISP, ipeds4$POST_GRAD_OFFER)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$POST_GRAD_OFFER
## t = 32.75, df = 320.47, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  30.45246 34.34505
## sample estimates:
## mean of x mean of y 
## 32.763240  0.364486
#Institution Recipent of HSI Grant and Total Enrollment of Hispanic Students
t.test(ipeds4$GRADRATE_HISP, ipeds4$HISP_UG_TOTAL_PERC_FT)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$HISP_UG_TOTAL_PERC_FT
## t = -9.0185, df = 637.19, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.872734  -9.554057
## sample estimates:
## mean of x mean of y 
##  32.76324  44.97664
#Institution Recipent of HSI Grant and percentage of students enrolled part-time
t.test(ipeds4$GRADRATE_HISP, ipeds4$PERC_PARTTIMEENROLL)
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$PERC_PARTTIMEENROLL
## t = -10.164, df = 585.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.35041 -13.75906
## sample estimates:
## mean of x mean of y 
##  32.76324  49.81798
#Institution Recipent of HSI Grant and total enrollment
t.test(ipeds4$GRADRATE_HISP, ipeds4$TOTAL_ENROLLMENT) 
## 
##  Welch Two Sample t-test
## 
## data:  ipeds4$GRADRATE_HISP and ipeds4$TOTAL_ENROLLMENT
## t = -18.058, df = 320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12433.003  -9990.087
## sample estimates:
##   mean of x   mean of y 
##    32.76324 11244.30841

Recoding and/or scaling of variables

Please note that two of the variables are not scaled but rather, I use a log function for total enrollment of undergraduate students and also for total enrollment of hispanic students.

#DEFINITIONS OF VARIABLES
#Type of institutions remains as categorical: Pri2yr, Pri4yr, Pubyr, Pub4yr
#Post baccalaurate offerings remains categorical as 0=no and 1=yes
#HSI grant remains categorical as 0=no and 1=yes if institution is currently recipient or not
#distanceEdu Education PROGRAMS offerings remains categorical as 0=no and 1=yes

#Scale Percent of Students Receiving Pell Grant
ipeds4$STU_FINAIDz<-as.numeric(scale(ipeds4$STU_FINAID, scale=T, center=T))
#Scale Percent of Students Receiving Pell Grant
ipeds4$PELL_PERCz<-as.numeric(scale(ipeds4$PELL_PERC, scale=T, center=T))
#Scale Percent of Hispanic Students Enrolled in Undergraduate Programs
ipeds4$HISP_UG_TOTAL_PERC_FTz<-as.numeric(log(ipeds4$HISP_UG_TOTAL_PERC_FT, base = exp(1)))
#Scale Percent of Hispanic Faculty Staff 
ipeds4$HISP_INST_STAFFPERCz<-as.numeric(scale(ipeds4$HISP_INST_STAFFPERC, scale=T, center=T))
#Scale Percent of Graduation Rate of Hispanic Students 
ipeds4$GRADRATE_HISPz<-as.numeric(scale(ipeds4$GRADRATE_HISP, scale=T, center=T))
#Scale Percent of Graduation Rate of Non-Hispanic White Students 
ipeds4$GRADRATE_NHWHITEz<-as.numeric(scale(ipeds4$GRADRATE_NHWHITE, scale=T, center=T))
#Scale Percent of Part-Time Students Enrolled
ipeds4$PERC_PARTTIMEENROLLz<-as.numeric(scale(ipeds4$PERC_PARTTIMEENROLL, scale=T, center=T))
#Scale Percent of Part-Time Students Enrolled
ipeds4$TOTAL_ENROLLMENTz<-as.numeric(log(ipeds4$TOTAL_ENROLLMENT, base = exp(1)))
#Scale State Support
ipeds4$StateSupportz<-as.numeric(scale(ipeds4$StateSupport, scale=T, center=T))

Remove two year private institutions

Considering that there are so few 2 year institutions, I decided to removed them.

ipeds4<-subset(ipeds4, TYPE!="Pri 2yr")

Step 3. Propensity score estimation

hsi_ps <- glm(HSI_GRANT~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + AllSTAFFHISP_PERC + TOTAL_ENROLLMENTz + StateSupportz, family = binomial(), data = ipeds4)
summary(hsi_ps)
## 
## Call:
## glm(formula = HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + 
##     PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + 
##     STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + 
##     AllSTAFFHISP_PERC + TOTAL_ENROLLMENTz + StateSupportz, family = binomial(), 
##     data = ipeds4)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6661  -1.0239  -0.6543   1.1431   2.0051  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)   
## (Intercept)          -4.39664    1.36825  -3.213  0.00131 **
## GRADRATE_HISPz       -0.09285    0.24342  -0.381  0.70289   
## GRADRATE_NHWHITEz    -0.08776    0.23491  -0.374  0.70872   
## PELL_PERCz           -0.44850    0.20308  -2.209  0.02721 * 
## PERC_PARTTIMEENROLLz -0.23184    0.26026  -0.891  0.37304   
## pubtwoyear            0.42881    0.60232   0.712  0.47651   
## pubfouryear           0.92368    0.49853   1.853  0.06391 . 
## STU_FINAIDz           0.27026    0.21077   1.282  0.19976   
## HISP_INST_STAFFPERCz -0.42367    0.27789  -1.525  0.12736   
## DISTANCE_EDU          0.62314    0.71397   0.873  0.38279   
## POST_GRAD_OFFER      -0.33620    0.52035  -0.646  0.51821   
## AllSTAFFHISP_PERC     0.04839    0.01810   2.673  0.00751 **
## TOTAL_ENROLLMENTz     0.22836    0.14624   1.562  0.11838   
## StateSupportz         0.11167    0.15169   0.736  0.46164   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 434.46  on 319  degrees of freedom
## Residual deviance: 397.32  on 306  degrees of freedom
## AIC: 425.32
## 
## Number of Fisher Scoring iterations: 4
#Calculation of the likelihood of receiving treatment or in this care receiving HSI grant.
prs_df <- data.frame(pr_score = predict(hsi_ps, type = "response"),
                     grant_recipient= hsi_ps$model$HSI_GRANT)
head(prs_df)
##     pr_score grant_recipient
## 1 0.04827105               0
## 2 0.27771679               0
## 3 0.50940258               0
## 4 0.36462495               0
## 5 0.48040860               0
## 6 0.22770268               0

Convert to odd ratios

exp(coef(hsi_ps))
##          (Intercept)       GRADRATE_HISPz    GRADRATE_NHWHITEz 
##           0.01231871           0.91133441           0.91598457 
##           PELL_PERCz PERC_PARTTIMEENROLLz           pubtwoyear 
##           0.63858278           0.79307671           1.53542471 
##          pubfouryear          STU_FINAIDz HISP_INST_STAFFPERCz 
##           2.51854548           1.31030400           0.65463893 
##         DISTANCE_EDU      POST_GRAD_OFFER    AllSTAFFHISP_PERC 
##           1.86477231           0.71447787           1.04957599 
##    TOTAL_ENROLLMENTz        StateSupportz 
##           1.25654059           1.11813876

Step 4. Examine Region of Common Support

labs <- paste("HSI Grant Status:", c("Recipient", "Eligible"))
prs_df %>%
  mutate(Recipient = ifelse(grant_recipient == 1, labs[1], labs[2])) %>%
  ggplot(aes(x = pr_score)) +
  geom_histogram(color = "white", bins=50) +
  facet_wrap(~Recipient) +
  xlab("Probability of Receiving HSI Grant") +
  theme_bw()

## Step 4. Execute Matching Algorith

#install.packages("optmatch")
library(optmatch)
## Warning: package 'optmatch' was built under R version 3.6.1
## The optmatch package has an academic license. Enter relaxinfo() for more information.
library(rgenoud)
## Warning: package 'rgenoud' was built under R version 3.6.1
## ##  rgenoud (Version 5.8-3.0, Build Date: 2019-01-22)
## ##  See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## ##  Please cite software as:
## ##   Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ##   ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## ##   Journal of Statistical Software, 42(11): 1-26. 
## ##
library(cem)
## Warning: package 'cem' was built under R version 3.6.1
## Loading required package: tcltk
## Loading required package: lattice
## 
## How to use CEM? Type vignette("cem")
## 
## Attaching package: 'cem'
## The following object is masked from 'package:optmatch':
## 
##     pair
library(cobalt)
## 
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
## 
##     lalonde
#ipeds4$HISP_UG_TOTAL_PERC_FTz2<-log(ipeds4$HISP_UG_TOTAL_PERC_FTz)

mod_match2 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz +StateSupportz,  method = "nearest",data = ipeds4)


#mod_match3 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + HISP_UG_TOTAL_PERC_FTz +TOTAL_ENROLLMENTz+ HISP_UG_TOTAL_PERC_FTz , method = "genetic",data = ipeds4)

#mod_match4 <- matchit(HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + HISP_UG_TOTAL_PERC_FTz + TOTAL_ENROLLMENTz+ HISP_UG_TOTAL_PERC_FTz , method = "cem",data = ipeds4)

Step 5. Check the Match “Nearest Method”

dta_m2 <- match.data(mod_match2)
dim(dta_m2)
## [1] 266  32
# Checking balance before and after matching:
bal.tab(mod_match2, m.threshold = 0.1, un = TRUE)
## Call
##  matchit(formula = HSI_GRANT ~ GRADRATE_HISPz + GRADRATE_NHWHITEz + 
##     PELL_PERCz + PERC_PARTTIMEENROLLz + pubtwoyear + pubfouryear + 
##     STU_FINAIDz + HISP_INST_STAFFPERCz + DISTANCE_EDU + POST_GRAD_OFFER + 
##     TOTAL_ENROLLMENTz + StateSupportz, data = ipeds4, method = "nearest")
## 
## Balance Measures
##                          Type Diff.Un Diff.Adj        M.Threshold
## distance             Distance  0.6640   0.2054                   
## GRADRATE_HISPz        Contin. -0.1387   0.0015     Balanced, <0.1
## GRADRATE_NHWHITEz     Contin. -0.1014   0.0142     Balanced, <0.1
## PELL_PERCz            Contin. -0.3249  -0.0288     Balanced, <0.1
## PERC_PARTTIMEENROLLz  Contin.  0.1969  -0.0018     Balanced, <0.1
## pubtwoyear             Binary  0.0462  -0.0376     Balanced, <0.1
## pubfouryear            Binary  0.1223   0.0526     Balanced, <0.1
## STU_FINAIDz           Contin. -0.1778   0.0501     Balanced, <0.1
## HISP_INST_STAFFPERCz  Contin.  0.2123   0.1161 Not Balanced, >0.1
## DISTANCE_EDU           Binary  0.0470   0.0150     Balanced, <0.1
## POST_GRAD_OFFER        Binary -0.0467   0.0226     Balanced, <0.1
## TOTAL_ENROLLMENTz     Contin.  0.5168   0.1061 Not Balanced, >0.1
## StateSupportz         Contin.  0.2345   0.1660 Not Balanced, >0.1
## 
## Balance tally for mean differences
##                    count
## Balanced, <0.1         9
## Not Balanced, >0.1     3
## 
## Variable with the greatest mean difference
##       Variable Diff.Adj        M.Threshold
##  StateSupportz    0.166 Not Balanced, >0.1
## 
## Sample sizes
##           Control Treated
## All           187     133
## Matched       133     133
## Unmatched      54       0
bal.plot(mod_match2, var.name = "distance")

bal.plot(mod_match2, var.name = "distance", mirror = TRUE, type = "histogram")

#GENETIC METHOD (not included)

#I create a dataframe containing the matched observations. The summary of it shows that there are 133 pairs of treated and control observations.
#dta_m3 <- match.data(mod_match3)
#dim(dta_m3)

# Checking balance before and after matching:
#bal.tab(mod_match3, m.threshold = 0.1, un = TRUE)
#bal.plot(mod_match3, var.name = "distance")
#bal.plot(mod_match3, var.name = "distance", mirror = TRUE, type = "histogram")

#EXACT METHOD (not included)

#I create a dataframe containing the matched observations. The summary of it shows that there are 137 pairs of treated and control observations.
#dta_m4 <- match.data(mod_match4)
#dim(dta_m4)

# Checking balance before and after matching:
#bal.tab(mod_match4, m.threshold = 0.1, un = TRUE)
#bal.plot(mod_match4, var.name = "distance")
#bal.plot(mod_match4, var.name = "distance", mirror = TRUE, type = "histogram")

Step 6. Test Matching

The following t-tests need to show a NON significant pvalue, otherwise the matching failed. Please note the following code runs all covariates through a loop and will number each accordingly.

ecls_cov <- c('GRADRATE_HISPz' , 'GRADRATE_NHWHITEz' , 'PELL_PERCz'  , 'PERC_PARTTIMEENROLLz', 'HISP_INST_STAFFPERCz', 'DISTANCE_EDU', 'pubtwoyear', 'pubfouryear' , 'POST_GRAD_OFFER', 'TOTAL_ENROLLMENTz',  'STU_FINAIDz', "StateSupportz" )

lapply(ecls_cov, function(v) {
    t.test(dta_m2[, v] ~ dta_m2$HSI_GRANT)
})
## [[1]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.011144, df = 258.49, p-value = 0.9911
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2262382  0.2236920
## sample estimates:
## mean in group 0 mean in group 1 
##     -0.08042182     -0.07914873 
## 
## 
## [[2]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.10629, df = 257.69, p-value = 0.9154
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2441588  0.2191503
## sample estimates:
## mean in group 0 mean in group 1 
##     -0.07326089     -0.06075664 
## 
## 
## [[3]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.23329, df = 263.94, p-value = 0.8157
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1895384  0.2404897
## sample estimates:
## mean in group 0 mean in group 1 
##      -0.1459445      -0.1714201 
## 
## 
## [[4]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.015018, df = 263.97, p-value = 0.988
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2247455  0.2282002
## sample estimates:
## mean in group 0 mean in group 1 
##       0.1166513       0.1149239 
## 
## 
## [[5]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.84515, df = 253.59, p-value = 0.3988
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3363786  0.1343613
## sample estimates:
## mean in group 0 mean in group 1 
##     0.006827784     0.107836452 
## 
## 
## [[6]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.71598, df = 249.31, p-value = 0.4747
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05640340  0.02632821
## sample estimates:
## mean in group 0 mean in group 1 
##       0.9624060       0.9774436 
## 
## 
## [[7]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = 0.61695, df = 263.97, p-value = 0.5378
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08238765  0.15757562
## sample estimates:
## mean in group 0 mean in group 1 
##       0.5864662       0.5488722 
## 
## 
## [[8]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.93064, df = 263.37, p-value = 0.3529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.16398714  0.05872398
## sample estimates:
## mean in group 0 mean in group 1 
##       0.2781955       0.3308271 
## 
## 
## [[9]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.39072, df = 263.92, p-value = 0.6963
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.13622780  0.09111502
## sample estimates:
## mean in group 0 mean in group 1 
##       0.3157895       0.3383459 
## 
## 
## [[10]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.88745, df = 263.28, p-value = 0.3756
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3519283  0.1332527
## sample estimates:
## mean in group 0 mean in group 1 
##        8.994589        9.103926 
## 
## 
## [[11]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -0.41252, df = 263.92, p-value = 0.6803
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2746337  0.1794905
## sample estimates:
## mean in group 0 mean in group 1 
##      -0.1478717      -0.1003002 
## 
## 
## [[12]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m2[, v] by dta_m2$HSI_GRANT
## t = -1.3662, df = 263.91, p-value = 0.173
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.40498501  0.07320078
## sample estimates:
## mean in group 0 mean in group 1 
##      -0.0252103       0.1406818

#This code can be used to identify the names institutions excluded from the matching dataset (dta_m2)

#ipeds4$included_a1 <- TRUE
#dta_m2$included_a2 <- TRUE
#res <- merge(ipeds4, dta_m2, all=TRUE)

Step 7a. Estimate Treatment Effects using matching data set: dta_m2

Note, this includes an interaction term betweeen graduation rates of non-hispanic white students and the percent of students receiving financial aid.

glm_treat <- glm(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz  + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER  + TOTAL_ENROLLMENTz  + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz), data = dta_m2)
summary(glm_treat)
## 
## Call:
## glm(formula = GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + 
##     PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + 
##     DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + 
##     StateSupportz + (GRADRATE_NHWHITEz * STU_FINAIDz), data = dta_m2)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.81746  -0.23307  -0.04086   0.20902   1.64475  
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.56922    0.31367   1.815 0.070752 .  
## HSI_GRANT                     -0.02986    0.05372  -0.556 0.578821    
## GRADRATE_NHWHITEz              0.55883    0.04348  12.854  < 2e-16 ***
## PERC_PARTTIMEENROLLz          -0.22379    0.06128  -3.652 0.000317 ***
## TYPEPub 2yr                   -0.13878    0.13574  -1.022 0.307591    
## TYPEPub 4yr                   -0.07706    0.11896  -0.648 0.517698    
## PELL_PERCz                     0.01769    0.04838   0.366 0.714852    
## STU_FINAIDz                   -0.05775    0.04775  -1.209 0.227622    
## DISTANCE_EDU                  -0.19891    0.16559  -1.201 0.230799    
## POST_GRAD_OFFER                0.24249    0.11830   2.050 0.041422 *  
## TOTAL_ENROLLMENTz             -0.04409    0.03391  -1.300 0.194684    
## HISP_INST_STAFFPERCz           0.04609    0.02933   1.571 0.117414    
## StateSupportz                  0.11109    0.03481   3.191 0.001595 ** 
## GRADRATE_NHWHITEz:STU_FINAIDz  0.11262    0.03013   3.738 0.000230 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1887539)
## 
##     Null deviance: 229.131  on 265  degrees of freedom
## Residual deviance:  47.566  on 252  degrees of freedom
## AIC: 326.99
## 
## Number of Fisher Scoring iterations: 2
# Estimating treatment effects
with(dta_m2, t.test(GRADRATE_HISPz~ HSI_GRANT))
## 
##  Welch Two Sample t-test
## 
## data:  GRADRATE_HISPz by HSI_GRANT
## t = -0.011144, df = 258.49, p-value = 0.9911
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2262382  0.2236920
## sample estimates:
## mean in group 0 mean in group 1 
##     -0.08042182     -0.07914873
lm_treat1 <- lm(GRADRATE_HISPz~ HSI_GRANT, data = dta_m2)
summary(lm_treat1)
## 
## Call:
## lm(formula = GRADRATE_HISPz ~ HSI_GRANT, data = dta_m2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7688 -0.6977 -0.2461  0.5440  3.1980 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.080422   0.080782  -0.996    0.320
## HSI_GRANT    0.001273   0.114243   0.011    0.991
## 
## Residual standard error: 0.9316 on 264 degrees of freedom
## Multiple R-squared:  4.704e-07,  Adjusted R-squared:  -0.003787 
## F-statistic: 0.0001242 on 1 and 264 DF,  p-value: 0.9911
exp(coef(glm_treat))
##                   (Intercept)                     HSI_GRANT 
##                     1.7668961                     0.9705803 
##             GRADRATE_NHWHITEz          PERC_PARTTIMEENROLLz 
##                     1.7486338                     0.7994849 
##                   TYPEPub 2yr                   TYPEPub 4yr 
##                     0.8704223                     0.9258322 
##                    PELL_PERCz                   STU_FINAIDz 
##                     1.0178515                     0.9438851 
##                  DISTANCE_EDU               POST_GRAD_OFFER 
##                     0.8196253                     1.2744168 
##             TOTAL_ENROLLMENTz          HISP_INST_STAFFPERCz 
##                     0.9568698                     1.0471648 
##                 StateSupportz GRADRATE_NHWHITEz:STU_FINAIDz 
##                     1.1174938                     1.1192112

Step 7b. Estimate Treatment Effects using ORIGINAL data set: ipeds4

I generate the treatment effects with the original dataset that contains all institutions for comparison.

#add a column with a number for all matched institutions
dta_m2$matched<- 1
#subset only the column of the identifier and the new "included" column
comparison_data<-select(dta_m2, unitid, matched)
#bring in the original data set "ipeds4" and combine with the "comparison_data" data frame
comparison_data2 <- left_join(ipeds4, comparison_data, by="unitid")
#replace NA for 0
comparison_data2$matched[is.na(comparison_data2$matched)] <- 0
#remove unused dataset
rm(comparison_data)


glm_compare<-glm(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz  + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER  + TOTAL_ENROLLMENTz  + HISP_INST_STAFFPERCz + StateSupportz+ (GRADRATE_NHWHITEz*STU_FINAIDz) + matched, data =comparison_data2)
summary(glm_compare)
## 
## Call:
## glm(formula = GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + 
##     PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz + STU_FINAIDz + 
##     DISTANCE_EDU + POST_GRAD_OFFER + TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + 
##     StateSupportz + (GRADRATE_NHWHITEz * STU_FINAIDz) + matched, 
##     data = comparison_data2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.5829  -0.2582  -0.0255   0.2553   2.5607  
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.27974    0.30636   0.913  0.36191    
## HSI_GRANT                     -0.02563    0.06890  -0.372  0.71012    
## GRADRATE_NHWHITEz              0.58072    0.04599  12.626  < 2e-16 ***
## PERC_PARTTIMEENROLLz          -0.19969    0.06162  -3.241  0.00132 ** 
## TYPEPub 2yr                   -0.10339    0.15137  -0.683  0.49513    
## TYPEPub 4yr                   -0.10903    0.13051  -0.835  0.40416    
## PELL_PERCz                     0.10555    0.05008   2.108  0.03587 *  
## STU_FINAIDz                   -0.11393    0.05335  -2.135  0.03352 *  
## DISTANCE_EDU                  -0.14788    0.15573  -0.950  0.34307    
## POST_GRAD_OFFER                0.29280    0.12106   2.419  0.01616 *  
## TOTAL_ENROLLMENTz             -0.02407    0.03763  -0.640  0.52290    
## HISP_INST_STAFFPERCz           0.09799    0.03416   2.868  0.00441 ** 
## StateSupportz                  0.10153    0.03753   2.705  0.00721 ** 
## matched                        0.05916    0.10979   0.539  0.59035    
## GRADRATE_NHWHITEz:STU_FINAIDz  0.01714    0.03324   0.516  0.60651    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.3109252)
## 
##     Null deviance: 310.943  on 319  degrees of freedom
## Residual deviance:  94.832  on 305  degrees of freedom
## AIC: 550.93
## 
## Number of Fisher Scoring iterations: 2
# Estimating treatment effects with original dataset
with(comparison_data2, t.test(GRADRATE_HISPz~ HSI_GRANT))
## 
##  Welch Two Sample t-test
## 
## data:  GRADRATE_HISPz by HSI_GRANT
## t = 1.1051, df = 312.96, p-value = 0.27
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.09315982  0.33190720
## sample estimates:
## mean in group 0 mean in group 1 
##      0.04022496     -0.07914873
lm_treat2 <- lm(GRADRATE_HISPz~ HSI_GRANT, data = comparison_data2)
summary(lm_treat2)
## 
## Call:
## lm(formula = GRADRATE_HISPz ~ HSI_GRANT, data = comparison_data2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8894 -0.7042 -0.2461  0.5956  3.0774 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.04022    0.07218   0.557    0.578
## HSI_GRANT   -0.11937    0.11196  -1.066    0.287
## 
## Residual standard error: 0.9871 on 318 degrees of freedom
## Multiple R-squared:  0.003562,   Adjusted R-squared:  0.0004284 
## F-statistic: 1.137 on 1 and 318 DF,  p-value: 0.2872
exp(coef(glm_compare))
##                   (Intercept)                     HSI_GRANT 
##                     1.3227870                     0.9746936 
##             GRADRATE_NHWHITEz          PERC_PARTTIMEENROLLz 
##                     1.7873231                     0.8189857 
##                   TYPEPub 2yr                   TYPEPub 4yr 
##                     0.9017774                     0.8967072 
##                    PELL_PERCz                   STU_FINAIDz 
##                     1.1113185                     0.8923198 
##                  DISTANCE_EDU               POST_GRAD_OFFER 
##                     0.8625387                     1.3401736 
##             TOTAL_ENROLLMENTz          HISP_INST_STAFFPERCz 
##                     0.9762163                     1.1029566 
##                 StateSupportz                       matched 
##                     1.1068606                     1.0609492 
## GRADRATE_NHWHITEz:STU_FINAIDz 
##                     1.0172858

This analysis shows the marginal effects of all HSI institutions as observed in the logistic regression model. We see that the treatment of having an HSI grant as an institution does not impact the graduation rates of Hispanics.

Step 8a. Estimate Treatment Effects & State Effects using matching data set: dta_m2

library(lme4)

#Test for variation of outcome accross states ALWAYS THE FIRST STEP. F value appears to be higher than 1.
fit0<-lm(GRADRATE_HISPz ~ factor(STATE),  data=dta_m2)
anova(fit0)
## Analysis of Variance Table
## 
## Response: GRADRATE_HISPz
##                Df  Sum Sq Mean Sq F value Pr(>F)  
## factor(STATE)  15  24.501 1.63343  1.9956 0.0161 *
## Residuals     250 204.629 0.81852                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Simple model no covariates using matched dataset
fit1<-lmer(GRADRATE_HISPz ~ 1 + (1|STATE), data=dta_m2)
summary(fit1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: GRADRATE_HISPz ~ 1 + (1 | STATE)
##    Data: dta_m2
## 
## REML criterion at convergence: 717.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7867 -0.7561 -0.2921  0.5511  3.4149 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATE    (Intercept) 0.05734  0.2395  
##  Residual             0.83448  0.9135  
## Number of obs: 266, groups:  STATE, 16
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.1824     0.1000  -1.823
#Model with Covariates
fit2<-lmer(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz  + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER  + TOTAL_ENROLLMENTz  + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz) + (1|STATE), data=dta_m2)
summary(fit2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz +  
##     TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER +  
##     TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz +  
##     (GRADRATE_NHWHITEz * STU_FINAIDz) + (1 | STATE)
##    Data: dta_m2
## 
## REML criterion at convergence: 347.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.1823 -0.5511 -0.0296  0.4905  4.1317 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATE    (Intercept) 0.03854  0.1963  
##  Residual             0.17335  0.4164  
## Number of obs: 266, groups:  STATE, 16
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                    0.66005    0.32026   2.061
## HSI_GRANT                     -0.04002    0.05217  -0.767
## GRADRATE_NHWHITEz              0.53547    0.04329  12.368
## PERC_PARTTIMEENROLLz          -0.27993    0.06202  -4.514
## TYPEPub 2yr                    0.02550    0.14650   0.174
## TYPEPub 4yr                   -0.01226    0.12423  -0.099
## PELL_PERCz                     0.01257    0.04846   0.259
## STU_FINAIDz                   -0.04874    0.04936  -0.988
## DISTANCE_EDU                  -0.31002    0.17287  -1.793
## POST_GRAD_OFFER                0.26907    0.12020   2.239
## TOTAL_ENROLLMENTz             -0.05446    0.03472  -1.569
## HISP_INST_STAFFPERCz           0.02728    0.03029   0.901
## StateSupportz                  0.12884    0.08370   1.539
## GRADRATE_NHWHITEz:STU_FINAIDz  0.11888    0.03038   3.914
## 
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

Step 8a. Estimate Treatment Effects & State Effects using original data set: ipeds4

#Test for variation with the original dataset of the outcome accross states. F value appears to be higher than 1.
fit3<-lm(GRADRATE_HISPz ~ factor(STATE),  data=comparison_data2)
anova(fit3)
## Analysis of Variance Table
## 
## Response: GRADRATE_HISPz
##                Df  Sum Sq Mean Sq F value  Pr(>F)  
## factor(STATE)  17  26.907 1.58278  1.6829 0.04489 *
## Residuals     302 284.036 0.94052                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Simple model no covariates with the original dataset
fit4<-lmer(GRADRATE_HISPz ~ 1 + (1|STATE), data=comparison_data2)
summary(fit4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: GRADRATE_HISPz ~ 1 + (1 | STATE)
##    Data: comparison_data2
## 
## REML criterion at convergence: 901.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9575 -0.7699 -0.2857  0.6354  3.1476 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATE    (Intercept) 0.05044  0.2246  
##  Residual             0.94656  0.9729  
## Number of obs: 320, groups:  STATE, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.10966    0.09334  -1.175
#Model with Covariates
fit5<-lmer(GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz + TYPE + PELL_PERCz  + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER  + TOTAL_ENROLLMENTz  + HISP_INST_STAFFPERCz + StateSupportz + (GRADRATE_NHWHITEz*STU_FINAIDz) + matched + (1|STATE), data=comparison_data2)
summary(fit5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## GRADRATE_HISPz ~ HSI_GRANT + GRADRATE_NHWHITEz + PERC_PARTTIMEENROLLz +  
##     TYPE + PELL_PERCz + STU_FINAIDz + DISTANCE_EDU + POST_GRAD_OFFER +  
##     TOTAL_ENROLLMENTz + HISP_INST_STAFFPERCz + StateSupportz +  
##     (GRADRATE_NHWHITEz * STU_FINAIDz) + matched + (1 | STATE)
##    Data: comparison_data2
## 
## REML criterion at convergence: 577.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.5324 -0.4707 -0.0402  0.4574  4.6958 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  STATE    (Intercept) 0.01733  0.1317  
##  Residual             0.30315  0.5506  
## Number of obs: 320, groups:  STATE, 18
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                    0.26485    0.31561   0.839
## HSI_GRANT                     -0.03014    0.06841  -0.441
## GRADRATE_NHWHITEz              0.57494    0.04647  12.371
## PERC_PARTTIMEENROLLz          -0.22424    0.06333  -3.541
## TYPEPub 2yr                   -0.03088    0.15857  -0.195
## TYPEPub 4yr                   -0.08880    0.13518  -0.657
## PELL_PERCz                     0.10373    0.05102   2.033
## STU_FINAIDz                   -0.10699    0.05447  -1.964
## DISTANCE_EDU                  -0.17808    0.16171  -1.101
## POST_GRAD_OFFER                0.30196    0.12206   2.474
## TOTAL_ENROLLMENTz             -0.02343    0.03908  -0.600
## HISP_INST_STAFFPERCz           0.08492    0.03509   2.420
## StateSupportz                  0.11169    0.06512   1.715
## matched                        0.05578    0.10945   0.510
## GRADRATE_NHWHITEz:STU_FINAIDz  0.01338    0.03367   0.397
## 
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

#GENERATE MAP WITH LOCATIONS OF HSI SCHOOLS AND PERCENTAGE OF HISPANICS BY STATE

#latlong<-read.csv("C:/Users/PCMcC/Documents/Causal Inference/Final Project/Data/lisofallschoolswithlatlong.csv")
#latlong2<-left_join(hsi, latlong, by="unitid")
#write.csv(latlong2,"C:/Users/PCMcC/Documents/Causal Inference/Final Project/map/locations")
#library(dplyr)
##census_api_key(key="95a91ddea073de2c8d473e622e3cb99007afdfee", install=T)

#MAPPING FOR PRESENTATION: OVERLAP OF DISTRIBUTION OF HSIs and DISTRIBUTION OF HISPANIC POPULATION IN BY STATE IN THE US Extract from ACS summary file data profile variables from 2015 for all states The data profile tables are very useful because they contain lots of pre-calculated variables.

#allstates_acs<-get_acs(geography = "state", year = 2017,
 #               variables=c( "DP05_0071P"),
  #              summary_var = "B01001_001",
   #             geometry = T, output = "wide")

#create a state FIPS code - 5 digit
#allstates_acs$state<-substr(allstates_acs$GEOID, 1, 5)

#SAVE AS SHAPEFILE

#library(sf)
#st_write(allstates_acs,dsn = "C:/Users/PCMcC/Documents/Causal Inference/Final Project/map", layer = "hsi_institutions_location", driver = "ESRI Shapefile", delete_layer = FALSE )