library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
# Load the workbook "unenrolled_data.xlsx" into `enrollment`; this table feeds
# the first regression.
enrollment <- readxl::read_excel(path = "unenrolled_data.xlsx")

Dependent variable: enrollment (1 = individual enrolls, 0 = individual does not enroll). Independent variables: region (NW = 1, N = 2, NE = 3, C = 4, W = 5, S = 6, E = 7) and race (Hispanic = 1, non-Hispanic = 0).

# Flag rows whose `race` text contains "hispanic" (1 = match, 0 = no match).
# grepl() is case-sensitive and matches any string containing the pattern, so
# "Hispanic" would be coded 0 and "non-hispanic" would be coded 1.
# NOTE(review): confirm the spellings actually present in `race`.
enrollment_race <- dplyr::mutate(
  enrollment,
  hispanic = as.numeric(grepl("hispanic", race))
)

# Ordinary least-squares fit of `enrollee` on `region_code`.
# NOTE(review): `hispanic` is created above but is not a term in this formula,
# and `region_code` enters as a single numeric slope rather than as a set of
# region categories -- confirm both are intended.
enrollment_model <- lm(
  formula = enrollee ~ region_code,
  data = enrollment_race
)
summary(enrollment_model)
## 
## Call:
## lm(formula = enrollee ~ region_code, data = enrollment_race)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8175  0.1825  0.1946  0.2189  0.2553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.82967    0.04114  20.169   <2e-16 ***
## region_code -0.01214    0.01215  -0.998    0.319    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4056 on 481 degrees of freedom
## Multiple R-squared:  0.002068,   Adjusted R-squared:  -6.25e-06 
## F-statistic: 0.997 on 1 and 481 DF,  p-value: 0.3185

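The region_code coefficient is not statistically significant (p = 0.319; overall F-test p = 0.3185), and the adjusted R-squared is -6.25e-06, so region code explains essentially none of the variation in enrollment.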

# Load the workbook "enrolled.xlsx" and add a 0/1 `hispanic` flag: 1 when the
# `race` text contains "hispanic" (case-sensitive substring match), else 0.
enrollment_2 <- readxl::read_excel(path = "enrolled.xlsx")
enrollment_race_2 <- dplyr::mutate(
  enrollment_2,
  hispanic = as.numeric(grepl("hispanic", race))
)

# Ordinary least-squares fit with the `hispanic` flag as the response and
# `region_code` as the only predictor.
# NOTE(review): the object is named "multiple" but the formula has a single
# predictor -- confirm whether more terms were meant to be included.
enrolled_multiple <- lm(
  formula = hispanic ~ region_code,
  data = enrollment_race_2
)

summary(enrolled_multiple)
## 
## Call:
## lm(formula = hispanic ~ region_code, data = enrollment_race_2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5153 -0.4452 -0.4101  0.5548  0.5899 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.39253    0.05503   7.133    5e-12 ***
## region_code  0.01755    0.01631   1.076    0.283    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4975 on 380 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.003038,   Adjusted R-squared:  0.0004144 
## F-statistic: 1.158 on 1 and 380 DF,  p-value: 0.2826
# Draw diagnostic plot 3 for the fitted lm object (scale-location: square root
# of the absolute standardized residuals against the fitted values).
plot(x = enrolled_multiple, which = 3)