library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(lubridate)
library(dplyr)
library(ggplot2)
# Load the zip-code level workbook; the path is resolved relative to the
# current working directory.
zipcode <- read_excel("Zipcode Data.xlsx")

# Enrollment rate (er) with the three median-household-income columns,
# keeping only rows where all four values are present.
# NOTE(review): wmhi is White median household income per the plot title
# below; hmhi / bmhi are presumably the Hispanic / Black equivalents -- confirm.
zipcode_income <- zipcode %>%
  select(er, wmhi, hmhi, bmhi) %>%
  na.omit()

# Enrollment rate (er) with the three percent-enrolled columns, complete
# rows only.
# NOTE(review): pwe / phe are percent White / Hispanic enrolled per the plot
# title below; pbe is presumably percent Black enrolled -- confirm.
zipcode_race <- zipcode %>%
  select(er, pwe, phe, pbe) %>%
  na.omit()
# Multiple linear regression of enrollment rate on the three income columns.
# The model is fitted on the full `zipcode` table; lm() drops rows with a
# missing value in any model variable on its own, so the rows used are the
# same complete cases kept in `zipcode_income`.
zipcode_model <- lm(
  er ~ wmhi + hmhi + bmhi,
  data = zipcode
)

# Coefficient table, residual summary and overall fit statistics.
summary(zipcode_model)
## 
## Call:
## lm(formula = er ~ wmhi + hmhi + bmhi, data = zipcode)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.73958 -0.36028 -0.09582  0.39932  2.58900 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.973e-01  3.953e-01  -0.752  0.45638   
## wmhi         1.800e-05  5.405e-06   3.331  0.00187 **
## hmhi         6.271e-06  6.785e-06   0.924  0.36094   
## bmhi        -3.138e-06  4.505e-06  -0.697  0.49005   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7823 on 40 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.366,  Adjusted R-squared:  0.3185 
## F-statistic: 7.698 on 3 and 40 DF,  p-value: 0.0003557
# Base-R diagnostic plots for the income model (plot.lm defaults).
plot(zipcode_model)

# Scatter of enrollment rate against White median household income with a
# simple linear regression line and dashed residual segments.
#
# Changes from the previous version:
#   * er is the response in the models and the title reads "Enrollment rate
#     vs. ...", so er now goes on the y axis and wmhi on the x axis.
#   * The old geom_segment() used y == yend and x == xend, i.e. zero-length
#     segments that drew nothing. The segments now run vertically from each
#     observed point to its fitted value.
#
# The fitted values come from the one-predictor model er ~ wmhi, which is the
# line geom_smooth(method = "lm") draws here; this is NOT the three-predictor
# `zipcode_model`. `zipcode_income` has no missing values, so fitted() returns
# exactly one value per row.
income_fit <- lm(er ~ wmhi, data = zipcode_income)
income_plot_data <- zipcode_income %>%
  mutate(er_fitted = fitted(income_fit))

ggplot(income_plot_data, aes(x = wmhi, y = er)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  geom_segment(
    aes(xend = wmhi, yend = er_fitted),
    linetype = "dashed"
  ) +
  ggtitle(
    "Enrollment rate vs. White Median Household Income",
    subtitle = "With linear regression line"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Multiple linear regression of enrollment rate on the three percent-enrolled
# columns, fitted on the full `zipcode` table.
# NOTE(review): despite its name, `zipcode_df` holds an lm fit, not a data
# frame. The name is kept because later code refers to it.
zipcode_df <- lm(
  er ~ pwe + phe + pbe,
  data = zipcode
)

# Coefficient table, residual summary and overall fit statistics.
summary(zipcode_df)
## 
## Call:
## lm(formula = er ~ pwe + phe + pbe, data = zipcode)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3170 -0.6221 -0.2747  0.4309  3.2771 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.8536     0.2321   3.677 0.000595 ***
## pwe           8.4174     3.4636   2.430 0.018876 *  
## phe           9.5767     4.1391   2.314 0.025010 *  
## pbe           3.4664     2.5376   1.366 0.178301    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9833 on 48 degrees of freedom
## Multiple R-squared:  0.2224, Adjusted R-squared:  0.1738 
## F-statistic: 4.576 on 3 and 48 DF,  p-value: 0.006745
# Base-R diagnostic plots for the race model (plot.lm defaults).
plot(zipcode_df)

# Scatter of enrollment rate against the combined percent White + percent
# Hispanic enrolled, with a simple linear regression line and dashed residual
# segments.
#
# Changes from the previous version:
#   * The plot data is the `zipcode_race` data frame instead of the lm object
#     `zipcode_df`. Passing a model to ggplot() only works through the implicit
#     fortify.lm() conversion; `zipcode_race` holds the same complete rows of
#     er / pwe / phe / pbe that the model was fitted on.
#   * er is the response in the model and the title reads "Enrollment rate
#     vs. ...", so er now goes on the y axis.
#   * The old geom_segment() used y == yend and x == xend, i.e. zero-length
#     segments that drew nothing. The segments now run vertically from each
#     observed point to its fitted value.
#
# The fitted values come from the one-predictor model er ~ (pwe + phe), which
# is the line geom_smooth(method = "lm") draws here; this is NOT the
# three-predictor `zipcode_df` fit.
race_plot_data <- zipcode_race %>%
  mutate(pct_white_hispanic = pwe + phe)
race_fit <- lm(er ~ pct_white_hispanic, data = race_plot_data)
race_plot_data <- race_plot_data %>%
  mutate(er_fitted = fitted(race_fit))

ggplot(race_plot_data, aes(x = pct_white_hispanic, y = er)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  geom_segment(
    aes(xend = pct_white_hispanic, yend = er_fitted),
    linetype = "dashed"
  ) +
  labs(x = "pwe + phe", y = "er") +
  ggtitle(
    "Enrollment rate vs. Percent White and Percent Hispanic Enrolled",
    subtitle = "With linear regression line"
  )
## `geom_smooth()` using formula = 'y ~ x'