library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(lubridate)
library(dplyr)
library(ggplot2)
# Read the zip-code level spreadsheet; the path is resolved relative to the
# current working directory.
zipcode <- read_excel("Zipcode Data.xlsx")

# Enrollment rate (er) together with the three median-household-income
# columns; only rows with all four values present are kept.
# NOTE(review): wmhi is the white median household income (per the plot title
# further down); hmhi / bmhi are presumably the Hispanic / Black
# counterparts -- confirm against the data dictionary.
zipcode_income <- zipcode %>%
  select(er, wmhi, hmhi, bmhi) %>%
  na.omit()

# Enrollment rate (er) together with the three percent-enrolled columns;
# only complete rows are kept.
# NOTE(review): pwe / phe are percent White / Hispanic enrolled (per the plot
# title further down); pbe is presumably percent Black enrolled -- confirm.
zipcode_race <- zipcode %>%
  select(er, pwe, phe, pbe) %>%
  na.omit()
# Multiple linear regression of enrollment rate (er) on the three
# median-household-income columns. The model is fitted on the full `zipcode`
# table; lm() drops incomplete rows itself (the summary below reports the
# number of observations removed for missingness).
zipcode_model <- lm(
  er ~ wmhi + hmhi + bmhi,
  data = zipcode
)

# Coefficient table and goodness-of-fit statistics.
summary(zipcode_model)
##
## Call:
## lm(formula = er ~ wmhi + hmhi + bmhi, data = zipcode)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.73958 -0.36028 -0.09582 0.39932 2.58900
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.973e-01 3.953e-01 -0.752 0.45638
## wmhi 1.800e-05 5.405e-06 3.331 0.00187 **
## hmhi 6.271e-06 6.785e-06 0.924 0.36094
## bmhi -3.138e-06 4.505e-06 -0.697 0.49005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7823 on 40 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.366, Adjusted R-squared: 0.3185
## F-statistic: 7.698 on 3 and 40 DF, p-value: 0.0003557

# Base-R diagnostic plots for the fitted model (plot method for lm objects).
plot(zipcode_model)




# Scatter plot of white median household income (wmhi, y-axis) against
# enrollment rate (er, x-axis) with the least-squares line drawn by
# geom_smooth(method = "lm").
#
# The dashed vertical segments show each point's residual: they run from the
# observed wmhi to the value predicted by the same wmhi ~ er regression that
# geom_smooth() fits. The segment end must be the fitted value -- if both
# ends use the observed value the segments have zero length and are invisible.
wmhi_trend <- lm(wmhi ~ er, data = zipcode_income)
wmhi_plot_data <- zipcode_income %>%
  mutate(
    # predict() on the plotted rows guarantees one fitted value per row.
    wmhi_fitted = as.numeric(predict(wmhi_trend, newdata = zipcode_income))
  )

ggplot(wmhi_plot_data, aes(x = er, y = wmhi)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle(
    "Enrollment rate vs. White Median Household Income",
    subtitle = "With linear regression line"
  ) +
  # x and y are inherited from the plot-level aes(); only the end point of
  # each residual segment needs to be supplied here.
  geom_segment(aes(xend = er, yend = wmhi_fitted), linetype = "dashed")
## `geom_smooth()` using formula = 'y ~ x'

# Multiple linear regression of enrollment rate (er) on the three
# percent-enrolled columns (pwe, phe, pbe), fitted on the full `zipcode`
# table.
# Note: despite its name, `zipcode_df` holds the fitted lm object, not a
# data frame.
zipcode_df <- lm(
  er ~ pwe + phe + pbe,
  data = zipcode
)

# Coefficient table and goodness-of-fit statistics.
summary(zipcode_df)
##
## Call:
## lm(formula = er ~ pwe + phe + pbe, data = zipcode)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3170 -0.6221 -0.2747 0.4309 3.2771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8536 0.2321 3.677 0.000595 ***
## pwe 8.4174 3.4636 2.430 0.018876 *
## phe 9.5767 4.1391 2.314 0.025010 *
## pbe 3.4664 2.5376 1.366 0.178301
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9833 on 48 degrees of freedom
## Multiple R-squared: 0.2224, Adjusted R-squared: 0.1738
## F-statistic: 4.576 on 3 and 48 DF, p-value: 0.006745

# Base-R diagnostic plots for the fitted model (plot method for lm objects).
plot(zipcode_df)




# Scatter plot of the combined White + Hispanic percent enrolled (pwe + phe,
# y-axis) against enrollment rate (er, x-axis) with the least-squares line
# drawn by geom_smooth(method = "lm").
#
# The plot is built from the `zipcode_race` tibble rather than from the fitted
# model object `zipcode_df`: handing an lm object to ggplot() only works
# through the implicit fortify() conversion, whereas `zipcode_race` already
# holds exactly the complete-case columns needed here.
#
# The dashed vertical segments show each point's residual: they run from the
# observed pwe + phe to the value predicted by the same (pwe + phe) ~ er
# regression that geom_smooth() fits. The segment end must be the fitted
# value -- if both ends use the observed value the segments have zero length
# and are invisible.
pct_trend <- lm(I(pwe + phe) ~ er, data = zipcode_race)
pct_plot_data <- zipcode_race %>%
  mutate(
    # predict() on the plotted rows guarantees one fitted value per row.
    pct_fitted = as.numeric(predict(pct_trend, newdata = zipcode_race))
  )

ggplot(pct_plot_data, aes(x = er, y = pwe + phe)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle(
    "Enrollment rate vs. Percent White and Percent Hispanic Enrolled",
    subtitle = "With linear regression line"
  ) +
  # x and y are inherited from the plot-level aes(); only the end point of
  # each residual segment needs to be supplied here.
  geom_segment(aes(xend = er, yend = pct_fitted), linetype = "dashed")
## `geom_smooth()` using formula = 'y ~ x'
