library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(ggrepel)
#1
# Load the Texas federal funds workbook (read_excel reads the first sheet by
# default).
project_data <- read_excel("texas federal funds.xlsx")

# Keep the time column plus four funding programs, selected by position.
# Column positions were looked up in Excel with =COLUMN(<cell>), e.g.
# =COLUMN(PT1) gives 436.
#   236 = ENVIRONMENTAL HEALTH
#   436 = MATERNAL AND CHILD HEALTH SERVICES BLOCK GRANT TO THE STATES
#   243 = EVEN START - STATE EDUCATIONAL AGENCIES
#   822 = used here as TITLE I GRANTS TO LOCAL EDUCATION AGENCIES
# NOTE(review): the original note listed Title I as column 834 while the code
# selects column 822 -- confirm against the workbook which index is correct.
older_data <- project_data[, c(1, 236, 436, 243, 822)]
colnames(older_data) <- c(
  "Time", "Environmental", "MaternalChild", "EvenStart", "Title1"
)

# Coerce ALL four funding columns to numeric before dropping incomplete rows.
# Previously only columns 2:4 were converted ahead of na.omit(), so a
# non-numeric Title1 entry would survive the filter and only turn into NA
# afterwards, leaking into log() and lm().
older_data <- older_data %>%
  mutate(across(-Time, as.numeric))

# Remove every row that has a missing value in any retained column.
older_data <- na.omit(older_data)

# Natural-log transform of each funding series; only Time and the logged
# columns are kept for modelling.
project_data_log <- older_data %>%
  mutate(
    LOG_ENV = log(Environmental),
    LOG_MC = log(MaternalChild),
    LOG_ES = log(EvenStart),
    LOG_T1 = log(Title1)
  ) %>%
  select(Time, LOG_ENV, LOG_MC, LOG_ES, LOG_T1)
head(project_data_log)
## # A tibble: 6 × 5
## Time LOG_ENV LOG_MC LOG_ES LOG_T1
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1996 13.6 17.4 15.9 19.9
## 2 1998 13.1 17.4 16.1 20.4
## 3 1999 13.6 17.4 16.2 18.6
## 4 2000 13.5 17.4 16.3 18.7
## 5 2001 14.7 17.4 16.7 20.4
## 6 2002 14.7 18.2 16.7 20.6
# Histogram of the years that survived cleaning, grouped into 4 bins.
# Time is stored as character, so it is converted to numeric for the x axis.
ggplot(
  data = project_data_log,
  mapping = aes(x = as.numeric(Time))
) +
  geom_histogram(bins = 4)

# Log-log regression of Environmental Health funding on the three other
# logged funding series.
data_lm <- lm(
  formula = LOG_ENV ~ LOG_MC + LOG_ES + LOG_T1,
  data = project_data_log
)
summary(data_lm)
##
## Call:
## lm(formula = LOG_ENV ~ LOG_MC + LOG_ES + LOG_T1, data = project_data_log)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37353 -0.00974 0.12679 0.24752 0.48862
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.939522 13.181175 -0.223 0.82851
## LOG_MC 0.194886 0.874997 0.223 0.82872
## LOG_ES 0.000745 0.405280 0.002 0.99857
## LOG_T1 0.686572 0.199819 3.436 0.00744 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5594 on 9 degrees of freedom
## Multiple R-squared: 0.5751, Adjusted R-squared: 0.4335
## F-statistic: 4.061 on 3 and 9 DF, p-value: 0.04433
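A 1% increase in Title I funding is associated with roughly a 0.69% increase in Environmental Health funding, holding the other two funding streams constant (p = 0.007). The Maternal and Child Health and Even Start coefficients are not statistically significant. Together, the three predictors explain 57.51% of the variation in LOG_ENV (adjusted R-squared: 43.35%).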
# Refit with logged Title I funding as the only predictor.
# This overwrites the three-predictor model previously stored in data_lm.
data_lm <- lm(
  formula = LOG_ENV ~ LOG_T1,
  data = project_data_log
)
summary(data_lm)
##
## Call:
## lm(formula = LOG_ENV ~ LOG_T1, data = project_data_log)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39213 0.02488 0.12332 0.22112 0.46588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4245 3.6681 0.116 0.90995
## LOG_T1 0.6893 0.1799 3.832 0.00279 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.508 on 11 degrees of freedom
## Multiple R-squared: 0.5717, Adjusted R-squared: 0.5327
## F-statistic: 14.68 on 1 and 11 DF, p-value: 0.002786
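Dropping the two non-significant predictors lowers the p-value for LOG_T1 from 0.00744 to 0.00279, a decrease of about 0.005. With LOG_T1 as the only predictor, the adjusted R-squared is 53.27%, up about 10 percentage points from 43.35% in the three-predictor model. The multiple R-squared is essentially unchanged (57.17% vs. 57.51%).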