library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(ggrepel)

#1
# Load the Texas federal funds workbook (path is relative to the working
# directory).
project_data <- read_excel("texas federal funds.xlsx")

# Keep the time column plus four program columns, selected by position.
# Positions were looked up in Excel with =COLUMN(<cell>), e.g. =COLUMN(PT1)
# returns 436.
#   236 = ENVIRONMENTAL HEALTH
#   436 = MATERNAL AND CHILD HEALTH SERVICES BLOCK GRANT TO THE STATES
#   243 = EVEN START - STATE EDUCATIONAL AGENCIES
#   822 = TITLE I GRANTS TO LOCAL EDUCATION AGENCIES
# NOTE(review): the earlier note listed 834 for Title I while the code selects
# column 822 -- confirm which position is correct.
older_data <- project_data[, c(1, 236, 436, 243, 822)]

colnames(older_data) <- c(
  "Time", "Environmental", "MaternalChild", "EvenStart", "Title1"
)

# Coerce ALL four funding columns (2:5) to numeric before dropping NAs.
# Previously only 2:4 were coerced here, so unparseable Title1 entries were
# not yet NA when na.omit() ran and slipped through the filter.
older_data[, 2:5] <- lapply(older_data[, 2:5], as.numeric)

# Remove rows with a missing value in any column.
older_data <- na.omit(older_data)
# Log-transform each funding series, then keep only Time and the log columns.
project_data_log <- older_data %>%
  mutate(
    LOG_ENV = log(Environmental),
    LOG_MC = log(MaternalChild),
    LOG_ES = log(EvenStart),
    LOG_T1 = log(Title1)
  ) %>%
  select(Time, LOG_ENV, LOG_MC, LOG_ES, LOG_T1)

# Preview the first rows of the transformed data.
head(project_data_log)
## # A tibble: 6 × 5
##   Time  LOG_ENV LOG_MC LOG_ES LOG_T1
##   <chr>   <dbl>  <dbl>  <dbl>  <dbl>
## 1 1996     13.6   17.4   15.9   19.9
## 2 1998     13.1   17.4   16.1   20.4
## 3 1999     13.6   17.4   16.2   18.6
## 4 2000     13.5   17.4   16.3   18.7
## 5 2001     14.7   17.4   16.7   20.4
## 6 2002     14.7   18.2   16.7   20.6
# Histogram of the observation years (Time is stored as character, so it is
# converted to numeric for the x axis), grouped into 4 bins.
ggplot(
  project_data_log,
  aes(x = as.numeric(Time))
) +
  geom_histogram(bins = 4)

# Model 1: regress log Environmental Health funding on the logs of the other
# three programs (log-log specification).
data_lm <- lm(
  LOG_ENV ~ LOG_MC + LOG_ES + LOG_T1,
  data = project_data_log
)
summary(data_lm)
## 
## Call:
## lm(formula = LOG_ENV ~ LOG_MC + LOG_ES + LOG_T1, data = project_data_log)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37353 -0.00974  0.12679  0.24752  0.48862 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.939522  13.181175  -0.223  0.82851   
## LOG_MC       0.194886   0.874997   0.223  0.82872   
## LOG_ES       0.000745   0.405280   0.002  0.99857   
## LOG_T1       0.686572   0.199819   3.436  0.00744 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5594 on 9 degrees of freedom
## Multiple R-squared:  0.5751, Adjusted R-squared:  0.4335 
## F-statistic: 4.061 on 3 and 9 DF,  p-value: 0.04433

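A 1% increase in Title I funding is associated with an approximately 0.69% increase in Environmental Health funding, holding the other two predictors constant (p = 0.007); LOG_MC and LOG_ES are not statistically significant. Together, the three predictors explain 57.51% of the variation in LOG_ENV (multiple R-squared), or 43.35% after adjusting for the number of predictors (adjusted R-squared).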

# Model 2: refit with LOG_T1 as the only predictor.
# Note that this overwrites the three-predictor fit stored in data_lm.
data_lm <- lm(
  LOG_ENV ~ LOG_T1,
  data = project_data_log
)
summary(data_lm)
## 
## Call:
## lm(formula = LOG_ENV ~ LOG_T1, data = project_data_log)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39213  0.02488  0.12332  0.22112  0.46588 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.4245     3.6681   0.116  0.90995   
## LOG_T1        0.6893     0.1799   3.832  0.00279 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.508 on 11 degrees of freedom
## Multiple R-squared:  0.5717, Adjusted R-squared:  0.5327 
## F-statistic: 14.68 on 1 and 11 DF,  p-value: 0.002786

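With LOG_T1 as the only predictor, its p-value dropped even lower, from 0.00744 to 0.00279 (a decrease of about 0.005). On its own, LOG_T1 explains 53.27% of the variation in LOG_ENV (adjusted R-squared), an increase of about 10 percentage points over the three-predictor model's adjusted R-squared of 43.35%; the multiple R-squared is essentially unchanged (57.17% vs. 57.51%).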