library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(tidyverse)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(readxl)
# Load the county-level workbook once and keep it for the rest of the
# analysis. Printing the stored tibble gives the same preview as calling
# read_excel() on its own, without reading the file a second time.
# NOTE(review): the preview shows `Median Family Income $` parsed as <chr>;
# convert it before using it numerically.
capstone_data <- read_excel("Next Data1.xls")
capstone_data
## # A tibble: 254 × 12
## County `Total Population` `NH-White Population` `NH-Black Population`
## <chr> <dbl> <dbl> <dbl>
## 1 Anderson 57900 32356 11503
## 2 Andrews 18791 7172 350
## 3 Angelina 87634 49650 13152
## 4 Aransas 25181 16063 402
## 5 Archer 9159 7820 43
## 6 Armstrong 1830 1549 17
## 7 Atascosa 50942 16243 483
## 8 Austin 32095 19077 2569
## 9 Bailey 6777 2087 61
## 10 Bandera 22448 16239 275
## # ℹ 244 more rows
## # ℹ 8 more variables: `Hispanic Population` <dbl>, `NH-Asian Population` <dbl>,
## # `GDP 2023` <dbl>, `Percentage Total Poverty` <dbl>,
## # `Number of Families Poverty` <dbl>, `Median Family Income $` <chr>,
## # `Total Crime` <dbl>, `Unemployment Rate` <dbl>
# Pearson correlation between `GDP 2023` and `Total Crime` on the raw data.
# With cor()'s default handling of missing values the result is NA (below);
# the second call restricts the computation to rows where both are present.
with(capstone_data, cor(`GDP 2023`, `Total Crime`, method = "pearson"))
## [1] NA
with(
  capstone_data,
  cor(`GDP 2023`, `Total Crime`, method = "pearson", use = "complete.obs")
)
## [1] 0.6984111
# Keep only the rows where both `GDP 2023` and `Total Crime` are observed.
clean_data <- capstone_data %>%
  drop_na(`GDP 2023`, `Total Crime`)

# Correlation on the complete rows (cor() defaults to Pearson).
cor(clean_data$`GDP 2023`, clean_data$`Total Crime`)
## [1] 0.6984111

# Scatter plot of the two variables. Explicit axis labels are supplied because
# plot() would otherwise label the axes with the raw `clean_data$...`
# expressions.
plot(
  clean_data$`GDP 2023`, clean_data$`Total Crime`,
  xlab = "GDP 2023",
  ylab = "Total Crime"
)

# Simple linear regression of `GDP 2023` on `Total Crime`.
# The formula uses bare column names so that lm() resolves them through
# `data = clean_data`; writing `clean_data$...` inside the formula bypasses
# the `data` argument, which breaks predict(newdata = ...) and
# update(data = ...) and yields unwieldy coefficient names.
# NOTE(review): GDP is the response and crime the predictor here — confirm
# this is the intended direction of the relationship.
model <- lm(`GDP 2023` ~ `Total Crime`, data = clean_data)
summary(model)
##
## Call:
## lm(formula = `GDP 2023` ~ `Total Crime`, data = clean_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -127646866 -3819317 4594717 8793918 280353972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10037888 2489942 -4.031 7.36e-05 ***
## `Total Crime` 37042 2396 15.460 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33550000 on 251 degrees of freedom
## Multiple R-squared: 0.4878, Adjusted R-squared: 0.4857
## F-statistic: 239 on 1 and 251 DF, p-value: < 2.2e-16