library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(tidyverse)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(readxl)
# Load the county-level workbook once and keep it for the rest of the
# analysis. Printing the stored tibble gives the same preview as calling
# read_excel() on its own, without reading the file from disk a second time.
capstone_data <- read_excel("Next Data1.xls")
capstone_data
## # A tibble: 254 × 12
##    County    `Total Population` `NH-White Population` `NH-Black Population`
##    <chr>                  <dbl>                 <dbl>                 <dbl>
##  1 Anderson               57900                 32356                 11503
##  2 Andrews                18791                  7172                   350
##  3 Angelina               87634                 49650                 13152
##  4 Aransas                25181                 16063                   402
##  5 Archer                  9159                  7820                    43
##  6 Armstrong               1830                  1549                    17
##  7 Atascosa               50942                 16243                   483
##  8 Austin                 32095                 19077                  2569
##  9 Bailey                  6777                  2087                    61
## 10 Bandera                22448                 16239                   275
## # ℹ 244 more rows
## # ℹ 8 more variables: `Hispanic Population` <dbl>, `NH-Asian Population` <dbl>,
## #   `GDP 2023` <dbl>, `Percentage Total Poverty` <dbl>,
## #   `Number of Families Poverty` <dbl>, `Median Family Income $` <chr>,
## #   `Total Crime` <dbl>, `Unemployment Rate` <dbl>
# Pearson correlation between GDP and total crime on the raw data.
# The NA result indicates that at least one of the two columns has
# missing values.
with(capstone_data, cor(`GDP 2023`, `Total Crime`, method = "pearson"))
## [1] NA
# Same statistic, using only rows where both values are present.
with(
  capstone_data,
  cor(`GDP 2023`, `Total Crime`, use = "complete.obs", method = "pearson")
)
## [1] 0.6984111
# Keep only the rows that have both GDP and total crime recorded.
clean_data <- drop_na(capstone_data, `GDP 2023`, `Total Crime`)

# On the cleaned data the default (Pearson) correlation matches the
# complete-observations result above.
with(clean_data, cor(`GDP 2023`, `Total Crime`))
## [1] 0.6984111
# Scatter plot of total crime against GDP; the argument expressions are
# left as-is so the default axis labels are unchanged.
plot(x = clean_data$`GDP 2023`, y = clean_data$`Total Crime`)

# Simple linear regression of GDP on total crime.
# The formula names the columns directly so they are looked up in `data`.
# Writing clean_data$... inside the formula bypasses `data`, produces
# unwieldy coefficient names, and makes predict(model, newdata = ...)
# ignore the new data.
model <- lm(`GDP 2023` ~ `Total Crime`, data = clean_data)
summary(model)
## 
## Call:
## lm(formula = `GDP 2023` ~ `Total Crime`, data = clean_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -127646866   -3819317    4594717    8793918  280353972 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -10037888    2489942  -4.031 7.36e-05 ***
## `Total Crime`     37042       2396  15.460  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33550000 on 251 degrees of freedom
## Multiple R-squared:  0.4878, Adjusted R-squared:  0.4857 
## F-statistic:   239 on 1 and 251 DF,  p-value: < 2.2e-16