library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(tidyverse)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(readxl)
getwd()
## [1] "/Users/karinadominguez/Desktop/PAD - Applied Quanitative Methods"
read_excel("NewCapstone1.xlsx")
## # A tibble: 254 × 7
## County `Total Population` `GDP 2023` Percentage Total Pove…¹ `Total Crime`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Anderson 57900 2709645 12.4 399
## 2 Andrews 18791 5201771 11.1 272
## 3 Angelina 87634 4310685 11.3 573
## 4 Aransas 25181 942655 14.4 297
## 5 Archer 9159 409437 6.6 168
## 6 Armstrong 1830 79368 5.5 12
## 7 Atascosa 50942 4330189 13.5 334
## 8 Austin 32095 2213481 12.8 453
## 9 Bailey 6777 595427 4.6 1725
## 10 Bandera 22448 520256 9.5 85
## # ℹ 244 more rows
## # ℹ abbreviated name: ¹`Percentage Total Poverty`
## # ℹ 2 more variables: `Unemployment Rate` <dbl>,
## # `Percentage With Bachelor's Degree` <dbl>
capstone_data1 <- read_excel("NewCapstone1.xlsx")
cor(capstone_data1$`GDP 2023`,
capstone_data1$`Total Crime`,
method = "pearson",
use = "complete.obs")
## [1] 0.6984111
summary(capstone_data1)
## County Total Population GDP 2023
## Length:254 Min. : 52 Min. : 59177
## Class :character 1st Qu.: 6189 1st Qu.: 485518
## Mode :character Median : 18878 Median : 1428522
## Mean : 120965 Mean : 10382825
## 3rd Qu.: 54594 3rd Qu.: 4083550
## Max. :4870713 Max. :563988106
##
## Percentage Total Poverty Total Crime Unemployment Rate
## Min. : 0.00 Min. : 0.0 Min. :0.300
## 1st Qu.: 7.60 1st Qu.: 108.0 1st Qu.:3.300
## Median :10.80 Median : 267.0 Median :3.700
## Mean :11.32 Mean : 552.4 Mean :3.935
## 3rd Qu.:13.97 3rd Qu.: 583.0 3rd Qu.:4.400
## Max. :36.80 Max. :7928.0 Max. :9.300
## NA's :1
## Percentage With Bachelor's Degree
## Min. : 0.00
## 1st Qu.:15.60
## Median :19.70
## Mean :21.46
## 3rd Qu.:25.20
## Max. :55.50
##
model <- lm(`GDP 2023` ~ `Total Crime` + `Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = capstone_data1)
summary(model)
##
## Call:
## lm(formula = `GDP 2023` ~ `Total Crime` + `Unemployment Rate` +
## `Percentage With Bachelor's Degree` + `Percentage Total Poverty`,
## data = capstone_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -124459986 -6450115 3009080 9819724 282220694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18123107 11314044 -1.602 0.11047
## `Total Crime` 35554 2452 14.497 < 2e-16 ***
## `Unemployment Rate` -2376097 2142070 -1.109 0.26840
## `Percentage With Bachelor's Degree` 804905 263816 3.051 0.00253 **
## `Percentage Total Poverty` 87054 391185 0.223 0.82408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32830000 on 248 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5153, Adjusted R-squared: 0.5075
## F-statistic: 65.92 on 4 and 248 DF, p-value: < 2.2e-16
capstone_data1$GDP_per_capita <- capstone_data1$`GDP 2023` / capstone_data1$`Total Population`
capstone_data1$Crime_per_capita <- capstone_data1$`Total Crime` / capstone_data1$`Total Population`
capstone_data1$Crime_per_100k <-
(capstone_data1$`Total Crime` / capstone_data1$`Total Population`) * 100000
summary(capstone_data1$GDP_per_capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.52 45.87 62.78 979.50 108.74 206247.61
summary(capstone_data1$Crime_per_capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000000 0.004287 0.010790 0.030459 0.024666 0.530710 1
model_pc <- lm(GDP_per_capita ~ Crime_per_capita + `Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = capstone_data1)
summary(model_pc)
##
## Call:
## lm(formula = GDP_per_capita ~ Crime_per_capita + `Unemployment Rate` +
## `Percentage With Bachelor's Degree` + `Percentage Total Poverty`,
## data = capstone_data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10500 -3039 -1130 1282 183767
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23235.70 4311.01 5.390 1.64e-07 ***
## Crime_per_capita 1376.39 13000.43 0.106 0.915769
## `Unemployment Rate` -2868.57 797.74 -3.596 0.000390 ***
## `Percentage With Bachelor's Degree` -378.04 96.81 -3.905 0.000121 ***
## `Percentage Total Poverty` -254.50 147.10 -1.730 0.084858 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12360 on 248 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1056, Adjusted R-squared: 0.09119
## F-statistic: 7.322 on 4 and 248 DF, p-value: 1.374e-05
summary(capstone_data1$GDP_per_capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.52 45.87 62.78 979.50 108.74 206247.61
summary(capstone_data1$Crime_per_capita)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000000 0.004287 0.010790 0.030459 0.024666 0.530710 1
sum(capstone_data1$Crime_per_capita == 0, na.rm = TRUE)
## [1] 1
clean_data <- capstone_data1[
capstone_data1$GDP_per_capita > 0 &
capstone_data1$Crime_per_capita > 0, ]
model_log <- lm(log(GDP_per_capita) ~ log(Crime_per_capita) +
`Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = clean_data)
clean_data <- capstone_data1[
capstone_data1$GDP_per_capita > 0 &
capstone_data1$Crime_per_capita > 0, ]
model_log <- lm(log(GDP_per_capita) ~ log(Crime_per_capita) +
`Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = clean_data)
summary(model_log)
##
## Call:
## lm(formula = log(GDP_per_capita) ~ log(Crime_per_capita) + `Unemployment Rate` +
## `Percentage With Bachelor's Degree` + `Percentage Total Poverty`,
## data = clean_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6548 -0.5791 -0.1691 0.3590 5.5984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.969824 0.349038 19.969 < 2e-16 ***
## log(Crime_per_capita) 0.089250 0.049220 1.813 0.071000 .
## `Unemployment Rate` -0.341503 0.064021 -5.334 2.17e-07 ***
## `Percentage With Bachelor's Degree` -0.027671 0.008067 -3.430 0.000707 ***
## `Percentage Total Poverty` -0.021210 0.011602 -1.828 0.068736 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9628 on 247 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1774
## F-statistic: 14.53 on 4 and 247 DF, p-value: 1.14e-10
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
vif(model_log)
## log(Crime_per_capita) `Unemployment Rate`
## 1.178246 1.238191
## `Percentage With Bachelor's Degree` `Percentage Total Poverty`
## 1.337304 1.294463
cor.test(log(clean_data$GDP_per_capita),
log(clean_data$Crime_per_capita))
##
## Pearson's product-moment correlation
##
## data: log(clean_data$GDP_per_capita) and log(clean_data$Crime_per_capita)
## t = 3.269, df = 250, p-value = 0.001231
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0809188 0.3180817
## sample estimates:
## cor
## 0.2024671
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(capstone_data1, type = "text", summary = TRUE)
##
## =================================
## Statistic N Mean St. Dev. Min Max
## =================================
model_log <- lm(
log(GDP_per_capita + 1) ~ log(Crime_per_capita + 1) +
`Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = capstone_data1
)
capstone_clean <- na.omit(capstone_data1)
model_log <- lm(
log(GDP_per_capita + 1) ~ log(Crime_per_capita + 1) +
`Unemployment Rate` +
`Percentage With Bachelor's Degree` +
`Percentage Total Poverty`,
data = capstone_clean
)
library(lmtest)
bptest(model_log)
##
## studentized Breusch-Pagan test
##
## data: model_log
## BP = 13.435, df = 4, p-value = 0.009334
library(sandwich)
library(lmtest)
coeftest(model_log, vcov = vcovHC(model_log, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.723783 0.721286 9.3219 < 2.2e-16 ***
## log(Crime_per_capita + 1) 0.702643 1.058800 0.6636 0.507549
## `Unemployment Rate` -0.370138 0.128326 -2.8844 0.004267 **
## `Percentage With Bachelor's Degree` -0.031058 0.012645 -2.4563 0.014725 *
## `Percentage Total Poverty` -0.018067 0.015039 -1.2014 0.230760
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
raintest(model_log)
##
## Rainbow test
##
## data: model_log
## Rain = 0.482, df1 = 127, df2 = 121, p-value = 1