guns <- read_csv("Guns.csv")
## Rows: 1173 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, law
## dbl (12): rownames, year, violent, murder, robbery, prisoners, afam, cauc, m...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Clean / Prepare Data
sum(is.na(guns)) # Total number of missing values
## [1] 0
colSums(is.na(guns)) # Number of missing values per column
## rownames year violent murder robbery prisoners afam
## 0 0 0 0 0 0 0
## cauc male population income density state law
## 0 0 0 0 0 0 0
Selecting Vairables of Interest for outlier removal
selected_data <- guns[, c("violent", "income", "density")]
print(selected_data)
## # A tibble: 1,173 × 3
## violent income density
## <dbl> <dbl> <dbl>
## 1 414. 9563. 0.0746
## 2 419. 9932 0.0756
## 3 413. 9877. 0.0762
## 4 448. 9541. 0.0768
## 5 470. 9548. 0.0772
## 6 448. 9479. 0.0773
## 7 416 9783 0.0775
## 8 431. 10357. 0.0778
## 9 458. 10726. 0.0782
## 10 558 11092. 0.0786
## # ℹ 1,163 more rows
# Function to identify outliers using the IQR method
identify_outliers <- function(data, variable) {
Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
outliers <- data[data[[variable]] < lower_bound | data[[variable]] > upper_bound, ]
return(outliers)
}
# Function to remove outliers
remove_outliers <- function(data, variable) {
Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
clean_data <- data[data[[variable]] >= lower_bound & data[[variable]] <= upper_bound, ]
return(clean_data)
}
# Function to perform sensitivity analysis
sensitivity_analysis <- function(original_data, cleaned_data, variable) {
original_mean <- mean(original_data[[variable]], na.rm = TRUE)
cleaned_mean <- mean(cleaned_data[[variable]], na.rm = TRUE)
original_sd <- sd(original_data[[variable]], na.rm = TRUE)
cleaned_sd <- sd(cleaned_data[[variable]], na.rm = TRUE)
impact <- data.frame(
Metric = c("Mean", "Standard Deviation"),
Original = c(original_mean, original_sd),
Cleaned = c(cleaned_mean, cleaned_sd)
)
return(impact)
}
# Apply functions to each selected variable
for (variable in names(selected_data)) {
cat("\nAnalyzing variable:", variable, "\n")
# Identify outliers
outliers <- identify_outliers(selected_data, variable)
cat("Number of outliers identified:", nrow(outliers), "\n")
# Remove outliers
cleaned_data <- remove_outliers(selected_data, variable)
# Perform sensitivity analysis
impact <- sensitivity_analysis(selected_data, cleaned_data, variable)
print(impact)
# Optionally visualize the distribution
p1 <- ggplot(selected_data, aes_string(x = variable)) +
geom_histogram(binwidth = 1, fill = "blue", alpha = 0.5, boundary = 0) +
labs(title = paste("Distribution of", variable, "with outliers"),
x = variable, y = "Count") +
theme_minimal()
p2 <- ggplot(cleaned_data, aes_string(x = variable)) +
geom_histogram(binwidth = 1, fill = "red", alpha = 0.5, boundary = 0) +
labs(title = paste("Distribution of", variable, "without outliers"),
x = variable, y = "Count") +
theme_minimal()
# Save plots
ggsave(paste0(variable, "_with_outliers.png"), plot = p1)
ggsave(paste0(variable, "_without_outliers.png"), plot = p2)
}
##
## Analyzing variable: violent
## Number of outliers identified: 26
## Metric Original Cleaned
## 1 Mean 503.0747 470.2028
## 2 Standard Deviation 334.2772 244.3601
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Saving 7 x 5 in image
## Saving 7 x 5 in image
##
## Analyzing variable: income
## Number of outliers identified: 25
## Metric Original Cleaned
## 1 Mean 13724.796 13557.084
## 2 Standard Deviation 2554.542 2307.462
## Saving 7 x 5 in image
## Saving 7 x 5 in image
##
## Analyzing variable: density
## Number of outliers identified: 138
## Metric Original Cleaned
## 1 Mean 0.3520382 0.09607251
## 2 Standard Deviation 1.3554719 0.08987374
## Saving 7 x 5 in image
## Saving 7 x 5 in image
Create new year_seps variable
# Create a new variable 'year_sep' to group years into intervals
guns <- guns %>%
mutate(year_sep = cut(year, breaks = seq(min(year, na.rm = TRUE), max(year, na.rm = TRUE), by = 3),
include.lowest = TRUE, right = FALSE))
guns <- guns %>%
filter(!is.na(year_sep))
# Check the new variable
table(guns$year_sep)
##
## [1977,1980) [1980,1983) [1983,1986) [1986,1989) [1989,1992) [1992,1995)
## 153 153 153 153 153 153
## [1995,1998]
## 204
Variables of Interest
# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]
Descriptive Statistics
guns_main_renamed <- guns_main %>%
rename(
"Shall Carry Law" = law,
"Income Per Capita" = income,
"Population Density" = density,
"Male Population % 10-29" = male,
"Violent Crime Rate" = violent
)
datasummary_skim(guns_main_renamed)
tinytable_7p3da0gr7ks4jhk4qcqk
| |
Unique |
Missing Pct. |
Mean |
SD |
Min |
Median |
Max |
Histogram |
| Income Per Capita |
1120 |
0 |
13601.5 |
2483.8 |
8554.9 |
13284.5 |
23133.1 |
 |
| Population Density |
1122 |
0 |
0.4 |
1.4 |
0.0 |
0.1 |
11.1 |
 |
| Male Population % 10-29 |
1122 |
0 |
16.2 |
1.7 |
12.4 |
16.0 |
22.4 |
 |
| Violent Crime Rate |
1056 |
0 |
505.1 |
337.1 |
47.0 |
444.2 |
2921.8 |
 |
| Shall Carry Law |
N |
% |
|
|
|
|
|
|
| no |
866 |
77.2 |
|
|
|
|
|
|
| yes |
256 |
22.8 |
|
|
|
|
|
|
print(datasummary_skim(guns_main_renamed))
##
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | | Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram |
## +=========================+========+==============+=========+========+========+=========+=========+==================================================+
## | Income Per Capita | 1120 | 0 | 13601.5 | 2483.8 | 8554.9 | 13284.5 | 23133.1 |  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Population Density | 1122 | 0 | 0.4 | 1.4 | 0.0 | 0.1 | 11.1 |  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Male Population % 10-29 | 1122 | 0 | 16.2 | 1.7 | 12.4 | 16.0 | 22.4 |  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Violent Crime Rate | 1056 | 0 | 505.1 | 337.1 | 47.0 | 444.2 | 2921.8 |  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Shall Carry Law | N | % | | | | | | |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | no | 866 | 77.2 | | | | | | |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | yes | 256 | 22.8 | | | | | | |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]
Interaction plot between Violent Crime Rate and Year by Shall Carry
Law
interaction_plot <- ggplot(guns, aes(x = year, y = violent, color = as.factor(law), group = law)) +
#geom_point(alpha = 0.1) +
geom_smooth(method = "loess", se = FALSE) +
labs(title = "Figure 1: Interaction between Violent Crime Rate and Year by Shall Carry Law",
x = "Year",
y = "Violent Crime Rate per 100,000",
color = "Shall Carry Law") +
theme_minimal() +
scale_color_manual(values = c("blue", "red"), labels = c("No", "Yes"))
print(interaction_plot)
## `geom_smooth()` using formula = 'y ~ x'

Bar chart Violent Crime Rate and Year by Shall Carry Law
# Load necessary libraries
library(tidyverse)
# Assuming your data frame is named `guns` and contains the variables `violent`, `year_sep`, and `law`
# Create the bar chart
ggplot(guns, aes(x = year_sep, y = violent, fill = law)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Figure 2: Violent Crime Rate and Year Group by Shall Carry Law",
x = "Year Group",
y = "Violent Crime Rate per 100,000",
fill = "Shall Carry Law") +
theme_classic()

Initial Regression Analysis
Fitting Models
# Fit the simple regression model
model1 <- lm(violent ~ law, data = guns)
# Fit the multiple predictor regression model
model2 <- lm(violent ~ law + income + density + male, data = guns)
# Summarize the models
summary(model1)
##
## Call:
## lm(formula = violent ~ law, data = guns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -495.67 -229.16 -64.97 133.54 2379.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 542.67 11.22 48.380 < 2e-16 ***
## lawyes -164.66 23.48 -7.012 4.05e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 330.1 on 1120 degrees of freedom
## Multiple R-squared: 0.04206, Adjusted R-squared: 0.0412
## F-statistic: 49.17 on 1 and 1120 DF, p-value: 4.048e-12
summary(model2)
##
## Call:
## lm(formula = violent ~ law + income + density + male, data = guns)
##
## Residuals:
## Min 1Q Median 3Q Max
## -669.12 -188.85 -27.94 156.52 841.62
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.905e+02 1.158e+02 4.235 2.47e-05 ***
## lawyes -1.350e+02 1.770e+01 -7.625 5.21e-14 ***
## income 2.370e-02 3.556e-03 6.665 4.16e-11 ***
## density 1.437e+02 5.568e+00 25.804 < 2e-16 ***
## male -2.026e+01 5.051e+00 -4.011 6.44e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 235.3 on 1117 degrees of freedom
## Multiple R-squared: 0.5147, Adjusted R-squared: 0.513
## F-statistic: 296.2 on 4 and 1117 DF, p-value: < 2.2e-16
# Create detailed regression tables using sjPlot
tab_model(model1, model2,
show.ci = FALSE, # Show confidence intervals
show.se = TRUE, # Show standard errors
show.stat = TRUE, # Show statistics (t-value)
show.p = TRUE, # Show p-values
title = "Regression Analysis of Violent Crime Rates",
dv.labels = c("Simple Model", "Multiple Predictor Model"),
pred.labels = c("(Intercept)",
"Shall Carry Law Instituted",
"Income Per Capita",
"Population Density",
"Male Population % 10-29"),
string.ci = "Conf. Int.",
string.se = "Std. Error",
string.stat = "t value",
string.p = "p value")
Regression Analysis of Violent Crime Rates
|
|
Simple Model
|
Multiple Predictor Model
|
|
Predictors
|
Estimates
|
Std. Error
|
t value
|
p value
|
Estimates
|
Std. Error
|
t value
|
p value
|
|
(Intercept)
|
542.67
|
11.22
|
48.38
|
<0.001
|
490.47
|
115.81
|
4.24
|
<0.001
|
|
Shall Carry Law Instituted
|
-164.66
|
23.48
|
-7.01
|
<0.001
|
-134.99
|
17.70
|
-7.62
|
<0.001
|
|
Income Per Capita
|
|
|
|
|
0.02
|
0.00
|
6.66
|
<0.001
|
|
Population Density
|
|
|
|
|
143.67
|
5.57
|
25.80
|
<0.001
|
|
Male Population % 10-29
|
|
|
|
|
-20.26
|
5.05
|
-4.01
|
<0.001
|
|
Observations
|
1122
|
1122
|
|
R2 / R2 adjusted
|
0.042 / 0.041
|
0.515 / 0.513
|
#Appendix ChatGPT4 was used as a learning aid in this assingment to
troubleshoot error messages and give tips on code.