guns <- read_csv("Guns.csv")
## Rows: 1173 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): state, law
## dbl (12): rownames, year, violent, murder, robbery, prisoners, afam, cauc, m...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean / Prepare Data

sum(is.na(guns))  # Total number of missing values
## [1] 0
colSums(is.na(guns))  # Number of missing values per column
##   rownames       year    violent     murder    robbery  prisoners       afam 
##          0          0          0          0          0          0          0 
##       cauc       male population     income    density      state        law 
##          0          0          0          0          0          0          0

Selecting Vairables of Interest for outlier removal

selected_data <- guns[, c("violent", "income", "density")]
print(selected_data)
## # A tibble: 1,173 × 3
##    violent income density
##      <dbl>  <dbl>   <dbl>
##  1    414.  9563.  0.0746
##  2    419.  9932   0.0756
##  3    413.  9877.  0.0762
##  4    448.  9541.  0.0768
##  5    470.  9548.  0.0772
##  6    448.  9479.  0.0773
##  7    416   9783   0.0775
##  8    431. 10357.  0.0778
##  9    458. 10726.  0.0782
## 10    558  11092.  0.0786
## # ℹ 1,163 more rows
# Function to identify outliers using the IQR method
identify_outliers <- function(data, variable) {
  Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
  Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  outliers <- data[data[[variable]] < lower_bound | data[[variable]] > upper_bound, ]
  return(outliers)
}

# Function to remove outliers
remove_outliers <- function(data, variable) {
  Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
  Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  clean_data <- data[data[[variable]] >= lower_bound & data[[variable]] <= upper_bound, ]
  return(clean_data)
}

# Function to perform sensitivity analysis
sensitivity_analysis <- function(original_data, cleaned_data, variable) {
  original_mean <- mean(original_data[[variable]], na.rm = TRUE)
  cleaned_mean <- mean(cleaned_data[[variable]], na.rm = TRUE)
  original_sd <- sd(original_data[[variable]], na.rm = TRUE)
  cleaned_sd <- sd(cleaned_data[[variable]], na.rm = TRUE)
  impact <- data.frame(
    Metric = c("Mean", "Standard Deviation"),
    Original = c(original_mean, original_sd),
    Cleaned = c(cleaned_mean, cleaned_sd)
  )
  return(impact)
}

# Apply functions to each selected variable
for (variable in names(selected_data)) {
  cat("\nAnalyzing variable:", variable, "\n")
  
  # Identify outliers
  outliers <- identify_outliers(selected_data, variable)
  cat("Number of outliers identified:", nrow(outliers), "\n")
  
  # Remove outliers
  cleaned_data <- remove_outliers(selected_data, variable)
  
  # Perform sensitivity analysis
  impact <- sensitivity_analysis(selected_data, cleaned_data, variable)
  print(impact)
  
  # Optionally visualize the distribution
  p1 <- ggplot(selected_data, aes_string(x = variable)) +
    geom_histogram(binwidth = 1, fill = "blue", alpha = 0.5, boundary = 0) +
    labs(title = paste("Distribution of", variable, "with outliers"),
         x = variable, y = "Count") +
    theme_minimal()
  
  p2 <- ggplot(cleaned_data, aes_string(x = variable)) +
    geom_histogram(binwidth = 1, fill = "red", alpha = 0.5, boundary = 0) +
    labs(title = paste("Distribution of", variable, "without outliers"),
         x = variable, y = "Count") +
    theme_minimal()
  
  # Save plots
  ggsave(paste0(variable, "_with_outliers.png"), plot = p1)
  ggsave(paste0(variable, "_without_outliers.png"), plot = p2)
}
## 
## Analyzing variable: violent 
## Number of outliers identified: 26 
##               Metric Original  Cleaned
## 1               Mean 503.0747 470.2028
## 2 Standard Deviation 334.2772 244.3601
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## 
## Analyzing variable: income 
## Number of outliers identified: 25 
##               Metric  Original   Cleaned
## 1               Mean 13724.796 13557.084
## 2 Standard Deviation  2554.542  2307.462
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## 
## Analyzing variable: density 
## Number of outliers identified: 138 
##               Metric  Original    Cleaned
## 1               Mean 0.3520382 0.09607251
## 2 Standard Deviation 1.3554719 0.08987374
## Saving 7 x 5 in image
## Saving 7 x 5 in image

Create new year_seps variable

# Create a new variable 'year_sep' to group years into intervals
guns <- guns %>%
  mutate(year_sep = cut(year, breaks = seq(min(year, na.rm = TRUE), max(year, na.rm = TRUE), by = 3),
                        include.lowest = TRUE, right = FALSE))

guns <- guns %>% 
  filter(!is.na(year_sep))

# Check the new variable
table(guns$year_sep)
## 
## [1977,1980) [1980,1983) [1983,1986) [1986,1989) [1989,1992) [1992,1995) 
##         153         153         153         153         153         153 
## [1995,1998] 
##         204

Variables of Interest

# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]

Descriptive Statistics

guns_main_renamed <- guns_main %>%
  rename(
    "Shall Carry Law" = law,
    "Income Per Capita" = income,
    "Population Density" = density,
    "Male Population % 10-29" = male,
    "Violent Crime Rate" = violent
  )

datasummary_skim(guns_main_renamed)
tinytable_7p3da0gr7ks4jhk4qcqk
Unique Missing Pct. Mean SD Min Median Max Histogram
Income Per Capita 1120 0 13601.5 2483.8 8554.9 13284.5 23133.1
Population Density 1122 0 0.4 1.4 0.0 0.1 11.1
Male Population % 10-29 1122 0 16.2 1.7 12.4 16.0 22.4
Violent Crime Rate 1056 0 505.1 337.1 47.0 444.2 2921.8
Shall Carry Law N %
no 866 77.2
yes 256 22.8
print(datasummary_skim(guns_main_renamed))
## 
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## |                         | Unique | Missing Pct. | Mean    | SD     | Min    | Median  | Max     | Histogram                                        |
## +=========================+========+==============+=========+========+========+=========+=========+==================================================+
## | Income Per Capita       | 1120   | 0            | 13601.5 | 2483.8 | 8554.9 | 13284.5 | 23133.1 | ![](tinytable_assets/ide0i8s8qdm4pfm38f03ls.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Population Density      | 1122   | 0            | 0.4     | 1.4    | 0.0    | 0.1     | 11.1    | ![](tinytable_assets/idd2h3morjjqz12tl10v8n.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Male Population % 10-29 | 1122   | 0            | 16.2    | 1.7    | 12.4   | 16.0    | 22.4    | ![](tinytable_assets/idm35x7anqp169p5b9dfcy.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Violent Crime Rate      | 1056   | 0            | 505.1   | 337.1  | 47.0   | 444.2   | 2921.8  | ![](tinytable_assets/id5847sgrncq2uidgkg65f.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Shall Carry Law         | N      | %            |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | no                      | 866    | 77.2         |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | yes                     | 256    | 22.8         |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]

Interaction plot between Violent Crime Rate and Year by Shall Carry Law

interaction_plot <- ggplot(guns, aes(x = year, y = violent, color = as.factor(law), group = law)) +
  #geom_point(alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(title = "Figure 1: Interaction between Violent Crime Rate and Year by Shall Carry Law",
       x = "Year",
       y = "Violent Crime Rate per 100,000",
       color = "Shall Carry Law") +
  theme_minimal() +
  scale_color_manual(values = c("blue", "red"), labels = c("No", "Yes"))

print(interaction_plot)
## `geom_smooth()` using formula = 'y ~ x'

Bar chart Violent Crime Rate and Year by Shall Carry Law

# Load necessary libraries
library(tidyverse)

# Assuming your data frame is named `guns` and contains the variables `violent`, `year_sep`, and `law`

# Create the bar chart
ggplot(guns, aes(x = year_sep, y = violent, fill = law)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Figure 2: Violent Crime Rate and Year Group by Shall Carry Law",
       x = "Year Group",
       y = "Violent Crime Rate per 100,000",
       fill = "Shall Carry Law") +
  theme_classic()

Initial Regression Analysis

Fitting Models

# Fit the simple regression model
model1 <- lm(violent ~ law, data = guns)

# Fit the multiple predictor regression model
model2 <- lm(violent ~ law + income + density + male, data = guns)

# Summarize the models
summary(model1)
## 
## Call:
## lm(formula = violent ~ law, data = guns)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -495.67 -229.16  -64.97  133.54 2379.13 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   542.67      11.22  48.380  < 2e-16 ***
## lawyes       -164.66      23.48  -7.012 4.05e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 330.1 on 1120 degrees of freedom
## Multiple R-squared:  0.04206,    Adjusted R-squared:  0.0412 
## F-statistic: 49.17 on 1 and 1120 DF,  p-value: 4.048e-12
summary(model2)
## 
## Call:
## lm(formula = violent ~ law + income + density + male, data = guns)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -669.12 -188.85  -27.94  156.52  841.62 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.905e+02  1.158e+02   4.235 2.47e-05 ***
## lawyes      -1.350e+02  1.770e+01  -7.625 5.21e-14 ***
## income       2.370e-02  3.556e-03   6.665 4.16e-11 ***
## density      1.437e+02  5.568e+00  25.804  < 2e-16 ***
## male        -2.026e+01  5.051e+00  -4.011 6.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 235.3 on 1117 degrees of freedom
## Multiple R-squared:  0.5147, Adjusted R-squared:  0.513 
## F-statistic: 296.2 on 4 and 1117 DF,  p-value: < 2.2e-16
# Create detailed regression tables using sjPlot
tab_model(model1, model2, 
          show.ci = FALSE,  # Show confidence intervals
          show.se = TRUE,   # Show standard errors
          show.stat = TRUE, # Show statistics (t-value)
          show.p = TRUE,    # Show p-values
          title = "Regression Analysis of Violent Crime Rates",
          dv.labels = c("Simple Model", "Multiple Predictor Model"),
          pred.labels = c("(Intercept)", 
                          "Shall Carry Law Instituted",
                          "Income Per Capita",
                          "Population Density",
                          "Male Population % 10-29"),
          string.ci = "Conf. Int.", 
          string.se = "Std. Error", 
          string.stat = "t value",
          string.p = "p value")
Regression Analysis of Violent Crime Rates
  Simple Model Multiple Predictor Model
Predictors Estimates Std. Error t value p value Estimates Std. Error t value p value
(Intercept) 542.67 11.22 48.38 <0.001 490.47 115.81 4.24 <0.001
Shall Carry Law Instituted -164.66 23.48 -7.01 <0.001 -134.99 17.70 -7.62 <0.001
Income Per Capita 0.02 0.00 6.66 <0.001
Population Density 143.67 5.57 25.80 <0.001
Male Population % 10-29 -20.26 5.05 -4.01 <0.001
Observations 1122 1122
R2 / R2 adjusted 0.042 / 0.041 0.515 / 0.513

#Appendix ChatGPT4 was used as a learning aid in this assingment to troubleshoot error messages and give tips on code.