Clean / Prepare Data

sum(is.na(guns))  # Total number of missing values

## [1] 0

colSums(is.na(guns))  # Number of missing values per column

##   rownames       year    violent     murder    robbery  prisoners       afam 
##          0          0          0          0          0          0          0 
##       cauc       male population     income    density      state        law 
##          0          0          0          0          0          0          0

Selecting Vairables of Interest for outlier removal

selected_data <- guns[, c("violent", "income", "density")]
print(selected_data)

## # A tibble: 1,173 × 3
##    violent income density
##      <dbl>  <dbl>   <dbl>
##  1    414.  9563.  0.0746
##  2    419.  9932   0.0756
##  3    413.  9877.  0.0762
##  4    448.  9541.  0.0768
##  5    470.  9548.  0.0772
##  6    448.  9479.  0.0773
##  7    416   9783   0.0775
##  8    431. 10357.  0.0778
##  9    458. 10726.  0.0782
## 10    558  11092.  0.0786
## # ℹ 1,163 more rows

# Function to identify outliers using the IQR method
identify_outliers <- function(data, variable) {
  Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
  Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  outliers <- data[data[[variable]] < lower_bound | data[[variable]] > upper_bound, ]
  return(outliers)
}

# Function to remove outliers
remove_outliers <- function(data, variable) {
  Q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
  Q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  clean_data <- data[data[[variable]] >= lower_bound & data[[variable]] <= upper_bound, ]
  return(clean_data)
}

# Function to perform sensitivity analysis
sensitivity_analysis <- function(original_data, cleaned_data, variable) {
  original_mean <- mean(original_data[[variable]], na.rm = TRUE)
  cleaned_mean <- mean(cleaned_data[[variable]], na.rm = TRUE)
  original_sd <- sd(original_data[[variable]], na.rm = TRUE)
  cleaned_sd <- sd(cleaned_data[[variable]], na.rm = TRUE)
  impact <- data.frame(
    Metric = c("Mean", "Standard Deviation"),
    Original = c(original_mean, original_sd),
    Cleaned = c(cleaned_mean, cleaned_sd)
  )
  return(impact)
}

# Apply functions to each selected variable
for (variable in names(selected_data)) {
  cat("\nAnalyzing variable:", variable, "\n")
  
  # Identify outliers
  outliers <- identify_outliers(selected_data, variable)
  cat("Number of outliers identified:", nrow(outliers), "\n")
  
  # Remove outliers
  cleaned_data <- remove_outliers(selected_data, variable)
  
  # Perform sensitivity analysis
  impact <- sensitivity_analysis(selected_data, cleaned_data, variable)
  print(impact)
  
  # Optionally visualize the distribution
  p1 <- ggplot(selected_data, aes_string(x = variable)) +
    geom_histogram(binwidth = 1, fill = "blue", alpha = 0.5, boundary = 0) +
    labs(title = paste("Distribution of", variable, "with outliers"),
         x = variable, y = "Count") +
    theme_minimal()
  
  p2 <- ggplot(cleaned_data, aes_string(x = variable)) +
    geom_histogram(binwidth = 1, fill = "red", alpha = 0.5, boundary = 0) +
    labs(title = paste("Distribution of", variable, "without outliers"),
         x = variable, y = "Count") +
    theme_minimal()
  
  # Save plots
  ggsave(paste0(variable, "_with_outliers.png"), plot = p1)
  ggsave(paste0(variable, "_without_outliers.png"), plot = p2)
}

## 
## Analyzing variable: violent 
## Number of outliers identified: 26 
##               Metric Original  Cleaned
## 1               Mean 503.0747 470.2028
## 2 Standard Deviation 334.2772 244.3601

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Saving 7 x 5 in image
## Saving 7 x 5 in image

## 
## Analyzing variable: income 
## Number of outliers identified: 25 
##               Metric  Original   Cleaned
## 1               Mean 13724.796 13557.084
## 2 Standard Deviation  2554.542  2307.462

## Saving 7 x 5 in image
## Saving 7 x 5 in image

## 
## Analyzing variable: density 
## Number of outliers identified: 138 
##               Metric  Original    Cleaned
## 1               Mean 0.3520382 0.09607251
## 2 Standard Deviation 1.3554719 0.08987374

## Saving 7 x 5 in image
## Saving 7 x 5 in image

Create new year_seps variable

# Create a new variable 'year_sep' to group years into intervals
guns <- guns %>%
  mutate(year_sep = cut(year, breaks = seq(min(year, na.rm = TRUE), max(year, na.rm = TRUE), by = 3),
                        include.lowest = TRUE, right = FALSE))

guns <- guns %>% 
  filter(!is.na(year_sep))

# Check the new variable
table(guns$year_sep)

## 
## [1977,1980) [1980,1983) [1983,1986) [1986,1989) [1989,1992) [1992,1995) 
##         153         153         153         153         153         153 
## [1995,1998] 
##         204

Variables of Interest

# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]

Descriptive Statistics

guns_main_renamed <- guns_main %>%
  rename(
    "Shall Carry Law" = law,
    "Income Per Capita" = income,
    "Population Density" = density,
    "Male Population % 10-29" = male,
    "Violent Crime Rate" = violent
  )

datasummary_skim(guns_main_renamed)

tinytable_7p3da0gr7ks4jhk4qcqk

	Unique	Missing Pct.	Mean	SD	Min	Median	Max
Income Per Capita	1120	0	13601.5	2483.8	8554.9	13284.5	23133.1
Population Density	1122	0	0.4	1.4	0.0	0.1	11.1
Male Population % 10-29	1122	0	16.2	1.7	12.4	16.0	22.4
Violent Crime Rate	1056	0	505.1	337.1	47.0	444.2	2921.8
Shall Carry Law	N	%
no	866	77.2
yes	256	22.8

print(datasummary_skim(guns_main_renamed))

## 
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## |                         | Unique | Missing Pct. | Mean    | SD     | Min    | Median  | Max     | Histogram                                        |
## +=========================+========+==============+=========+========+========+=========+=========+==================================================+
## | Income Per Capita       | 1120   | 0            | 13601.5 | 2483.8 | 8554.9 | 13284.5 | 23133.1 | ![](tinytable_assets/ide0i8s8qdm4pfm38f03ls.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Population Density      | 1122   | 0            | 0.4     | 1.4    | 0.0    | 0.1     | 11.1    | ![](tinytable_assets/idd2h3morjjqz12tl10v8n.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Male Population % 10-29 | 1122   | 0            | 16.2    | 1.7    | 12.4   | 16.0    | 22.4    | ![](tinytable_assets/idm35x7anqp169p5b9dfcy.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Violent Crime Rate      | 1056   | 0            | 505.1   | 337.1  | 47.0   | 444.2   | 2921.8  | ![](tinytable_assets/id5847sgrncq2uidgkg65f.png) |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | Shall Carry Law         | N      | %            |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | no                      | 866    | 77.2         |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+
## | yes                     | 256    | 22.8         |         |        |        |         |         |                                                  |
## +-------------------------+--------+--------------+---------+--------+--------+---------+---------+--------------------------------------------------+

# Select specific columns by name
guns_main <- guns[, c("law", "income", "density", "male", "violent")]

Interaction plot between Violent Crime Rate and Year by Shall Carry Law

interaction_plot <- ggplot(guns, aes(x = year, y = violent, color = as.factor(law), group = law)) +
  #geom_point(alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(title = "Figure 1: Interaction between Violent Crime Rate and Year by Shall Carry Law",
       x = "Year",
       y = "Violent Crime Rate per 100,000",
       color = "Shall Carry Law") +
  theme_minimal() +
  scale_color_manual(values = c("blue", "red"), labels = c("No", "Yes"))

print(interaction_plot)

## `geom_smooth()` using formula = 'y ~ x'

Bar chart Violent Crime Rate and Year by Shall Carry Law

# Load necessary libraries
library(tidyverse)

# Assuming your data frame is named `guns` and contains the variables `violent`, `year_sep`, and `law`

# Create the bar chart
ggplot(guns, aes(x = year_sep, y = violent, fill = law)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Figure 2: Violent Crime Rate and Year Group by Shall Carry Law",
       x = "Year Group",
       y = "Violent Crime Rate per 100,000",
       fill = "Shall Carry Law") +
  theme_classic()

Initial Regression Analysis

Fitting Models

# Fit the simple regression model
model1 <- lm(violent ~ law, data = guns)

# Fit the multiple predictor regression model
model2 <- lm(violent ~ law + income + density + male, data = guns)

# Summarize the models
summary(model1)

## 
## Call:
## lm(formula = violent ~ law, data = guns)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -495.67 -229.16  -64.97  133.54 2379.13 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   542.67      11.22  48.380  < 2e-16 ***
## lawyes       -164.66      23.48  -7.012 4.05e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 330.1 on 1120 degrees of freedom
## Multiple R-squared:  0.04206,    Adjusted R-squared:  0.0412 
## F-statistic: 49.17 on 1 and 1120 DF,  p-value: 4.048e-12

summary(model2)

## 
## Call:
## lm(formula = violent ~ law + income + density + male, data = guns)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -669.12 -188.85  -27.94  156.52  841.62 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.905e+02  1.158e+02   4.235 2.47e-05 ***
## lawyes      -1.350e+02  1.770e+01  -7.625 5.21e-14 ***
## income       2.370e-02  3.556e-03   6.665 4.16e-11 ***
## density      1.437e+02  5.568e+00  25.804  < 2e-16 ***
## male        -2.026e+01  5.051e+00  -4.011 6.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 235.3 on 1117 degrees of freedom
## Multiple R-squared:  0.5147, Adjusted R-squared:  0.513 
## F-statistic: 296.2 on 4 and 1117 DF,  p-value: < 2.2e-16

# Create detailed regression tables using sjPlot
tab_model(model1, model2, 
          show.ci = FALSE,  # Show confidence intervals
          show.se = TRUE,   # Show standard errors
          show.stat = TRUE, # Show statistics (t-value)
          show.p = TRUE,    # Show p-values
          title = "Regression Analysis of Violent Crime Rates",
          dv.labels = c("Simple Model", "Multiple Predictor Model"),
          pred.labels = c("(Intercept)", 
                          "Shall Carry Law Instituted",
                          "Income Per Capita",
                          "Population Density",
                          "Male Population % 10-29"),
          string.ci = "Conf. Int.", 
          string.se = "Std. Error", 
          string.stat = "t value",
          string.p = "p value")

Regression Analysis of Violent Crime Rates
	Simple Model				Multiple Predictor Model
Predictors	Estimates	Std. Error	t value	p value	Estimates	Std. Error	t value	p value
(Intercept)	542.67	11.22	48.38	<0.001	490.47	115.81	4.24	<0.001
Shall Carry Law Instituted	-164.66	23.48	-7.01	<0.001	-134.99	17.70	-7.62	<0.001
Income Per Capita					0.02	0.00	6.66	<0.001
Population Density					143.67	5.57	25.80	<0.001
Male Population % 10-29					-20.26	5.05	-4.01	<0.001
Observations	1122				1122
R² / R² adjusted	0.042 / 0.041				0.515 / 0.513

#Appendix ChatGPT4 was used as a learning aid in this assingment to troubleshoot error messages and give tips on code.

SS2

Benny Konishi

2024-07-31

Clean / Prepare Data

Selecting Vairables of Interest for outlier removal

Create new year_seps variable

Variables of Interest

Descriptive Statistics

Interaction plot between Violent Crime Rate and Year by Shall Carry Law

Bar chart Violent Crime Rate and Year by Shall Carry Law

Initial Regression Analysis

Fitting Models