Project 1

Jacob Kerby

2024-10-27

What Data Set are we going to explore? (Objective)

Step 1 - Import the Data

# Load the USArrests data set into the workspace, then print the
# per-column summary statistics (min, quartiles, mean, max).
data(list = "USArrests")
print(summary(USArrests))
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00

Steps necessary to complete the goal

  1. Assign Regions to Each State
  2. Calculate Average Crime Rates By Region
  3. Visualize Crime Rates By Region
  4. Conduct Tests for Significance
  5. Interpret Results

Step 1 - Assign Regions to Each State

Step 1A - Code for Step 1

# Reload the data set so this step starts from an unmodified copy, and expose
# the state names (stored as row names) as an ordinary column.
data("USArrests")
USArrests$State <- rownames(USArrests)

# State membership of each of the four regions.
# NOTE(review): the grouping looks like the US Census Bureau regions -- confirm.
northeast <- c("Connecticut", "Maine", "Massachusetts", "New Hampshire", "New Jersey",
               "New York", "Pennsylvania", "Rhode Island", "Vermont")
midwest <- c("Illinois", "Indiana", "Iowa", "Kansas", "Michigan", "Minnesota",
             "Missouri", "Nebraska", "North Dakota", "Ohio", "South Dakota", "Wisconsin")
south <- c("Alabama", "Arkansas", "Delaware", "Florida", "Georgia", "Kentucky",
           "Louisiana", "Maryland", "Mississippi", "North Carolina", "Oklahoma",
           "South Carolina", "Tennessee", "Texas", "Virginia", "West Virginia")
west <- c("Alaska", "Arizona", "California", "Colorado", "Hawaii", "Idaho",
          "Montana", "Nevada", "New Mexico", "Oregon", "Utah", "Washington", "Wyoming")

# Build an explicit state -> region lookup. Every region, including West, is
# matched by name, so a state missing from the lists above can no longer be
# labelled "West" by default.
region_members <- list(
  Northeast = northeast,
  Midwest = midwest,
  South = south,
  West = west
)
region_lookup <- setNames(
  rep(names(region_members), lengths(region_members)),
  unlist(region_members, use.names = FALSE)
)

# unname() keeps Region a plain character vector, as the ifelse() version did.
USArrests$Region <- unname(region_lookup[USArrests$State])

# Fail loudly if any state was left without a region.
unassigned_states <- USArrests$State[is.na(USArrests$Region)]
if (length(unassigned_states) > 0) {
  stop(
    "No region assigned for: ", paste(unassigned_states, collapse = ", "),
    call. = FALSE
  )
}

Step 2 - Calculate Average Crime Rates By Region

library(dplyr)

# Group the states by Region, then collapse each group to the mean of its
# Murder, Assault and Rape columns (one output row per region).
arrests_by_region <- group_by(USArrests, Region)
regional_summary <- summarise(
  arrests_by_region,
  Mean_Murder = mean(Murder),
  Mean_Assault = mean(Assault),
  Mean_Rape = mean(Rape)
)

Step 2A - Regional Summary using knitr and kable

library(knitr)

# Render the per-region means as a markdown table with a caption.
regional_summary %>%
  kable(format = "markdown", caption = "Regional Crime Summary")
Regional Crime Summary
Region Mean_Murder Mean_Assault Mean_Rape
Midwest 5.700000 120.3333 18.44167
Northeast 4.700000 126.6667 13.77778
South 11.706250 220.0000 21.16250
West 7.030769 187.2308 29.05385

Step 3 - Visualize Crime Rates By Region

Step 3A - Code for Step 3

+ Here we can see a clear breakdown of the data by region

Step 4 - Conduct Statistical Tests for Significance

# Helper: take the first element of an aov summary (the ANOVA table), convert
# it to a plain data frame and label every row with the crime it describes.
labelled_anova_table <- function(aov_summary, crime_label) {
  anova_table <- as.data.frame(aov_summary[[1]])
  anova_table$Crime <- crime_label
  anova_table
}

# One-way ANOVA of each crime rate against Region.
anova_murder <- summary(aov(Murder ~ Region, data = USArrests))
anova_assault <- summary(aov(Assault ~ Region, data = USArrests))
anova_rape <- summary(aov(Rape ~ Region, data = USArrests))

# One labelled ANOVA table per crime type.
murder_df <- labelled_anova_table(anova_murder, "Murder")
assault_df <- labelled_anova_table(anova_assault, "Assault")
rape_df <- labelled_anova_table(anova_rape, "Rape")

Step 4A - Formatted Table of ANOVA Statistical Tests

ANOVA Results by Crime Type (Between Regions and Residuals)
Crime Sum Sq Df Mean Sq F value Pr(>F)
Between Regions Murder 391.2357 3 130.41191 11.143893 0.0000128
Residuals Murder 538.3171 46 11.70254 NA NA
Between Regions 1 Assault 90332.1456 3 30110.71521 5.540793 0.0024782
Residuals 1 Assault 249980.9744 46 5434.36901 NA NA
Between Regions 2 Rape 1388.9543 3 462.98476 7.319227 0.0004119
Residuals 2 Rape 2909.7745 46 63.25597 NA NA

Step 5 - ANOVA Interpretation