Jacob Kerby
2024-10-27
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Load the built-in arrests data and copy the state names out of the row
# names into a regular column.
data("USArrests")
USArrests[["State"]] <- row.names(USArrests)

# State lists for the four regions used to group the data.
northeast <- c(
  "Connecticut", "Maine", "Massachusetts", "New Hampshire", "New Jersey",
  "New York", "Pennsylvania", "Rhode Island", "Vermont"
)
midwest <- c(
  "Illinois", "Indiana", "Iowa", "Kansas", "Michigan", "Minnesota",
  "Missouri", "Nebraska", "North Dakota", "Ohio", "South Dakota", "Wisconsin"
)
south <- c(
  "Alabama", "Arkansas", "Delaware", "Florida", "Georgia", "Kentucky",
  "Louisiana", "Maryland", "Mississippi", "North Carolina", "Oklahoma",
  "South Carolina", "Tennessee", "Texas", "Virginia", "West Virginia"
)
west <- c(
  "Alaska", "Arizona", "California", "Colorado", "Hawaii", "Idaho",
  "Montana", "Nevada", "New Mexico", "Oregon", "Utah", "Washington", "Wyoming"
)
# Label every state with its region. Membership is resolved through an
# explicit state -> region lookup instead of a nested ifelse(), whose final
# branch would silently call any unlisted or misspelled state "West".
region_members <- list(
  Northeast = northeast,
  Midwest = midwest,
  South = south,
  West = west
)
region_lookup <- setNames(
  rep(names(region_members), times = lengths(region_members)),
  unlist(region_members, use.names = FALSE)
)

# Character indexing returns the first matching name, so a state listed in two
# regions still resolves in Northeast, Midwest, South, West order.
USArrests$Region <- unname(region_lookup[USArrests$State])

# Stop rather than mislabel when a state has no region assigned.
unassigned <- USArrests$State[is.na(USArrests$Region)]
if (length(unassigned) > 0) {
  stop(
    "No region defined for: ", paste(unassigned, collapse = ", "),
    call. = FALSE
  )
}

| Region | Mean_Murder | Mean_Assault | Mean_Rape |
|---|---|---|---|
| Midwest | 5.700000 | 120.3333 | 18.44167 |
| Northeast | 4.700000 | 126.6667 | 13.77778 |
| South | 11.706250 | 220.0000 | 21.16250 |
| West | 7.030769 | 187.2308 | 29.05385 |
+
Here we can see a clear breakdown of the mean murder, assault, and rape figures by region.
# One-way ANOVA of each arrest rate on Region; only the summaries are kept.
summarise_region_anova <- function(model_formula) {
  summary(aov(model_formula, data = USArrests))
}
anova_murder <- summarise_region_anova(Murder ~ Region)
anova_assault <- summarise_region_anova(Assault ~ Region)
anova_rape <- summarise_region_anova(Rape ~ Region)

# The first element of each summary is the ANOVA table; convert it to a plain
# data frame and tag it with the crime it describes.
murder_df <- as.data.frame(anova_murder[[1]])
murder_df[["Crime"]] <- "Murder"

assault_df <- as.data.frame(anova_assault[[1]])
assault_df[["Crime"]] <- "Assault"

rape_df <- as.data.frame(anova_rape[[1]])
rape_df$Crime <- "Rape"

| | Crime | Sum Sq | Df | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| Between Regions | Murder | 391.2357 | 3 | 130.41191 | 11.143893 | 0.0000128 |
| Residuals | Murder | 538.3171 | 46 | 11.70254 | NA | NA |
| Between Regions 1 | Assault | 90332.1456 | 3 | 30110.71521 | 5.540793 | 0.0024782 |
| Residuals 1 | Assault | 249980.9744 | 46 | 5434.36901 | NA | NA |
| Between Regions 2 | Rape | 1388.9543 | 3 | 462.98476 | 7.319227 | 0.0004119 |
| Residuals 2 | Rape | 2909.7745 | 46 | 63.25597 | NA | NA |