# Tutorial (US Arrests)
library(ggplot2)
library(dplyr)
library(gridExtra)
library(corrplot)
# Load the built-in data set into the workspace.
data("USArrests")
## Describe and summarize your assigned data set.
head(USArrests, n = 6L) # preview the first six rows
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
# Compact overview: dimensions plus each column's type and leading values.
str(object = USArrests)
## 'data.frame': 50 obs. of 4 variables:
## $ Murder : num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
## $ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
## $ UrbanPop: int 58 48 80 50 91 78 77 72 80 60 ...
## $ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# Per-column minimum, quartiles, mean and maximum.
summary(object = USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# One box per column, all drawn against a single shared axis.
boxplot(x = USArrests)

# Pairwise correlations between the four variables. The matrix is computed
# once, printed, and then reused for the plot below.
correlation <- cor(USArrests)
correlation
## Murder Assault UrbanPop Rape
## Murder 1.00000000 0.8018733 0.06957262 0.5635788
## Assault 0.80187331 1.0000000 0.25887170 0.6652412
## UrbanPop 0.06957262 0.2588717 1.00000000 0.4113412
## Rape 0.56357883 0.6652412 0.41134124 1.0000000
# Colour-coded view of the same matrix, with each coefficient and the
# variable labels printed in black.
corrplot(
  correlation,
  method = "color",
  addCoef.col = "black",
  tl.col = "black"
)

# One histogram per crime variable. The x-axis label is set explicitly so the
# plots do not fall back to the raw expression used to select the column.
for (crime in c("Murder", "Assault", "Rape")) {
  hist(
    USArrests[[crime]],
    breaks = 20,
    main = crime,
    xlab = paste(crime, "Arrests per 100,000")
  )
}

## Which states have the highest murder and assault rates?
# Six states with the highest murder arrest rates, highest first.
head(USArrests[with(USArrests, order(-Murder)), ], n = 6L)
## Murder Assault UrbanPop Rape
## Georgia 17.4 211 60 25.8
## Mississippi 16.1 259 44 17.1
## Florida 15.4 335 80 31.9
## Louisiana 15.4 249 66 22.2
## South Carolina 14.4 279 48 22.5
## Alabama 13.2 236 58 21.2
# Six states with the highest assault arrest rates, highest first.
head(USArrests[with(USArrests, order(-Assault)), ], n = 6L)
## Murder Assault UrbanPop Rape
## North Carolina 13.0 337 45 16.1
## Florida 15.4 335 80 31.9
## Maryland 11.3 300 67 27.8
## Arizona 8.1 294 80 31.0
## New Mexico 11.4 285 70 32.1
## South Carolina 14.4 279 48 22.5
## Is there a relationship between urban population percentage and crime rates? Graph your data.
# Scatter plot of one crime variable against the urban population percentage,
# overlaid with a fitted straight line and its confidence band.
#
# crime:      name of the USArrests column to put on the y axis.
# line_color: colour of the fitted line.
# Returns the ggplot object without drawing it.
plot_crime_vs_urban <- function(crime, line_color) {
  ggplot(USArrests, aes(x = UrbanPop, y = .data[[crime]])) +
    geom_point() +
    # Spelling out the formula keeps the same linear fit while avoiding the
    # message geom_smooth() emits when it has to pick the formula itself.
    geom_smooth(
      method = "lm",
      formula = y ~ x,
      se = TRUE,
      color = line_color
    ) +
    labs(
      x = "Percent Urban Population",
      y = paste(crime, "Arrests per 100,000")
    ) +
    theme_minimal()
}

us_murder <- plot_crime_vs_urban("Murder", "red")
us_assault <- plot_crime_vs_urban("Assault", "blue")
us_rape <- plot_crime_vs_urban("Rape", "yellow")

# Three panels laid out on a two-column grid.
grid.arrange(us_murder, us_assault, us_rape, ncol = 2)
