Overview
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach dslabs before calling data(): data() only searches attached packages
# (and ./data), so calling it first fails with "data set 'murders' not found".
library(dslabs)
data(murders)
Problem 1
library(dslabs)
Question 2a
# Inspect the structure of murders: dimensions, column names and column types.
str(murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
Question 2b
# Row positions that sort murders from the fewest to the most total murders.
ind <- order(murders[["total"]])
# State abbreviations listed in that ascending order.
murders[["abb"]][ind]
## [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"
Question 2c
# Murders per 100,000 residents, one value per row of murders.
murder_rate <- with(murders, total / population * 1e5)
Question 2d
# Copy of murders with the per-100,000 murder rate added as a new column;
# the original murders data frame is left unchanged.
murders_plus <- murders |>
  mutate(murder_rate = total / population * 1e5)
Question 2e
# Summarise every column of murders_plus, including the new murder_rate column.
summary(murders_plus)
## state abb region population
## Length:51 Length:51 Northeast : 9 Min. : 563626
## Class :character Class :character South :17 1st Qu.: 1696962
## Mode :character Mode :character North Central:12 Median : 4339367
## West :13 Mean : 6075769
## 3rd Qu.: 6636084
## Max. :37253956
## total murder_rate
## Min. : 2.0 Min. : 0.3196
## 1st Qu.: 24.5 1st Qu.: 1.2526
## Median : 97.0 Median : 2.6871
## Mean : 184.4 Mean : 2.7791
## 3rd Qu.: 268.0 3rd Qu.: 3.3861
## Max. :1257.0 Max. :16.4528
Question 3a
library(readxl)
# Import the Excel workbook 2018.UCR.PA.xlsx; the path is relative to the
# current working directory, so the file must be present there.
X2018_UCR_PA <- read_excel("2018.UCR.PA.xlsx")
Question 3b
# List the column headers exactly as imported; several contain embedded
# "\r\n" line breaks and are therefore awkward to reference with `$`.
names(X2018_UCR_PA)
## [1] "City"
## [2] "Population"
## [3] "Violent\r\ncrime"
## [4] "Murder and\r\nnonnegligent\r\nmanslaughter"
## [5] "Rape"
## [6] "Robbery"
## [7] "Aggravated\r\nassault"
## [8] "Property\r\ncrime"
## [9] "Burglary"
## [10] "Larceny-\r\ntheft"
## [11] "Motor\r\nvehicle\r\ntheft"
## [12] "Arson"
# Turn the imported headers into syntactically valid R names: make.names()
# replaces line breaks, spaces and hyphens with dots
# (e.g. "Violent\r\ncrime" becomes "Violent..crime").
X2018_UCR_PA <- setNames(X2018_UCR_PA, make.names(names(X2018_UCR_PA)))
# Working copy used for the remaining questions.
X2018_UCR_PA_cleaned <- X2018_UCR_PA
# Per-column summary of the renamed data.
summary(X2018_UCR_PA_cleaned)
## City Population Violent..crime
## Length:989 Min. : 132 Min. : 0.00
## Class :character 1st Qu.: 2066 1st Qu.: 1.00
## Mode :character Median : 4320 Median : 5.00
## Mean : 10054 Mean : 34.16
## 3rd Qu.: 9088 3rd Qu.: 15.00
## Max. :1586916 Max. :14420.00
## Murder.and..nonnegligent..manslaughter Rape Robbery
## Min. : 0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.0000 Median : 0.000 Median : 0.000
## Mean : 0.6977 Mean : 2.971 Mean : 9.449
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 2.000
## Max. :351.0000 Max. :1095.000 Max. :5262.000
## Aggravated..assault Property..crime Burglary Larceny...theft
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 1.00 1st Qu.: 9.0 1st Qu.: 1.00 1st Qu.: 7.0
## Median : 4.00 Median : 40.0 Median : 5.00 Median : 32.0
## Mean : 21.05 Mean : 164.6 Mean : 21.42 Mean : 131.3
## 3rd Qu.: 11.00 3rd Qu.: 105.0 3rd Qu.: 12.00 3rd Qu.: 89.0
## Max. :7712.00 Max. :49145.0 Max. :6497.00 Max. :36968.0
## Motor..vehicle..theft Arson
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 1.00 Median : 0.000
## Mean : 11.84 Mean : 1.147
## 3rd Qu.: 4.00 3rd Qu.: 0.000
## Max. :5680.00 Max. :430.000
Question 3c
# Violent crimes per 100,000 residents, stored as a new column.
X2018_UCR_PA_cleaned[["violent_crime_rate"]] <- with(
  X2018_UCR_PA_cleaned,
  Violent..crime / Population * 1e5
)
Question 3d
# Histogram of the violent crime rate column; the default title and axis
# label are taken from the argument expression, so it is passed as-is.
hist(X2018_UCR_PA_cleaned$violent_crime_rate)
