Overview

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# The murders data set is supplied by the dslabs package, so the package has
# to be attached before data() can locate it. Calling data(murders) first only
# produced the warning "data set 'murders' not found".
library(dslabs)
data(murders)

Problem 1

# dslabs provides the murders data set inspected below.
library(dslabs)
### Question 2a
# Print a compact overview of murders: dimensions, column names, column types
# and the first few values of each column.
str(murders)
## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...

Question 2b

# Row positions of murders sorted by total (ascending, the order() default),
# then the state abbreviations listed in that order.
ind <- order(murders[["total"]])
murders[["abb"]][ind]
##  [1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE"
## [16] "OR" "DE" "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA"
## [31] "MS" "AL" "IN" "SC" "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA"
## [46] "MI" "PA" "NY" "FL" "TX" "CA"

Question 2c

# Murders per 100,000 residents, one value per row of murders.
murder_rate <- with(murders, total / population * 100000)

Question 2d

# Copy of murders with the per-100,000 murder rate added as a new column.
murders_plus <- murders %>%
  mutate(murder_rate = total / population * 100000)

Question 2e

# Per-column summary of murders_plus, including the added murder_rate column.
summary(murders_plus)
##     state               abb                      region     population      
##  Length:51          Length:51          Northeast    : 9   Min.   :  563626  
##  Class :character   Class :character   South        :17   1st Qu.: 1696962  
##  Mode  :character   Mode  :character   North Central:12   Median : 4339367  
##                                        West         :13   Mean   : 6075769  
##                                                           3rd Qu.: 6636084  
##                                                           Max.   :37253956  
##      total         murder_rate     
##  Min.   :   2.0   Min.   : 0.3196  
##  1st Qu.:  24.5   1st Qu.: 1.2526  
##  Median :  97.0   Median : 2.6871  
##  Mean   : 184.4   Mean   : 2.7791  
##  3rd Qu.: 268.0   3rd Qu.: 3.3861  
##  Max.   :1257.0   Max.   :16.4528

Question 3a

# Read the Excel workbook from the working directory with readxl.
library(readxl)
X2018_UCR_PA <- read_excel(path = "2018.UCR.PA.xlsx")

Question 3b

# Show the raw column names as imported; several contain embedded "\r\n".
colnames(X2018_UCR_PA)
##  [1] "City"                                      
##  [2] "Population"                                
##  [3] "Violent\r\ncrime"                          
##  [4] "Murder and\r\nnonnegligent\r\nmanslaughter"
##  [5] "Rape"                                      
##  [6] "Robbery"                                   
##  [7] "Aggravated\r\nassault"                     
##  [8] "Property\r\ncrime"                         
##  [9] "Burglary"                                  
## [10] "Larceny-\r\ntheft"                         
## [11] "Motor\r\nvehicle\r\ntheft"                 
## [12] "Arson"
# Convert the imported headers into syntactically valid R names; make.names()
# replaces the spaces and "\r\n" sequences with dots.
colnames(X2018_UCR_PA) <- make.names(colnames(X2018_UCR_PA))

# Keep working on a separately named copy of the renamed table.
X2018_UCR_PA_cleaned <- X2018_UCR_PA

# Per-column summary of the renamed table.
X2018_UCR_PA_cleaned %>%
  summary()
##      City             Population      Violent..crime    
##  Length:989         Min.   :    132   Min.   :    0.00  
##  Class :character   1st Qu.:   2066   1st Qu.:    1.00  
##  Mode  :character   Median :   4320   Median :    5.00  
##                     Mean   :  10054   Mean   :   34.16  
##                     3rd Qu.:   9088   3rd Qu.:   15.00  
##                     Max.   :1586916   Max.   :14420.00  
##  Murder.and..nonnegligent..manslaughter      Rape             Robbery        
##  Min.   :  0.0000                       Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:  0.0000                       1st Qu.:   0.000   1st Qu.:   0.000  
##  Median :  0.0000                       Median :   0.000   Median :   0.000  
##  Mean   :  0.6977                       Mean   :   2.971   Mean   :   9.449  
##  3rd Qu.:  0.0000                       3rd Qu.:   1.000   3rd Qu.:   2.000  
##  Max.   :351.0000                       Max.   :1095.000   Max.   :5262.000  
##  Aggravated..assault Property..crime      Burglary       Larceny...theft  
##  Min.   :   0.00     Min.   :    0.0   Min.   :   0.00   Min.   :    0.0  
##  1st Qu.:   1.00     1st Qu.:    9.0   1st Qu.:   1.00   1st Qu.:    7.0  
##  Median :   4.00     Median :   40.0   Median :   5.00   Median :   32.0  
##  Mean   :  21.05     Mean   :  164.6   Mean   :  21.42   Mean   :  131.3  
##  3rd Qu.:  11.00     3rd Qu.:  105.0   3rd Qu.:  12.00   3rd Qu.:   89.0  
##  Max.   :7712.00     Max.   :49145.0   Max.   :6497.00   Max.   :36968.0  
##  Motor..vehicle..theft     Arson        
##  Min.   :   0.00       Min.   :  0.000  
##  1st Qu.:   0.00       1st Qu.:  0.000  
##  Median :   1.00       Median :  0.000  
##  Mean   :  11.84       Mean   :  1.147  
##  3rd Qu.:   4.00       3rd Qu.:  0.000  
##  Max.   :5680.00       Max.   :430.000

Question 3c

# Violent crimes per 100,000 residents for each row, stored as a new column.
X2018_UCR_PA_cleaned <- X2018_UCR_PA_cleaned %>%
  mutate(violent_crime_rate = Violent..crime / Population * 100000)

Question 3d

# Histogram of the violent crime rate column. The argument expression is left
# as written because hist() builds its default title and x-axis label from it.
hist(X2018_UCR_PA_cleaned$violent_crime_rate)