### Load the necessary packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
data(murders)
#### Look up the state with the largest population: which.max() gives the
#### position of the maximum, which is then used to pick the state name

with(murders, state[which.max(population)])
## [1] "California"
#### Murder rate per 100,000 residents, one value per state

murder_rate <- with(murders, total / population * 100000)
#### State names sorted from the highest murder rate down to the lowest

with(murders, state[order(murder_rate, decreasing = TRUE)])
##  [1] "District of Columbia" "Louisiana"            "Missouri"            
##  [4] "Maryland"             "South Carolina"       "Delaware"            
##  [7] "Michigan"             "Mississippi"          "Georgia"             
## [10] "Arizona"              "Pennsylvania"         "Tennessee"           
## [13] "Florida"              "California"           "New Mexico"          
## [16] "Texas"                "Arkansas"             "Virginia"            
## [19] "Nevada"               "North Carolina"       "Oklahoma"            
## [22] "Illinois"             "Alabama"              "New Jersey"          
## [25] "Connecticut"          "Ohio"                 "Alaska"              
## [28] "Kentucky"             "New York"             "Kansas"              
## [31] "Indiana"              "Massachusetts"        "Nebraska"            
## [34] "Wisconsin"            "Rhode Island"         "West Virginia"       
## [37] "Washington"           "Colorado"             "Montana"             
## [40] "Minnesota"            "South Dakota"         "Oregon"              
## [43] "Wyoming"              "Maine"                "Utah"                
## [46] "Idaho"                "Iowa"                 "North Dakota"        
## [49] "Hawaii"               "New Hampshire"        "Vermont"
#### Recompute the murder rate (per 100,000 residents) exactly as before

murder_rate <- with(murders, total / population * 100000)

#### Logical vector: TRUE where a state's murder rate is at most 0.71

index <- murder_rate <= 0.71
#### TRUE counts as 1 in sum(), so this is the number of states at or below 0.71

sum(index)
## [1] 5
#### Use the logical index to list the states whose murder rate is at most 0.71

murders[["state"]][index]
## [1] "Hawaii"        "Iowa"          "New Hampshire" "North Dakota" 
## [5] "Vermont"
#### One logical vector per condition: `west` flags states in the West region,
#### `safe` flags states whose murder rate is at most 1
west <- murders[["region"]] == "West"
safe <- murder_rate <= 1
#### Elementwise AND keeps only the states for which both conditions are TRUE

index <- west & safe
murders[["state"]][index]
## [1] "Hawaii"  "Idaho"   "Oregon"  "Utah"    "Wyoming"
#### which() reports the positions of the TRUE elements of a logical vector
x <- c(FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)
which(x)
## [1] 2 4 5
#### Find the position of Massachusetts in the state column with which(),
#### then use that position to read its murder rate
index <- which(murders[["state"]] == "Massachusetts")
index
## [1] 22
murder_rate[index]
## [1] 1.802179
#### match() returns the positions of New York, Florida and Texas in the state
#### column; those positions then give the state names and murder rates

index <- match(
  x = c("New York", "Florida", "Texas"),
  table = murders[["state"]]
)
index
## [1] 33 10 44
murders[["state"]][index]
## [1] "New York" "Florida"  "Texas"
murder_rate[index]
## [1] 2.667960 3.398069 3.201360
#### %in% tests, element by element, whether each value of y occurs in x
x <- letters[1:5]
y <- c("a", "d", "f")
y %in% x
## [1]  TRUE  TRUE FALSE
#### Check whether Boston, Dakota and Washington appear among the state names

c("Boston", "Dakota", "Washington") %in% murders[["state"]]
## [1] FALSE FALSE  TRUE
#### Load dplyr, which provides mutate() (tidyverse has already attached it)
library(dplyr)
#### Add the murder rate per 100,000 residents as a `rate` column, then draw
#### a histogram of that column
murders <- murders %>%
  mutate(rate = total / population * 100000)
hist(murders$rate)

#### Boxplots of the murder rate, one box per region
boxplot(rate ~ region, data = murders)