### Load the necessary packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
data(murders)
#### Look up the state whose population is the largest in the data set
with(murders, state[which.max(population)])
## [1] "California"
#### Murders per 100,000 residents, computed for every state at once
murder_rate <- with(murders, total / population * 100000)
#### State names sorted from the highest murder rate down to the lowest
murders[["state"]][order(murder_rate, decreasing = TRUE)]
## [1] "District of Columbia" "Louisiana" "Missouri"
## [4] "Maryland" "South Carolina" "Delaware"
## [7] "Michigan" "Mississippi" "Georgia"
## [10] "Arizona" "Pennsylvania" "Tennessee"
## [13] "Florida" "California" "New Mexico"
## [16] "Texas" "Arkansas" "Virginia"
## [19] "Nevada" "North Carolina" "Oklahoma"
## [22] "Illinois" "Alabama" "New Jersey"
## [25] "Connecticut" "Ohio" "Alaska"
## [28] "Kentucky" "New York" "Kansas"
## [31] "Indiana" "Massachusetts" "Nebraska"
## [34] "Wisconsin" "Rhode Island" "West Virginia"
## [37] "Washington" "Colorado" "Montana"
## [40] "Minnesota" "South Dakota" "Oregon"
## [43] "Wyoming" "Maine" "Utah"
## [46] "Idaho" "Iowa" "North Dakota"
## [49] "Hawaii" "New Hampshire" "Vermont"
#### Murder rate per 100,000 residents, defined the same way as before
murder_rate <- with(murders, total / population * 100000)
#### Logical vector: TRUE for each state whose murder rate is at most 0.71
index <- murder_rate <= 0.71
#### Summing a logical vector counts its TRUE entries, i.e. the matching states
sum(index)
## [1] 5
#### Names of the states whose murder rate is at most 0.71
murders[["state"]][index]
## [1] "Hawaii" "Iowa" "New Hampshire" "North Dakota"
## [5] "Vermont"
#### One logical vector per condition: murder rate of at most 1, and West region
safe <- murder_rate <= 1
west <- murders$region == "West"
#### A state is selected only when both conditions are TRUE for it
index <- west & safe
murders[["state"]][index]
## [1] "Hawaii" "Idaho" "Oregon" "Utah" "Wyoming"
#### which() returns the positions of the TRUE entries of a logical vector
x <- c(FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)
which(x)
## [1] 2 4 5
#### Find the row position of Massachusetts, then read its murder rate there
index <- which(murders[["state"]] == "Massachusetts")
index
## [1] 22
murder_rate[index]
## [1] 1.802179
#### match() gives the row position of each requested state, in the order asked;
#### those positions then pull out the state names and their murder rates
index <- match(
  x = c("New York", "Florida", "Texas"),
  table = murders$state
)
index
## [1] 33 10 44
murders$state[index]
## [1] "New York" "Florida" "Texas"
murder_rate[index]
## [1] 2.667960 3.398069 3.201360
#### %in% reports, for each element of y, whether it occurs anywhere in x
x <- letters[1:5]
y <- c("a", "d", "f")
y %in% x
## [1] TRUE TRUE FALSE
#### Check whether Boston, Dakota and Washington each appear among the state names
c(
  "Boston",
  "Dakota",
  "Washington"
) %in% murders[["state"]]
## [1] FALSE FALSE TRUE
#### Load dplyr for mutate() (already attached via tidyverse); the plots below use base R graphics
library(dplyr)
#### Store the murder rate per 100,000 residents as a new column, then draw
#### a histogram of that column
murders <- murders %>%
  mutate(rate = total / population * 100000)
hist(murders$rate)

#### One boxplot of the murder rate for each region
boxplot(rate ~ region, data = murders)
