library(dslabs)
data(murders)

#Q: 1. Compute the per 100,000 murder rate for each state and store it in an object called murder_rate. Then use logical operators to create a logical vector named low that tells us which entries of murder_rate are lower than 1. #Answer

# Murders per 100,000 residents, one value per row of `murders`.
murder_rate <- with(murders, total / population * 1e5)
# Logical mask: TRUE where the rate is below 1 per 100,000.
low <- murder_rate < 1
print(low)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE

#Q: 2. Now use the results from the previous exercise and the function which to determine the indices of murder_rate associated with values lower than 1. #Answer

# Positions of the TRUE entries in `low`, i.e. the row indices of the
# states whose murder rate is below 1.
low_murder_rate <- which(low)
print(low_murder_rate)
##  [1] 12 13 16 20 24 30 35 38 42 45 46 51

#Q: 3. Use the results from the previous exercise to report the names of the states with murder rates lower than 1. #Answer

# Translate the indices from exercise 2 into state names. This rebinds
# low_murder_rate from an integer index vector to a character vector of
# names, so re-running this line alone would index by names, not positions.
low_murder_rate <- murders$state[low_murder_rate]
# cat() renders the "\n" escape as a real line break; print() would show the
# string quoted, with the escape displayed literally.
cat("States with murder rates lower than 1:\n")
## States with murder rates lower than 1:
# writeLines() emits one name per line. print() has no `sep` argument, so a
# sep = "\n" passed to it is silently ignored.
writeLines(low_murder_rate)
## Hawaii
## Idaho
## Iowa
## Maine
## Minnesota
## New Hampshire
## North Dakota
## Oregon
## South Dakota
## Utah
## Vermont
## Wyoming

#Q: 4. Now extend the code from exercise 2 and 3 to report the states in the Northeast with murder rates lower than 1. Hint: use the previously defined logical vector low and the logical operator &.

# Combine the low-rate mask with a region test element-wise (`&`), then use
# the resulting logical vector to pick the matching state names.
northeast_low_murder_rate <- murders$state[low & murders$region == "Northeast"]
# cat() renders the "\n" escape as a real line break; print() would show the
# string quoted, with the escape displayed literally.
cat("States in the Northeast with murder rates lower than 1:\n")
## States in the Northeast with murder rates lower than 1:
# writeLines() emits one name per line. print() has no `sep` argument, so a
# sep = "\n" passed to it is silently ignored.
writeLines(northeast_low_murder_rate)
## Maine
## New Hampshire
## Vermont

#Q: 5. In a previous exercise we computed the murder rate for each state and the average of these numbers. How many states are below the average?

# Recompute the per-100,000 rate so this exercise runs on its own.
murder_rate <- with(murders, total / population * 1e5)
# Unweighted mean of the per-state rates (not the national rate).
average_murder_rate <- mean(murder_rate)
# Summing a logical vector counts its TRUE entries.
states_below_average <- sum(murder_rate < average_murder_rate)
print(paste(
  "Number of states below the average murder rate:",
  states_below_average
))
## [1] "Number of states below the average murder rate: 27"

#Q: 6. Use the match function to identify the states with abbreviations AK, MI, and IA. Hint: start by defining an index of the entries of murders$abb that match the three abbreviations, then use the [ operator to extract the states.

# Abbreviations to look up, in the order the results should be reported.
abbreviations_match <- c("AK", "MI", "IA")
# match() gives, for each requested abbreviation, the position of its first
# occurrence in the abb column.
matching_indices <- match(abbreviations_match, murders[["abb"]])
# Use those positions to pull the corresponding state names.
matching_states <- murders[["state"]][matching_indices]
print("States with abbreviations AK, MI, and IA:")
## [1] "States with abbreviations AK, MI, and IA:"
print(matching_states)
## [1] "Alaska"   "Michigan" "Iowa"

#Q: 7. Use the %in% operator to create a logical vector that answers the question: which of the following are actual abbreviations: MA, ME, MI, MO, MU?

# Candidate abbreviations to test against the data set.
abbreviations_check <- c("MA", "ME", "MI", "MO", "MU")
# %in% yields one TRUE/FALSE per candidate: TRUE when it appears in the abb
# column, FALSE otherwise.
abbreviations_exist <- abbreviations_check %in% murders[["abb"]]
print("Abbreviations that exist in the dataset:")
## [1] "Abbreviations that exist in the dataset:"
print(abbreviations_exist)
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

#Q: 8. Extend the code you used in exercise 7 to report the one entry that is not an actual abbreviation. (Hint: use the ! operator, which turns FALSE into TRUE and vice versa, then which to obtain an index.)

# Rebuild the membership vector from exercise 7 so this exercise runs on its
# own.
abbreviations_check <- c("MA", "ME", "MI", "MO", "MU")
abbreviations_exist <- abbreviations_check %in% murders[["abb"]]
# Negate the membership mask, then convert it to positions of the candidates
# that are absent from the abb column.
index_not_abbreviation <- which(!abbreviations_exist)
if (length(index_not_abbreviation) > 0) {
  print("The entry that is not an actual abbreviation:")
  print(abbreviations_check[index_not_abbreviation])
} else {
  # which() returned an empty vector: every candidate was found.
  print("All entries are actual abbreviations.")
}
## [1] "The entry that is not an actual abbreviation:"
## [1] "MU"