#library

# Attach dslabs, load its `murders` data set into the workspace, and
# inspect the structure of the data frame.
library(dslabs)
data(murders)
str(murders)
## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...
# Exercise 3.15
#question.1
# Sample murder data (replace this with your actual data)
# Three parallel sample vectors, one element per state.
state <- c("State1", "State2", "State3", "State4", "State5")
print(state)
## [1] "State1" "State2" "State3" "State4" "State5"
murder_count <- c(50, 30, 15, 5, 2)
print(murder_count)
## [1] 50 30 15  5  2
population <- c(1000000, 1500000, 500000, 2000000, 800000)
print(population)
## [1] 1000000 1500000  500000 2000000  800000

# Create a data frame
murder_data <- data.frame(
  State = state,
  Murder = murder_count,
  Population = population
)
print(murder_data)
##    State Murder Population
## 1 State1     50    1000000
## 2 State2     30    1500000
## 3 State3     15     500000
## 4 State4      5    2000000
## 5 State5      2     800000

# Calculate murder rate per 100,000 people
murder_data$MurderRate <- (murder_data$Murder / murder_data$Population) * 100000
murder_data$MurderRate
## [1] 5.00 2.00 3.00 0.25 0.25

# Create a logical vector to identify entries with rates lower than 1
low <- murder_data$MurderRate < 1
low
## [1] FALSE FALSE FALSE  TRUE  TRUE

# View the murder rates and "low" vector
murder_data
##    State Murder Population MurderRate
## 1 State1     50    1000000       5.00
## 2 State2     30    1500000       2.00
## 3 State3     15     500000       3.00
## 4 State4      5    2000000       0.25
## 5 State5      2     800000       0.25
low
## [1] FALSE FALSE FALSE  TRUE  TRUE

#2. Now use the results from the previous exercise and the function which to determine the indices of murder_rate associated with values lower than 1.

# Create a logical vector to identify entries with rates lower than 1
low <- murder_data$MurderRate < 1
low
## [1] FALSE FALSE FALSE  TRUE  TRUE

# Use the which function to turn the logical vector into the positions of
# its TRUE entries
low_indices <- which(low)
low_indices
## [1] 4 5

#3. Use the results from the previous exercise to report the names of the states with murder rates lower than 1.

# Pick the state names at the low-rate positions, show them, then print a
# one-line, comma-separated report.
low_states <- murder_data[["State"]][low_indices]
print(low_states)
## [1] "State4" "State5"
cat(
  "States with murder rates lower than 1 per 100,000 people:",
  toString(low_states)
)
## States with murder rates lower than 1 per 100,000 people: State4, State5

#4. Now extend the code from exercises 2 and 3 to report the states in the Northeast with murder rates lower than 1. Hint: use the previously defined logical vector low and the logical operator &.

# murder_data carries no region information, so attach a sample region to
# each state (replace this with your actual data, e.g. a region column such
# as the one shown for `murders` in the str() output).
murder_data$Region <- c("Northeast", "South", "West", "Northeast", "West")

# Logical vector flagging the states located in the Northeast
northeast <- murder_data$Region == "Northeast"
northeast
## [1]  TRUE FALSE FALSE  TRUE FALSE

# Use the which function to find the indices of states in the Northeast with
# low murder rates; `&` combines the two logical vectors element by element
northeast_low_indices <- which(low & northeast)
northeast_low_indices
## [1] 4

# Report the names of states in the Northeast with murder rates lower than 1
northeast_low_states <- murder_data$State[northeast_low_indices]
northeast_low_states
## [1] "State4"

#5. In a previous exercise we computed the murder rate for each state and the average of these numbers. How many states are below the average?

# Calculate the average murder rate
average_murder_rate <- mean(murder_data$MurderRate)
average_murder_rate
## [1] 2.1

# Create a logical vector to identify states with rates below the average
below_average <- murder_data$MurderRate < average_murder_rate
below_average
## [1] FALSE  TRUE FALSE  TRUE  TRUE

# Use the sum function to count the number of states below the average
# (each TRUE counts as 1)
states_below_average <- sum(below_average)
states_below_average
## [1] 3

#cat("Number of states with murder rates below the average:", states_below_average)

#6. Use the match function to identify the states with abbreviations AK, MI, and IA. Hint: start by defining an index of the entries of murders$abb that match the three abbreviations, then use the [ operator to extract the states.

# Sample murder data (replace this with your actual data): state names and
# their abbreviations as parallel vectors.
state <- c("Alaska", "Michigan", "Iowa", "New York", "California")
state
## [1] "Alaska"     "Michigan"   "Iowa"       "New York"   "California"
abb <- c("AK", "MI", "IA", "NY", "CA")
abb
## [1] "AK" "MI" "IA" "NY" "CA"

# Define a vector of abbreviations to match
abbreviations_to_match <- c("AK", "MI", "IA")
abbreviations_to_match
## [1] "AK" "MI" "IA"

# Use the match function to identify the indices of matching abbreviations.
# The lookup table is `abb`; murder_data has no abbreviation column, so
# matching against it can only yield NA.
matching_indices <- match(abbreviations_to_match, abb)
matching_indices
## [1] 1 2 3

# Extract the states with matching abbreviations from the vector that is
# parallel to `abb`
matching_states <- state[matching_indices]
matching_states
## [1] "Alaska"   "Michigan" "Iowa"

#cat("States with abbreviations AK, MI, and IA:", paste(matching_states, collapse = ","))

#7. Use the %in% operator to create a logical vector that answers the question: which of the following are actual abbreviations: MA, ME, MI, MO, MU ?

# Vector of known abbreviations: the US state abbreviations shipped with
# base R (state.abb). The reference set must be independent of the values
# being checked, otherwise every candidate trivially tests TRUE.
known_abbreviations <- state.abb
head(known_abbreviations)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO"

# Vector of abbreviations to check
abbreviations_to_check <- c("MA", "ME", "MI", "MO", "MU")
abbreviations_to_check
## [1] "MA" "ME" "MI" "MO" "MU"

# Create a logical vector to check if the abbreviations are in the list of
# known abbreviations
are_actual_abbreviations <- abbreviations_to_check %in% known_abbreviations
are_actual_abbreviations
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

#8. Extend the code you used in exercise 7 to report the one entry that is not an actual abbreviation. Hint: use the ! operator, which turns FALSE into TRUE and vice versa, then which to obtain an index.

# Use the ! operator to invert the logical vector
not_actual_abbreviation_index <- which(!are_actual_abbreviations)
not_actual_abbreviation_index
## [1] 5

# Get the entry that is not an actual abbreviation
not_actual_abbreviation <- abbreviations_to_check[not_actual_abbreviation_index]
not_actual_abbreviation
## [1] "MU"

#cat("The entry that is not an actual abbreviation is:", not_actual_abbreviation)

#data frame

# Student records: one row per student with name, CGPA and letter grade.
# Columns are given as `tag = value` arguments so the data frame gets the
# clean column names name / CGPA / Grade. Using `<-` inside the call would
# instead create workspace variables and derive the column names from the
# deparsed expressions.
BS_5th <- data.frame(
  name = c(
    "ali", "ahmad", "sania", "sara", "adil", "sharjeel",
    "subhan", "arbaz", "athar", "hessan", "waleed"
  ),
  CGPA = c(2.5, 3.2, 3.9, 2.9, 3.10, 2.99, 2.19, 2.87, 3.2, 3.53, 2.5),
  Grade = c("D", "B", "A", "C", "B", "A", "B", "B", "B", "A", "D")
)
print(BS_5th)
##        name CGPA Grade
## 1       ali 2.50     D
## 2     ahmad 3.20     B
## 3     sania 3.90     A
## 4      sara 2.90     C
## 5      adil 3.10     B
## 6  sharjeel 2.99     A
## 7    subhan 2.19     B
## 8     arbaz 2.87     B
## 9     athar 3.20     B
## 10   hessan 3.53     A
## 11   waleed 2.50     D
class(BS_5th)
## [1] "data.frame"

# Which students have a CGPA greater than or equal to 3.2?
# Build the logical mask, show the CGPA values it selects, then the mask.

ind <- BS_5th$CGPA >= 3.2
print(BS_5th$CGPA[ind])
## [1] 3.20 3.90 3.20 3.53
ind
##  [1] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE

# Which students got a "B" grade?
# Build the logical mask, show the Grade values it selects, then the mask.

ind <- BS_5th$Grade == "B"
print(BS_5th$Grade[ind])
## [1] "B" "B" "B" "B" "B"
ind
##  [1] FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE

# AND and OR operators on the CGPA and Grade conditions

# One logical mask per condition, aligned with the rows of BS_5th.
CGPA_ind <- BS_5th$CGPA >= 3.2
Grade_ind <- BS_5th$Grade == "B"

# Students meeting both conditions (element-wise AND)
BS_5th$name[CGPA_ind & Grade_ind]
## [1] "ahmad" "athar"

# Students meeting at least one condition (element-wise OR)
BS_5th$name[CGPA_ind | Grade_ind]
## [1] "ahmad"  "sania"  "adil"   "subhan" "arbaz"  "athar"  "hessan"