# Setup: attach the dslabs package, load its `murders` data set into the
# workspace, and display the structure of that data frame.
library(dslabs)
data(murders)
str(murders)
# Recorded output of str(murders):
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
# Exercise 3.15
# Question 1: compute the murder rate per 100,000 people and build a logical
# vector `low` that is TRUE where the rate is below 1.
# This section works on a small hand-made sample, not on `murders`.

# Five placeholder state labels: "State1" ... "State5".
state <- paste0("State", 1:5)
print(state)
## [1] "State1" "State2" "State3" "State4" "State5"

# Murder counts, in the same order as `state`.
murder_count <- c(50, 30, 15, 5, 2)
print(murder_count)
## [1] 50 30 15  5  2

# Population of each sample state.
population <- c(1e6, 1.5e6, 5e5, 2e6, 8e5)
print(population)
## [1] 1000000 1500000  500000 2000000  800000

# Collect the three vectors into one data frame.
murder_data <- data.frame(
  State = state,
  Murder = murder_count,
  Population = population
)
print(murder_data)
##    State Murder Population
## 1 State1     50    1000000
## 2 State2     30    1500000
## 3 State3     15     500000
## 4 State4      5    2000000
## 5 State5      2     800000

# Rate per 100,000 inhabitants: (murders / population) * 100000.
murder_data[["MurderRate"]] <- with(murder_data, Murder / Population * 100000)
print(murder_data[["MurderRate"]])
## [1] 5.00 2.00 3.00 0.25 0.25

# TRUE for every state whose rate is strictly below 1.
low <- murder_data[["MurderRate"]] < 1
print(low)
## [1] FALSE FALSE FALSE  TRUE  TRUE

# Show the finished table and the mask once more.
print(murder_data)
##    State Murder Population MurderRate
## 1 State1     50    1000000       5.00
## 2 State2     30    1500000       2.00
## 3 State3     15     500000       3.00
## 4 State4      5    2000000       0.25
## 5 State5      2     800000       0.25
print(low)
## [1] FALSE FALSE FALSE  TRUE  TRUE
# 2. Now use the results from the previous exercise and the function which to
# determine the indices of murder_rate associated with values lower than 1.

# Logical mask: TRUE where the rate is below 1 per 100,000.
low <- murder_data$MurderRate < 1
low
## [1] FALSE FALSE FALSE  TRUE  TRUE

# Use the which function to find the indices of the TRUE entries of `low`.
# (This explanatory text was previously fused onto the assignment line without
# a leading `#`, which made the script fail to parse.)
low_indices <- which(low)
low_indices
## [1] 4 5
# 3. Use the results from the previous exercise to report the names of the
# states with murder rates lower than 1.

# Pick the state names at the positions found by which() in exercise 2.
low_states <- murder_data$State[low_indices]
print(low_states)
## [1] "State4" "State5"

# One-line summary; toString() joins the names with ", ".
cat(
  "States with murder rates lower than 1 per 100,000 people:",
  toString(low_states)
)
## States with murder rates lower than 1 per 100,000 people: State4, State5
# 4. Now extend the code from exercise 2 and 3 to report the states in the
# Northeast with murder rates lower than 1. Hint: use the previously defined
# logical vector low and the logical operator &.

# The sample `murder_data` has no region information, so the old lookup of
# `murder_data$Northeast` returned NULL and the answer was always empty.
# Region is only available in the `murders` data set loaded at the top of the
# file, so this exercise is computed on `murders`. Separate names are used for
# the rate and the mask so the 5-element sample `low` is not overwritten.
murders_rate <- murders$total / murders$population * 100000
murders_low <- murders_rate < 1

# TRUE for every row of `murders` that lies in the Northeast region.
northeast <- murders$region == "Northeast"
sum(northeast)
## [1] 9

# Use the which function to find the indices of states in the Northeast with
# low murder rates; `&` combines the two masks element by element.
northeast_low_indices <- which(murders_low & northeast)
northeast_low_indices
## [1] 20 30 46

northeast_low_states <- murders$state[northeast_low_indices]
northeast_low_states
## [1] "Maine"         "New Hampshire" "Vermont"
# 5. In a previous exercise we computed the murder rate for each state and the
# average of these numbers. How many states are below the average?

# Mean of the per-state rates in the sample data.
average_murder_rate <- mean(murder_data$MurderRate)
average_murder_rate
## [1] 2.1

# TRUE for every state whose rate is strictly below that mean.
below_average <- murder_data$MurderRate < average_murder_rate
below_average
## [1] FALSE  TRUE FALSE  TRUE  TRUE

# Use the sum function to count the number of states below the average:
# summing a logical vector counts its TRUE entries.
# (This explanatory text was previously uncommented and fused onto the
# assignment, which made the script fail to parse.)
states_below_average <- sum(below_average)
states_below_average
## [1] 3
# cat("Number of states with murder rates below the average:", states_below_average)
# 6. Use the match function to identify the states with abbreviations AK, MI,
# and IA. Hint: start by defining an index of the entries of murders$abb that
# match the three abbreviations, then use the [ operator to extract the states.

# Lookup table: state names and their abbreviations, in the same order.
# (`murders$state` / `murders$abb` can be substituted to search all states.)
state <- c("Alaska", "Michigan", "Iowa", "New York", "California")
state
## [1] "Alaska"     "Michigan"   "Iowa"       "New York"   "California"
abb <- c("AK", "MI", "IA", "NY", "CA")
abb
## [1] "AK" "MI" "IA" "NY" "CA"

abbreviations_to_match <- c("AK", "MI", "IA")
abbreviations_to_match
## [1] "AK" "MI" "IA"

# match() gives, for each requested abbreviation, its position in `abb`.
# The lookup previously targeted `murder_data$Abbreviation`, a column that does
# not exist (NULL), so every result was NA.
matching_indices <- match(abbreviations_to_match, abb)
matching_indices
## [1] 1 2 3

# Translate the positions back into state names.
matching_states <- state[matching_indices]
matching_states
## [1] "Alaska"   "Michigan" "Iowa"
# cat("States with abbreviations AK, MI, and IA:", paste(matching_states, collapse = ", "))

# 7. Use the %in% operator to create a logical vector that answers the
# question: which of the following are actual abbreviations: MA, ME, MI, MO, MU?
# Exercise 7: which of MA, ME, MI, MO, MU are actual state abbreviations?
# Reference set: `state.abb`, the vector of the 50 two-letter US state
# abbreviations from R's built-in datasets package. The reference used to be a
# copy of the very vector being checked, so every entry trivially came back
# TRUE. (`murders$abb` would serve equally well as the reference.)
known_abbreviations <- state.abb
head(known_abbreviations)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO"

abbreviations_to_check <- c("MA", "ME", "MI", "MO", "MU")
abbreviations_to_check
## [1] "MA" "ME" "MI" "MO" "MU"

# %in% returns TRUE/FALSE per candidate and never NA.
are_actual_abbreviations <- abbreviations_to_check %in% known_abbreviations
are_actual_abbreviations
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

# 8. Extend the code you used in exercise 7 to report the one entry that is
# not an actual abbreviation. Hint: use the ! operator, which turns FALSE into
# TRUE and vice versa, then which to obtain an index.

# Use the ! operator to invert the logical vector, then which() to locate the
# position of the entry that is not a real abbreviation.
not_actual_abbreviation_index <- which(!are_actual_abbreviations)
not_actual_abbreviation_index
## [1] 5

# Report the entry itself (the index used to be printed a second time here).
not_actual_abbreviation <- abbreviations_to_check[not_actual_abbreviation_index]
not_actual_abbreviation
## [1] "MU"
# cat("The entry that is not an actual abbreviation is:", not_actual_abbreviation)

# data frame
# Student records for the BS 5th-semester class: name, CGPA and letter grade.
# Columns are named with `=` inside data.frame(). Using `<-` there assigns
# global variables instead of naming the arguments, so the columns ended up
# with mangled names such as "name....c..ali...." and `BS_5th$CGPA` only worked
# through partial matching of `$`.
BS_5th <- data.frame(
  name = c(
    "ali", "ahmad", "sania", "sara", "adil", "sharjeel",
    "subhan", "arbaz", "athar", "hessan", "waleed"
  ),
  CGPA = c(2.5, 3.2, 3.9, 2.9, 3.10, 2.99, 2.19, 2.87, 3.2, 3.53, 2.5),
  Grade = c("D", "B", "A", "C", "B", "A", "B", "B", "B", "A", "D")
)
print(BS_5th)
##        name CGPA Grade
## 1       ali 2.50     D
## 2     ahmad 3.20     B
## 3     sania 3.90     A
## 4      sara 2.90     C
## 5      adil 3.10     B
## 6  sharjeel 2.99     A
## 7    subhan 2.19     B
## 8     arbaz 2.87     B
## 9     athar 3.20     B
## 10   hessan 3.53     A
## 11   waleed 2.50     D
class(BS_5th)
## [1] "data.frame"
# Which students have a CGPA greater than or equal to 3.2?
# Build the mask, show the CGPA values it selects, then show the mask itself.
ind <- BS_5th$CGPA >= 3.2
print(BS_5th$CGPA[ind])
## [1] 3.20 3.90 3.20 3.53
print(ind)
## [1] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE

# Which students got a "B" grade?
# `ind` is reused: it now marks the rows whose grade equals "B".
ind <- BS_5th$Grade == "B"
print(BS_5th$Grade[ind])
## [1] "B" "B" "B" "B" "B"
print(ind)
## [1] FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE
# Combine the two conditions with the element-wise logical operators.
CGPA_ind <- BS_5th$CGPA >= 3.2
Grade_ind <- BS_5th$Grade == "B"

# Students satisfying both conditions (CGPA >= 3.2 AND grade "B").
meets_both <- CGPA_ind & Grade_ind
print(BS_5th$name[meets_both])
## [1] "ahmad" "athar"

# Students satisfying at least one condition (CGPA >= 3.2 OR grade "B").
meets_either <- CGPA_ind | Grade_ind
print(BS_5th$name[meets_either])
## [1] "ahmad"  "sania"  "adil"   "subhan" "arbaz"  "athar"  "hessan"