# Data frame of BS 5th-semester students: one row per student, holding the
# student's name, CGPA and letter grade.
student_names <- c(
  "Ali", "Ahmad", "Sania", "Sara", "Adil", "Yousaf",
  "Subhan", "Arbaz", "Asad", "Hassan", "Waleed"
)
student_cgpa <- c(
  2.5, 3.2, 3.9, 2.99, 3.10, 2.99,
  2.91, 2.87, 3.2, 3.52, 2.50
)
student_grades <- c(
  "D", "B", "A", "C", "B", "A",
  "B", "B", "B", "A", "D"
)
BS5th <- data.frame(
  name = student_names,
  cgpa = student_cgpa,
  grade = student_grades
)
# Show the assembled table
BS5th
## name cgpa grade
## 1 Ali 2.50 D
## 2 Ahmad 3.20 B
## 3 Sania 3.90 A
## 4 Sara 2.99 C
## 5 Adil 3.10 B
## 6 Yousaf 2.99 A
## 7 Subhan 2.91 B
## 8 Arbaz 2.87 B
## 9 Asad 3.20 B
## 10 Hassan 3.52 A
## 11 Waleed 2.50 D
Question 1
Which students have a CGPA greater than or equal to 3.10?
# Logical mask: TRUE for every student whose CGPA is at least 3.10
ind <- BS5th[["cgpa"]] >= 3.10
# Names of the students picked out by the mask
BS5th[["name"]][ind]
## [1] "Ahmad" "Sania" "Adil" "Asad" "Hassan"
Question 2
Which students got a B grade?
# Logical mask: TRUE for every student whose grade is exactly "B"
ind <- BS5th[["grade"]] == "B"
# Names of the students picked out by the mask
BS5th[["name"]][ind]
## [1] "Ahmad" "Adil" "Subhan" "Arbaz" "Asad"
Question 3
Which students got a CGPA greater than 3 and an A grade?
# One mask per condition, then combine them element-wise with & so that a
# student is selected only when BOTH conditions hold.
has_high_cgpa <- BS5th$cgpa > 3
has_grade_a <- BS5th$grade == "A"
meets_both <- has_high_cgpa & has_grade_a
BS5th$name[meets_both]
## [1] "Sania" "Hassan"
Question 4
Which students got a CGPA greater than 3 or an A grade?
# One mask per condition, then combine them element-wise with | so that a
# student is selected when AT LEAST ONE condition holds.
has_high_cgpa <- BS5th$cgpa > 3
has_grade_a <- BS5th$grade == "A"
meets_either <- has_high_cgpa | has_grade_a
BS5th$name[meets_either]
## [1] "Ahmad" "Sania" "Adil" "Yousaf" "Asad" "Hassan"
EXERCISE 3.15
Loading library
library(dslabs)
Loading data
# Load the `murders` data set into the workspace
data("murders")
Question 1
Compute the per 100,000 murder rate for each state and store it in
an object called murder_rate. Then use logical operators to create a
logical vector named low that tells us which entries of murder_rate are
lower than 1.
# Murder rate per 100,000 residents for each state. The exercise asks for the
# object to be called `murder_rate`, so that exact name is used here.
murder_rate <- murders$total / murders$population * 100000
# Logical vector `low`: TRUE where a state's rate is below 1 per 100,000
low <- murder_rate < 1
print(low)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [13] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## [25] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [37] FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE TRUE
Question 2
Now use the results from the previous exercise and the function
which to determine the indices of murder_rate associated with values
lower than 1.
# Murder rate per 100,000 residents for each state
murder_rate <- murders$total / murders$population * 100000
# Logical vector flagging the states whose rate is below 1
low <- murder_rate < 1
print(low)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [13] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## [25] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [37] FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE TRUE
# which() turns the TRUE entries of the logical vector into their positions
low_indices <- which(low)
print(low_indices)
## [1] 12 13 16 20 24 30 35 38 42 45 46 51
Question 3
Now extend the code from Questions 1 and 2 to report the states in
the Northeast with murder rates lower than 1. Hint: use the previously
defined logical vector low and the logical operator &.
# Murder rate per 100,000 residents for each state
murder_rate <- murders$total / murders$population * 100000
# Condition 1: the rate is below 1
is_low <- murder_rate < 1
# Condition 2: the state belongs to the Northeast region
in_northeast <- murders$region == "Northeast"
# Keep only the states that satisfy both conditions; which() converts the
# combined logical vector into row positions used to look up the names.
states_low_northeast <- murders$state[which(is_low & in_northeast)]
# Display the results
print(states_low_northeast)
## [1] "Maine" "New Hampshire" "Vermont"
Question 4
In a previous exercise we computed the murder rate for each state
and the average of these numbers. How many states are below the
average?
# Murder rate per 100,000 residents for each state
murders_rate <- murders$total / murders$population * 100000
# Average of the per-state murder RATES. The raw totals in murders$total are
# counts on a different scale, so averaging them would make every state's
# rate compare as "below average".
average_murder_rate <- mean(murders_rate)
# Logical vector: TRUE for the states whose rate is below that average
low <- murders_rate < average_murder_rate
# The question asks HOW MANY states are below the average; summing a logical
# vector counts its TRUE entries.
sum(low)
## [1] 27
Question 5
Use the results from the previous exercise to report the names of
the states with murder rates lower than 1.
# Murder rate per 100,000 residents for each state
murder_rate <- murders$total / murders$population * 100000
# Logical vector flagging the states whose rate is below 1
low <- murder_rate < 1
print(low)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [13] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## [25] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [37] FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE TRUE
# Subset the state names with the logical vector to list the flagged states
murders[["state"]][low]
## [1] "Hawaii" "Idaho" "Iowa" "Maine"
## [5] "Minnesota" "New Hampshire" "North Dakota" "Oregon"
## [9] "South Dakota" "Utah" "Vermont" "Wyoming"
Question 6
Use the match function to identify the states with abbreviations AK,
MI, and IA. Hint: start by defining an index of the entries of
murders$abb that match the three abbreviations, then use the [ operator
to extract the states.
# Positions in murders$abb of the three requested abbreviations; match()
# returns them in the order the abbreviations were supplied.
ind <- match(c("AK", "MI", "IA"), murders$abb)
ind
## [1] 2 23 16
# Use those positions with [ to extract the state names, as the exercise asks
murders$state[ind]
## [1] "Alaska" "Michigan" "Iowa"
Question 7
Use the %in% operator to create a logical vector that answers the
question: which of the following are actual abbreviations: MA, ME, MI,
MO, MU
# Candidate abbreviations to check against the real ones in murders$abb
candidates <- c("MA", "ME", "MI", "MO", "MU")
# TRUE where a candidate is an actual state abbreviation
candidates %in% murders$abb
## [1] TRUE TRUE TRUE TRUE FALSE
Question 8
Extend the code you used in exercise 7 to report the one entry that
is not an actual abbreviation. Hint: use the ! operator, which turns
FALSE into TRUE and vice versa, then which to obtain an index.
# Candidate abbreviations to check against the real ones in murders$abb
candidates <- c("MA", "ME", "MI", "MO", "MU")
# Negate the membership test so TRUE marks the candidates that are NOT real
# abbreviations, then use which() to get the position of that entry.
not_abb_index <- which(!candidates %in% murders$abb)
# Report the entry itself rather than only a logical vector
candidates[not_abb_index]
## [1] "MU"