#Data Frame

# Build the BS5th data frame: one row per student, with the student's
# name, CGPA, and letter grade.
BS5th <- data.frame(
  name = c(
    "Ali", "Ahmad", "Sania", "Sara", "Adil", "Yousaf",
    "Subhan", "Arbaz", "Asad", "Hassan", "Waleed"
  ),
  cgpa = c(2.5, 3.2, 3.9, 2.99, 3.10, 2.99, 2.91, 2.87, 3.2, 3.52, 2.50),
  grade = c("D", "B", "A", "C", "B", "A", "B", "B", "B", "A", "D")
)
# Display the whole table.
BS5th
##      name cgpa grade
## 1     Ali 2.50     D
## 2   Ahmad 3.20     B
## 3   Sania 3.90     A
## 4    Sara 2.99     C
## 5    Adil 3.10     B
## 6  Yousaf 2.99     A
## 7  Subhan 2.91     B
## 8   Arbaz 2.87     B
## 9    Asad 3.20     B
## 10 Hassan 3.52     A
## 11 Waleed 2.50     D

Question 1

Which students have a CGPA greater than or equal to 3.10?

# Logical mask: TRUE for every row whose CGPA is at least 3.10.
ind <- BS5th[["cgpa"]] >= 3.10
# Names of the students selected by the mask.
BS5th[["name"]][ind]
## [1] "Ahmad"  "Sania"  "Adil"   "Asad"   "Hassan"

Question 2

Which students got a B grade?

# Logical mask: TRUE for every row whose grade is exactly "B".
ind <- BS5th[["grade"]] == "B"
# Names of the students selected by the mask.
BS5th[["name"]][ind]
## [1] "Ahmad"  "Adil"   "Subhan" "Arbaz"  "Asad"

Question 3

Which students got a CGPA greater than 3 and an A grade?

# One mask per condition: CGPA strictly above 3, and grade equal to "A".
cgpa.ind <- BS5th[["cgpa"]] > 3
grade.ind <- BS5th[["grade"]] == "A"
# Element-wise AND keeps only the students who satisfy both conditions.
BS5th[["name"]][grade.ind & cgpa.ind]
## [1] "Sania"  "Hassan"

Question 4

Which students got a CGPA greater than 3 or an A grade?

# One mask per condition: CGPA strictly above 3, and grade equal to "A".
cgpa.ind <- BS5th[["cgpa"]] > 3
grade.ind <- BS5th[["grade"]] == "A"
# Element-wise OR keeps the students who satisfy at least one condition.
BS5th[["name"]][grade.ind | cgpa.ind]
## [1] "Ahmad"  "Sania"  "Adil"   "Yousaf" "Asad"   "Hassan"

EXERCISE 3.15

Loading library

library(dslabs)

Loading data

# Load the murders data set into the workspace (expected to be provided by
# the dslabs package attached above).
data(murders)

Question 1

Compute the per 100,000 murder rate for each state and store it in an object called murder_rate. Then use logical operators to create a logical vector named low that tells us which entries of murder_rate are lower than 1.

# Murder rate per 100,000 inhabitants for each state, stored under the
# object name the exercise asks for (murder_rate).
murder_rate <- murders$total / murders$population * 100000
# Logical vector: TRUE for the entries of murder_rate that are below 1.
low <- murder_rate < 1
print(low)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE

Question 2

Now use the results from the previous exercise and the function which to determine the indices of murder_rate associated with values lower than 1.

# Per-state murder rate, expressed per 100,000 inhabitants.
murders_rate <- murders[["total"]] / murders[["population"]] * 100000
# Logical vector flagging the states whose rate is below 1.
low <- murders_rate < 1
print(low)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE
# which() turns the TRUE positions of the mask into integer indices.
low_indices <- which(low)
print(low_indices)
##  [1] 12 13 16 20 24 30 35 38 42 45 46 51

Question 3

Now extend the code from Questions 1 and 2 to report the states in the Northeast with murder rates lower than 1. Hint: use the previously defined logical vector low and the logical operator &.

# Per-state murder rate, expressed per 100,000 inhabitants.
murders_rate <- murders[["total"]] / murders[["population"]] * 100000
# Mask 1: states whose rate is below 1.
low.ind <- murders_rate < 1
# Mask 2: states located in the Northeast region.
location.ind <- murders[["region"]] == "Northeast"
# Positions of the states for which both masks are TRUE.
low_northeast_indices <- which(location.ind & low.ind)
# Look up the state names at those positions.
states_low_northeast <- murders[["state"]][low_northeast_indices]
# Show the result.
print(states_low_northeast)
## [1] "Maine"         "New Hampshire" "Vermont"

Question 4

In a previous exercise we computed the murder rate for each state and the average of these numbers. How many states are below the average?

# Murder rate per 100,000 inhabitants for each state.
murders_rate <- (murders$total / murders$population) * 100000
# Average of the per-state rates. The mean must be taken over the rates,
# not over the raw murder totals, so that both sides of the comparison
# below are on the same per-100,000 scale.
average_murder_rate <- mean(murders_rate)
# Logical vector: TRUE for states whose rate is below that average.
low <- murders_rate < average_murder_rate
# TRUE counts as 1, so the sum is the number of states below the average.
print(sum(low))
# NOTE: re-knit the document to regenerate the printed output of this chunk.

Question 5

Use the logical vector of murder rates lower than 1 (as defined in Question 1) to report the names of the states with murder rates lower than 1.

# Per-state murder rate, expressed per 100,000 inhabitants.
murders_rate <- murders[["total"]] / murders[["population"]] * 100000
# Logical vector flagging the states whose rate is below 1.
low.ind <- murders_rate < 1
print(low.ind)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE
# Subset the state names with the mask to list the matching states.
murders[["state"]][low.ind]
##  [1] "Hawaii"        "Idaho"         "Iowa"          "Maine"        
##  [5] "Minnesota"     "New Hampshire" "North Dakota"  "Oregon"       
##  [9] "South Dakota"  "Utah"          "Vermont"       "Wyoming"

Question 6

Use the match function to identify the states with abbreviations AK, MI, and IA. Hint: start by defining an index of the entries of murders$abb that match the three abbreviations, then use the [ operator to extract the states.

# match() returns, for each requested abbreviation, its position in
# murders$abb (in the order the abbreviations are given).
ind <- match(c("AK", "MI", "IA"), murders$abb)
ind
## [1]  2 23 16
# Use those positions with [ to extract the corresponding state names,
# which is what the exercise asks for.
murders$state[ind]
## [1] "Alaska"   "Michigan" "Iowa"

Question 7

Use the %in% operator to create a logical vector that answers the question: which of the following are actual abbreviations: MA, ME, MI, MO, MU

# For each candidate, TRUE if it occurs among the abbreviations in murders.
c("MA", "ME", "MI", "MO", "MU") %in% murders[["abb"]]
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

Question 8

Extend the code you used in exercise 7 to report the one entry that is not an actual abbreviation. Hint: use the ! operator, which turns FALSE into TRUE and vice versa, then which to obtain an index.

 ! c( "MA","ME","MI","MO", "MU") %in% murders$abb
## [1] FALSE FALSE FALSE FALSE  TRUE