Class Work

Creating a dataframe and entering values in it

# Class roster: one row per student with name, CGPA and letter grade.
# Each column is laid out on its own lines so the values stay aligned by
# position (the i-th name, cgpa and grade describe the same student).
BSDS <- data.frame(
  name = c(
    "Ali", "Ahmad", "Sania", "Sara", "Adil", "Wajahat",
    "Subhan", "Abuzar", "Asad", "Hassan", "Waleed"
  ),
  cgpa = c(
    2.5, 3.2, 3.9, 2.99, 3.10, 2.99,
    2.91, 2.87, 3.2, 3.5, 2.50
  ),
  grade = c(
    "D", "B", "A", "C", "B", "A",
    "B", "B", "B", "A", "D"
  )
)

# Show the full table
BSDS
##       name cgpa grade
## 1      Ali 2.50     D
## 2    Ahmad 3.20     B
## 3    Sania 3.90     A
## 4     Sara 2.99     C
## 5     Adil 3.10     B
## 6  Wajahat 2.99     A
## 7   Subhan 2.91     B
## 8   Abuzar 2.87     B
## 9     Asad 3.20     B
## 10  Hassan 3.50     A
## 11  Waleed 2.50     D

Q1-Which students have a CGPA greater than or equal to 3.10?

# Keep the names whose CGPA is at least 3.10 (the boundary value is included)
BSDS[["name"]][BSDS[["cgpa"]] >= 3.10]
## [1] "Ahmad"  "Sania"  "Adil"   "Asad"   "Hassan"

Q2-Which students have got B grade?

# Row-filter on grade "B", then pull the single "name" column (drops to a vector)
BSDS[BSDS$grade == "B", "name"]
## [1] "Ahmad"  "Adil"   "Subhan" "Abuzar" "Asad"

Q3-Which students have cgpa more than 3.10 & grade “A”?

# Both conditions must hold for a row: CGPA strictly above 3.10 and grade "A"
with(BSDS, name[cgpa > 3.10 & grade == "A"])
## [1] "Sania"  "Hassan"

Exercise 3.15

Importing Library named ‘dslabs’

library(dslabs)

Loading Dataset

# Load the `murders` data set; the exercises below read its
# total, population, state, region and abb columns.
data("murders")

Q-01 Compute the per 100,000 murder rate for each state and store it in an object called murder_rate. Then use logical operators to create a logical vector named low that tells us which entries of murder_rate are lower than 1.

# Compute the murder rate per 100,000 people for each state:
# murders per person (total / population), scaled UP by 100,000.
# (Multiplying the population by 100,000 inside the denominator would
# instead shrink every rate to ~1e-10 and make every state look "low".)
murder_rate <- murders$total / murders$population * 100000

# Logical vector 'low': TRUE where a state's rate is below 1 per 100,000
low <- murder_rate < 1

# Print the logical vector
print(low)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [13]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
## [25] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [37] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE  TRUE

Q-02 Now use the results from the previous exercise and the function which to determine the indices of murder_rate associated with values lower than 1.

# Compute the murder rate per 100,000 people for each state:
# murders per person (total / population), scaled UP by 100,000.
murder_rate <- murders$total / murders$population * 100000

# Logical vector 'low': TRUE where a state's rate is below 1 per 100,000
low <- murder_rate < 1

# which() turns the logical vector into the positions of its TRUE entries
ind_low <- which(low)

# Print the indices associated with values lower than 1
print(ind_low)
##  [1] 12 13 16 20 24 30 35 38 42 45 46 51

Q-03 Use the results from the previous exercise to report the names of the states with murder rates lower than 1.

# Compute the murder rate per 100,000 people for each state:
# murders per person (total / population), scaled UP by 100,000.
murder_rate <- murders$total / murders$population * 100000

# Logical vector 'low': TRUE where a state's rate is below 1 per 100,000
low <- murder_rate < 1

# which() turns the logical vector into the positions of its TRUE entries
ind_low <- which(low)

# Look up the state names at those positions
low_murders_state <- murders$state[ind_low]

# Print the names of the states
print(low_murders_state)
##  [1] "Hawaii"        "Idaho"         "Iowa"          "Maine"        
##  [5] "Minnesota"     "New Hampshire" "North Dakota"  "Oregon"       
##  [9] "South Dakota"  "Utah"          "Vermont"       "Wyoming"

Q-04 Now extend the code from exercises 2 and 3 to report the states in the Northeast with murder rates lower than 1. Hint: use the previously defined logical vector low and the logical operator &.

# Compute the murder rate per 100,000 people for each state:
# murders per person (total / population), scaled UP by 100,000.
murder_rate <- murders$total / murders$population * 100000

# Logical vector 'low': TRUE where a state's rate is below 1 per 100,000
low <- murder_rate < 1

# Logical vector: TRUE for states whose region is "Northeast"
northeast_states <- murders$region == "Northeast"

# Elementwise & keeps only states that satisfy BOTH conditions
states_low_murder_rate_in_northeast <- murders$state[low & northeast_states]

# Print the names of the states in the Northeast with murder rates lower than 1
print(states_low_murder_rate_in_northeast)
## [1] "Maine"         "New Hampshire" "Vermont"

Q-05 In a previous exercise we computed the murder rate for each state and the average of these numbers. How many states are below the average?

# Compute the murder rate per 100,000 people for each state:
# murders per person (total / population), scaled UP by 100,000.
murder_rate <- murders$total / murders$population * 100000

# Average of the per-state rates (an unweighted mean across states)
average_murder_rate <- mean(murder_rate, na.rm = TRUE)

# Count the states whose rate is below that average; summing a logical
# vector counts its TRUE entries. na.rm mirrors the mean() call above so a
# missing rate cannot turn the whole count into NA.
states_below_average <- sum(murder_rate < average_murder_rate, na.rm = TRUE)

# Print the number of states below the average
print(states_below_average)
## [1] 27

Q-06 Use the match function to identify the states with abbreviations AK, MI, and IA. Hint: start by defining an index of the entries of murders$abb that match the three abbreviations, then use the [ operator to extract the states.

# Target abbreviations, in the order the answers should be reported
abbreviations_to_match <- c("AK", "MI", "IA")

# For each target, match() returns the position of its first occurrence
# in the abbreviation column
matching_indices <- match(x = abbreviations_to_match, table = murders[["abb"]])

# Index the state column with those positions to get the full names
matching_states <- murders[["state"]][matching_indices]

# Show the result
print(matching_states)
## [1] "Alaska"   "Michigan" "Iowa"

Q-07 Use the %in% operator to create a logical vector that answers the question: which of the following are actual abbreviations: MA, ME, MI, MO, MU ?

# Candidate abbreviations to test
abbreviation <- c("MA", "ME", "MI", "MO", "MU")

# %in% yields one TRUE/FALSE per candidate: TRUE when the candidate appears
# anywhere in the abbreviation column
abbreviations_exist <- abbreviation %in% murders[["abb"]]

# Show the membership flags
print(abbreviations_exist)
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

Q-08 Extend the code you used in exercise 7 to report the one entry that is not an actual abbreviation. Hint: use the ! operator, which turns FALSE into TRUE and vice versa, then which to obtain an index.

# Candidate abbreviations to test
abbreviations_to_check <- c("MA", "ME", "MI", "MO", "MU")

# One TRUE/FALSE per candidate: TRUE when it appears in the abbreviation column
abbreviations_exist <- abbreviations_to_check %in% murders[["abb"]]

# Negate the flags so non-members become TRUE, then take their positions
index_not_abbreviation <- which(!abbreviations_exist)

# Report the candidate(s) at those positions
print(abbreviations_to_check[index_not_abbreviation])
## [1] "MU"