#library

# Attach the dslabs package (presumably the source of the `murders` data set
# loaded on the next line).
library(dslabs)
# Load the `murders` data set into the workspace.
data(murders)
# Print a compact summary of its structure (column names and types).
str(murders)

## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...

# Exercise 3.15
#question.1
# Sample murder data (replace this with your actual data)

# Exercises 1-3 on a small stand-in data set (replace with your actual data).
# Section headings are written as comments so the script parses, and every
# result is shown with print() so it also appears when the file is source()d.

state <- c("State1", "State2", "State3", "State4", "State5")
print(state)
## [1] "State1" "State2" "State3" "State4" "State5"

murder_count <- c(50, 30, 15, 5, 2)
print(murder_count)
## [1] 50 30 15  5  2

population <- c(1000000, 1500000, 500000, 2000000, 800000)
print(population)
## [1] 1000000 1500000  500000 2000000  800000

# Create a data frame with one row per state.
murder_data <- data.frame(
  State = state,
  Murder = murder_count,
  Population = population
)
print(murder_data)
##    State Murder Population
## 1 State1     50    1000000
## 2 State2     30    1500000
## 3 State3     15     500000
## 4 State4      5    2000000
## 5 State5      2     800000

# Calculate the murder rate per 100,000 people.
murder_data$MurderRate <- (murder_data$Murder / murder_data$Population) * 100000
print(murder_data$MurderRate)
## [1] 5.00 2.00 3.00 0.25 0.25

# 1. Create a logical vector that flags entries with a rate lower than 1.
low <- murder_data$MurderRate < 1
print(low)
## [1] FALSE FALSE FALSE  TRUE  TRUE

# View the murder rates next to the "low" vector.
print(murder_data)
##    State Murder Population MurderRate
## 1 State1     50    1000000       5.00
## 2 State2     30    1500000       2.00
## 3 State3     15     500000       3.00
## 4 State4      5    2000000       0.25
## 5 State5      2     800000       0.25
print(low)
## [1] FALSE FALSE FALSE  TRUE  TRUE

# 2. Now use the results from the previous exercise and the function which to
# determine the indices of murder_rate associated with values lower than 1.
# `low` from exercise 1 is reused as-is; recomputing it would give the
# identical vector.
low_indices <- which(low)
print(low_indices)
## [1] 4 5

# 3. Use the results from the previous exercise to report the names of the
# states with murder rates lower than 1.
low_states <- murder_data$State[low_indices]
print(low_states)
## [1] "State4" "State5"

cat("States with murder rates lower than 1 per 100,000 people:", paste(low_states, collapse = ", "))
## States with murder rates lower than 1 per 100,000 people: State4, State5

# 4. Now extend the code from exercises 2 and 3 to report the states in the
# Northeast with murder rates lower than 1. Hint: use the previously defined
# logical vector low and the logical operator &.

# The sample `murder_data` frame has no region information:
# `murder_data$Northeast` is NULL, and combining anything with NULL through `&`
# yields an empty vector, so the answer was always empty. Region is available
# in the `murders` data loaded at the top of the script, so the rate and the
# "low" flag are computed from it here, under separate names so the
# sample-data `low` vector is left untouched.
murders_rate <- murders$total / murders$population * 100000
murders_low <- murders_rate < 1

# TRUE for every row of `murders` whose region is the Northeast.
northeast <- murders$region == "Northeast"
print(northeast)

# Use the `which` function to find the indices of states in the Northeast with
# low murder rates.
northeast_low_indices <- which(murders_low & northeast)
print(northeast_low_indices)

# Report the names of states in the Northeast with murder rates lower than 1.
northeast_low_states <- murders$state[northeast_low_indices]
print(northeast_low_states)

# 5. In a previous exercise we computed the murder rate for each state and the
# average of these numbers. How many states are below the average?

# Calculate the average murder rate.
average_murder_rate <- mean(murder_data$MurderRate)
print(average_murder_rate)
## [1] 2.1

# Create a logical vector to identify states with rates below the average.
below_average <- murder_data$MurderRate < average_murder_rate
print(below_average)
## [1] FALSE  TRUE FALSE  TRUE  TRUE

# Use the `sum` function to count the states below the average: TRUE counts
# as 1 and FALSE as 0.
states_below_average <- sum(below_average)
print(states_below_average)
## [1] 3

# This report line was commented out and written with typographic quotes,
# which R does not accept as string delimiters; it is restored here with
# plain ASCII quotes.
cat("Number of states with murder rates below the average:", states_below_average, "\n")

# 6. Use the match function to identify the states with abbreviations AK, MI,
# and IA. Hint: start by defining an index of the entries of murders$abb that
# match the three abbreviations, then use the [ operator to extract the states.

# Sample data (replace this with your actual data): `state` and `abb` are
# parallel vectors, i.e. abb[i] is the abbreviation of state[i].
state <- c("Alaska", "Michigan", "Iowa", "New York", "California")
print(state)
## [1] "Alaska"     "Michigan"   "Iowa"       "New York"   "California"

abb <- c("AK", "MI", "IA", "NY", "CA")
print(abb)
## [1] "AK" "MI" "IA" "NY" "CA"

# Define a vector of abbreviations to match.
abbreviations_to_match <- c("AK", "MI", "IA")
print(abbreviations_to_match)
## [1] "AK" "MI" "IA"

# Use the match function to find the position of each abbreviation in `abb`.
# The lookup previously searched `murder_data$Abbreviation`, a column that does
# not exist (NULL), so every lookup came back NA.
matching_indices <- match(abbreviations_to_match, abb)
print(matching_indices)
## [1] 1 2 3

# Extract the states with matching abbreviations from the parallel `state`
# vector.
matching_states <- state[matching_indices]
print(matching_states)
## [1] "Alaska"   "Michigan" "Iowa"

# Report line restored with plain ASCII quotes (it was commented out and
# written with typographic quotes, which R cannot parse as strings).
cat("States with abbreviations AK, MI, and IA:", paste(matching_states, collapse = ", "), "\n")

# 7. Use the %in% operator to create a logical vector that answers the
# question: which of the following are actual abbreviations: MA, ME, MI, MO, MU?

# Vector of known abbreviations. `state.abb` is the built-in vector of US state
# abbreviations from base R's datasets package. The reference list used to be a
# copy of the candidates themselves, so every candidate trivially matched.
# NOTE(review): `murders$abb` from dslabs would presumably serve equally well.
known_abbreviations <- state.abb
print(known_abbreviations)

# Vector of abbreviations to check.
abbreviations_to_check <- c("MA", "ME", "MI", "MO", "MU")
print(abbreviations_to_check)
## [1] "MA" "ME" "MI" "MO" "MU"

# Create a logical vector to check if the abbreviations are in the list of
# known abbreviations.
are_actual_abbreviations <- abbreviations_to_check %in% known_abbreviations
print(are_actual_abbreviations)
## [1]  TRUE  TRUE  TRUE  TRUE FALSE

# 8. Extend the code you used in exercise 7 to report the one entry that is not
# an actual abbreviation. Hint: use the ! operator, which turns FALSE into TRUE
# and vice versa, then which to obtain an index.

# Use the ! operator to invert the logical vector, then locate the TRUE entry.
not_actual_abbreviation_index <- which(!are_actual_abbreviations)
print(not_actual_abbreviation_index)
## [1] 5

# Get the entry that is not an actual abbreviation. The entry itself is
# printed here; the index was printed a second time before.
not_actual_abbreviation <- abbreviations_to_check[not_actual_abbreviation_index]
print(not_actual_abbreviation)
## [1] "MU"

# Report line restored with plain ASCII quotes (it was commented out and
# written with typographic quotes, which R cannot parse as strings).
cat("The entry that is not an actual abbreviation is:", not_actual_abbreviation, "\n")

# data frame

# Build the BS_5th data frame: one row per student with name, CGPA and grade.
# Columns are named with `=`. Using `<-` inside data.frame() assigns stray
# global vectors (name, CGPA, Grade) and leaves the columns with long mangled
# names built from the deparsed call; `BS_5th$CGPA` then only resolves through
# `$` partial matching.
BS_5th <- data.frame(
  name = c(
    "ali", "ahmad", "sania", "sara", "adil", "sharjeel",
    "subhan", "arbaz", "athar", "hessan", "waleed"
  ),
  CGPA = c(2.5, 3.2, 3.9, 2.9, 3.10, 2.99, 2.19, 2.87, 3.2, 3.53, 2.5),
  Grade = c("D", "B", "A", "C", "B", "A", "B", "B", "B", "A", "D")
)
# Prints 11 rows under the three column headers name, CGPA and Grade.
print(BS_5th)

print(class(BS_5th))
## [1] "data.frame"

# Which students have a CGPA greater than or equal to 3.2?
ind <- BS_5th$CGPA >= 3.2
print(BS_5th$CGPA[ind])
## [1] 3.20 3.90 3.20 3.53
print(ind)
##  [1] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE

# Which students got a "B" grade? (`ind` is deliberately reused.)
ind <- BS_5th$Grade == "B"
print(BS_5th$Grade[ind])
## [1] "B" "B" "B" "B" "B"
print(ind)
##  [1] FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE

# AND and OR operators on CGPA and Grade.
CGPA_ind <- BS_5th$CGPA >= 3.2
Grade_ind <- BS_5th$Grade == "B"

# Students satisfying both conditions (elementwise AND).
print(BS_5th$name[CGPA_ind & Grade_ind])
## [1] "ahmad" "athar"

# Students satisfying at least one condition (elementwise OR).
print(BS_5th$name[CGPA_ind | Grade_ind])
## [1] "ahmad"  "sania"  "adil"   "subhan" "arbaz"  "athar"  "hessan"

Exercise 3.15 & subsetting with logicals

Tehreem Azhar 04

2023-10-24

Create a data frame

Calculate murder rate per 100,000 people

Create a logical vector to identify entries with rates lower than 1

View the murder rates and “low” vector

Create a logical vector to identify entries with rates lower than 1

Use the `which` function to find the indices

Use the `which` function to find the indices of states in the Northeast with low murder rates

Report the names of states in the Northeast with murder rates lower than 1

Calculate the average murder rate

Create a logical vector to identify states with rates below the average

Use the `sum` function to count the number of states below the average

Sample murder data (replace this with your actual data)

Define a vector of abbreviations to match

Use the match function to identify the indices of matching abbreviations

Extract the states with matching abbreviations

Vector of known abbreviations

Vector of abbreviations to check

Create a logical vector to check if the abbreviations are in the list of known abbreviations

Get the entry that is not an actual abbreviation

AND and OR operators CGPA and Grade

Exercise 3.15 & subsetting with logicals

Tehreem Azhar 04

2023-10-24

Create a data frame

Calculate murder rate per 100,000 people

Create a logical vector to identify entries with rates lower than 1

View the murder rates and “low” vector

Create a logical vector to identify entries with rates lower than 1

Use the which function to find the indices

Use the which function to find the indices of states in the Northeast with low murder rates

Report the names of states in the Northeast with murder rates lower than 1

Calculate the average murder rate

Create a logical vector to identify states with rates below the average

Use the sum function to count the number of states below the average

Sample murder data (replace this with your actual data)

Define a vector of abbreviations to match

Use the match function to identify the indices of matching abbreviations

Extract the states with matching abbreviations

Vector of known abbreviations

Vector of abbreviations to check

Create a logical vector to check if the abbreviations are in the list of known abbreviations

Get the entry that is not an actual abbreviation

AND and OR operators CGPA and Grade

Use the `which` function to find the indices

Use the `which` function to find the indices of states in the Northeast with low murder rates

Use the `sum` function to count the number of states below the average