*********** Exercise = 3.11 ***********

Load DSLABS Library & Call the Murders Data :-

library(dslabs) 
data(murders)

*********** QUESTION = 1 ***********

1: Use the $ operator to access the population size data and store it as the object pop. Then use the sort function to redefine pop so that it is sorted. Finally, use the [ operator to report the smallest population size
# Extract the population column and sort it ascending in a single step
pop <- sort(murders$population)
pop[1]  # first element of the ascending vector is the smallest population
## [1] 563626

*********** QUESTION = 2 ***********

2: Now instead of the smallest population size, find the index of the entry with the smallest population size. Hint: use order instead of sort.
# Index (row number) of the state with the smallest population.
# order() has to be applied to the original, unsorted column: `pop` was
# overwritten with its sorted values in Question 1, so order(pop)[1] would
# always be 1 and would not identify a row of `murders`.
smallest_index <- order(murders$population)[1]
smallest_index
## [1] 51

*********** QUESTION = 3 ***********

3:We can actually perform the same operation as in the previous exercise using the function which.min. Write one line of code that does this
# Find the row index of the smallest population size using which.min.
# Use the unsorted column from the data frame; calling which.min() on the
# sorted `pop` vector would trivially return 1 instead of a row of `murders`.
smallest_index <- which.min(murders$population)
smallest_index
## [1] 51

*********** QUESTION = 4 ***********

4:Now we know how small the smallest state is and we know which row represents it. Which state is it? Define a variable states to be the state names from the murders data frame. Report the name of the state with the smallest population.
# Define a variable 'states' to store the state names
states <- murders$state

# Row index of the least populous state, taken from the unsorted population
# column so that it lines up with the rows of `states`
smallest_index <- which.min(murders$population)

# Report the name of the state with the smallest population
states[smallest_index]
## [1] "Wyoming"

*********** QUESTION = 5 ***********

5: You can create a data frame using the data.frame function. Here is a quick example:

temp <- c(35, 88, 42, 84, 81, 30)
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
city_temps <- data.frame(name = city, temperature = temp)

Use the rank function to determine the population rank of each state from smallest population size to biggest. Save these ranks in an object called ranks, then create a data frame with the state name and its rank. Call the data frame my_df.

# Two-column working copy: state name alongside its population
state_populations <- data.frame(
  state = murders[["state"]],
  population = murders[["population"]]
)

# rank() assigns 1 to the smallest population, increasing with size
ranks <- rank(state_populations[["population"]])

# Pair every state name with its population rank
my_df <- data.frame(state = state_populations[["state"]], rank = ranks)

# Show the result
print(my_df)
##                   state rank
## 1               Alabama   29
## 2                Alaska    5
## 3               Arizona   36
## 4              Arkansas   20
## 5            California   51
## 6              Colorado   30
## 7           Connecticut   23
## 8              Delaware    7
## 9  District of Columbia    2
## 10              Florida   49
## 11              Georgia   44
## 12               Hawaii   12
## 13                Idaho   13
## 14             Illinois   47
## 15              Indiana   37
## 16                 Iowa   22
## 17               Kansas   19
## 18             Kentucky   26
## 19            Louisiana   27
## 20                Maine   11
## 21             Maryland   33
## 22        Massachusetts   38
## 23             Michigan   43
## 24            Minnesota   31
## 25          Mississippi   21
## 26             Missouri   34
## 27              Montana    8
## 28             Nebraska   14
## 29               Nevada   17
## 30        New Hampshire   10
## 31           New Jersey   41
## 32           New Mexico   16
## 33             New York   48
## 34       North Carolina   42
## 35         North Dakota    4
## 36                 Ohio   45
## 37             Oklahoma   24
## 38               Oregon   25
## 39         Pennsylvania   46
## 40         Rhode Island    9
## 41       South Carolina   28
## 42         South Dakota    6
## 43            Tennessee   35
## 44                Texas   50
## 45                 Utah   18
## 46              Vermont    3
## 47             Virginia   40
## 48           Washington   39
## 49        West Virginia   15
## 50            Wisconsin   32
## 51              Wyoming    1

*********** QUESTION = 6 ***********

6 : Repeat the previous exercise, but this time order my_df so that the states are ordered from least populous to most populous. Hint: create an object ind that stores the indexes needed to order the population values. Then use the bracket operator [ to re-order each column in the data frame
# 'ind' lists the row positions in ascending order of population rank
ind <- order(ranks)

# Subset the rows by 'ind' so every column is re-ordered consistently
my_df <- my_df[ind, , drop = FALSE]
print(my_df)
##                   state rank
## 51              Wyoming    1
## 9  District of Columbia    2
## 46              Vermont    3
## 35         North Dakota    4
## 2                Alaska    5
## 42         South Dakota    6
## 8              Delaware    7
## 27              Montana    8
## 40         Rhode Island    9
## 30        New Hampshire   10
## 20                Maine   11
## 12               Hawaii   12
## 13                Idaho   13
## 28             Nebraska   14
## 49        West Virginia   15
## 32           New Mexico   16
## 29               Nevada   17
## 45                 Utah   18
## 17               Kansas   19
## 4              Arkansas   20
## 25          Mississippi   21
## 16                 Iowa   22
## 7           Connecticut   23
## 37             Oklahoma   24
## 38               Oregon   25
## 18             Kentucky   26
## 19            Louisiana   27
## 41       South Carolina   28
## 1               Alabama   29
## 6              Colorado   30
## 24            Minnesota   31
## 50            Wisconsin   32
## 21             Maryland   33
## 26             Missouri   34
## 43            Tennessee   35
## 3               Arizona   36
## 15              Indiana   37
## 22        Massachusetts   38
## 48           Washington   39
## 47             Virginia   40
## 31           New Jersey   41
## 34       North Carolina   42
## 23             Michigan   43
## 11              Georgia   44
## 36                 Ohio   45
## 39         Pennsylvania   46
## 14             Illinois   47
## 33             New York   48
## 10              Florida   49
## 44                Texas   50
## 5            California   51

*********** QUESTION = 7 ***********

7 : The is.na function returns a logical vector that tells us which entries are NA. Assign this logical vector to an object called ind and determine how many NAs na_example has.
# Load the example vector and inspect its structure before printing it in full
data(na_example)
str(na_example)
##  int [1:1000] 2 1 3 2 1 3 1 4 3 2 ...
print(na_example)
##    [1]  2  1  3  2  1  3  1  4  3  2  2 NA  2  2  1  4 NA  1  1  2  1  2  2  1
##   [25]  2  5 NA  2  2  3  1  2  4  1  1  1  4  5  2  3  4  1  2  4  1  1  2  1
##   [49]  5 NA NA NA  1  1  5  1  3  1 NA  4  4  7  3  2 NA NA  1 NA  4  1  2  2
##   [73]  3  2  1  2  2  4  3  4  2  3  1  3  2  1  1  1  3  1 NA  3  1  2  2  1
##   [97]  2  2  1  1  4  1  1  2  3  3  2  2  3  3  3  4  1  1  1  2 NA  4  3  4
##  [121]  3  1  2  1 NA NA NA NA  1  5  1  2  1  3  5  3  2  2 NA NA NA NA  3  5
##  [145]  3  1  1  4  2  4  3  3 NA  2  3  2  6 NA  1  1  2  2  1  3  1  1  5 NA
##  [169] NA  2  4 NA  2  5  1  4  3  3 NA  4  3  1  4  1  1  3  1  1 NA NA  3  5
##  [193]  2  2  2  3  1  2  2  3  2  1 NA  2 NA  1 NA NA  2  1  1 NA  3 NA  1  2
##  [217]  2  1  3  2  2  1  1  2  3  1  1  1  4  3  4  2  2  1  4  1 NA  5  1  4
##  [241] NA  3 NA NA  1  1  5  2  3  3  2  4 NA  3  2  5 NA  2  3  4  6  2  2  2
##  [265] NA  2 NA  2 NA  3  3  2  2  4  3  1  4  2 NA  2  4 NA  6  2  3  1 NA  2
##  [289]  2 NA  1  1  3  2  3  3  1 NA  1  4  2  1  1  3  2  1  2  3  1 NA  2  3
##  [313]  3  2  1  2  3  5  5  1  2  3  3  1 NA NA  1  2  4 NA  2  1  1  1  3  2
##  [337]  1  1  3  4 NA  1  2  1  1  3  3 NA  1  1  3  5  3  2  3  4  1  4  3  1
##  [361] NA  2  1  2  2  1  2  2  6  1  2  4  5 NA  3  4  2  1  1  4  2  1  1  1
##  [385]  1  2  1  4  4  1  3 NA  3  3 NA  2 NA  1  2  1  1  4  2  1  4  4 NA  1
##  [409]  2 NA  3  2  2  2  1  4  3  6  1  2  3  1  3  2  2  2  1  1  3  2  1  1
##  [433]  1  3  2  2 NA  4  4  4  1  1 NA  4  3 NA  1  3  1  3  2  4  2  2  2  3
##  [457]  2  1  4  3 NA  1  4  3  1  3  2 NA  3 NA  1  3  1  4  1  1  1  2  4  3
##  [481]  1  2  2  2  3  2  3  1  1 NA  3  2  1  1  2 NA  2  2  2  3  3  1  1  2
##  [505] NA  1  2  1  1  3  3  1  3  1  1  1  1  1  2  5  1  1  2  2  1  1 NA  1
##  [529]  4  1  2  4  1  3  2 NA  1  1 NA  2  1  1  4  2  3  3  1  5  3  1  1  2
##  [553] NA  1  1  3  1  3  2  4 NA  2  3  2  1  2  1  1  1  2  2  3  1  5  2 NA
##  [577]  2 NA  3  2  2  2  1  5  3  2  3  1 NA  3  1  2  2  2  1  2  2  4 NA  6
##  [601]  1  2 NA  1  1  2  2  3 NA  3  2  3  3  4  2 NA  2 NA  4 NA  1  1  2  2
##  [625]  3  1  1  1  3 NA  2  5 NA  7  1 NA  4  3  3  1 NA  1  1  1  1  3  2  4
##  [649]  2  2  3 NA NA  1  4  3  2  2  2  3  2  4  2  2  4 NA NA NA  6  3  3  1
##  [673]  4  4  2  1 NA  1  6 NA  3  3  2  1  1  6 NA  1  5  1 NA  2  6  2 NA  4
##  [697]  1  3  1  2 NA  1  1  3  1  2  4  2  1  3  2  4  3  2  2  1  1  5  6  4
##  [721]  2  2  2  2  4 NA  1  2  2  2  2  4  5 NA NA NA  4  3  3  3  2  4  2  4
##  [745] NA NA NA NA  2  1 NA  2  4  3  2 NA  2  3  1  3  4 NA  1  2  1  2 NA  3
##  [769]  1  2  1  2  1  2  1  2  2  2  2  1  1  3  3  1  3  4  3 NA NA  4  2  3
##  [793]  2  1  3  2  4  2  2  3  1  2  4  3  3  4 NA  1  4  2  1  1  1  3  1  5
##  [817]  2  2  4  2 NA  1  3  1  2 NA  1  2  1  2  1 NA  1  3  2  3  2 NA  2  1
##  [841]  4  2 NA NA NA  2  4  2 NA NA  3  1 NA  5  5  2  2  2 NA  2  1  3  1  3
##  [865]  2  4  2  4 NA  4  1  2  3  2  3  3  2  3  2  2  2  1  3  2  4  2 NA  3
##  [889]  3  2  2 NA NA  3  2  1  2  4  1  1  1  1  4  3  2 NA  3  2 NA  1 NA  3
##  [913]  2  1  1  1  2 NA  2  2  3  3  2 NA NA  4  5  2  2  2  1  2  3  1  3  3
##  [937]  4  3 NA  1  1  1 NA  4  3  5  1  1  2 NA  2  2  2  2  5  2  2  3  1  2
##  [961]  3 NA  1  2 NA NA  2 NA  3  1  1  2  5  3  5  1  1  4 NA  2  1  3  1  1
##  [985]  2  4  3  3  3 NA  1  1  2  2  1  1  2  2 NA  2
# Flag the missing entries; summing the logical vector counts the TRUEs
ind <- is.na(na_example)
print(sum(ind))
## [1] 145

*********** QUESTION = 8 ***********

8 : Now compute the average again, but only for the entries that are not NA. Hint: remember the ! operator
# Negating 'ind' keeps only the observed (non-NA) entries
# Total of the observed entries
print(sum(na_example[!ind]))
## [1] 1968
# Average of the observed entries
print(mean(na_example[!ind]))
## [1] 2.301754

*********** Exercise = 3.13 ***********

*********** QUESTION = 1 ***********

1 : Remake the data frame using the code above, but add a line that converts the temperature from Fahrenheit to Celsius. The conversion is C = 5/9 × (F − 32).
# City names and their temperatures in degrees Fahrenheit, in matching order
city <- c("Beijing", "Lagos", "Paris", "Rio de Janeiro", "San Juan", "Toronto")
temp <- c(35, 88, 42, 84, 81, 30)

# Fahrenheit -> Celsius: C = 5/9 * (F - 32), applied to the whole vector
temp_celsius <- 5 / 9 * (temp - 32)

# Assemble the data frame of city names and Celsius temperatures
city_temps <- data.frame(
  name = city,
  temperature_celsius = temp_celsius
)

# Show the result
print(city_temps)
##             name temperature_celsius
## 1        Beijing            1.666667
## 2          Lagos           31.111111
## 3          Paris            5.555556
## 4 Rio de Janeiro           28.888889
## 5       San Juan           27.222222
## 6        Toronto           -1.111111

*********** QUESTION = 2 ***********

2 : What is the following sum: $1 + 1/2^2 + 1/3^2 + \dots + 1/100^2$? Hint: thanks to Euler, we know it should be close to $\pi^2/6$.
# Calculate the sum of the series 1/k^2 for k = 1, ..., 100.
# The exercise asks for 100 terms; the upper bound was previously 21, which
# truncated the series and gave 1.598431 instead of a value close to pi^2/6.
result <- sum(1 / seq_len(100)^2)

# Display the result (for comparison, pi^2/6 is about 1.644934)
result
## [1] 1.634984

*********** QUESTION = 3 ***********

3 : Compute the per 100,000 murder rate for each state and store it in the object murder_rate. Then compute the average murder rate for the US using the function mean. What is the average?
# Calculate the murder rate per 100,000 for each state.
# The murder counts are stored in the `total` column; `murders$murder` does
# not exist and evaluates to NULL, which made the rate an empty vector and
# the mean NaN.
murder_rate <- murders$total / murders$population * 100000

# Compute the average murder rate for the US (unweighted mean of state rates)
average_murder_rate <- mean(murder_rate)

# Display the average murder rate
average_murder_rate
## [1] 2.779125