library(nycflights13)
# Preview the first six rows of the flights table.
head(x = flights, n = 6L)
## # A tibble: 6 x 19
##    year month   day dep_time sched_dep_time dep_delay arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>
## 1  2013     1     1      517            515         2      830
## 2  2013     1     1      533            529         4      850
## 3  2013     1     1      542            540         2      923
## 4  2013     1     1      544            545        -1     1004
## 5  2013     1     1      554            600        -6      812
## 6  2013     1     1      554            558        -4      740
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>

1. Write a function that takes a single numerical vector and returns three values, the minimum number, the median, and the maximum number of the vector. Test your function using the month column of the flights dataset.

#' Minimum, median and maximum of a vector
#'
#' Missing values are discarded before the three statistics are computed.
#'
#' @param x A numeric vector.
#' @return A length-3 vector: minimum, median, maximum (in that order).
MinMedMax <- function(x) {
  # Drop NA/NaN once instead of passing na.rm = TRUE to every summary call.
  observed <- x[!is.na(x)]
  smallest <- min(observed)
  middle <- median(observed)
  largest <- max(observed)
  c(smallest, middle, largest)
}
 
# Try the function on the month column of flights.
MinMedMax(flights[["month"]])
## [1]  1  7 12

1b. Explain your reasoning for choosing your function’s name:

Using upper camel case and an abbreviation of each returned value (minimum, median, maximum), I named the function MinMedMax for clarity.

2. Write a function that categorizes a numerical variable in the flights data into four categories: Morning (0500–1159), Afternoon (1200–1659), Evening (1700–2059), and Night (2100–0459).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(nycflights13)

#' Label each row of a data frame with a time-of-day period
#'
#' Adds a `period` column that classifies the HHMM clock times in `y` as
#' "Morning" (0500-1159), "Afternoon" (1200-1659), "Evening" (1700-2059) or
#' "Night" (2100-2400 and 0000-0459). Times that are `NA` or fall outside
#' 0000-2400 are labelled `NA`.
#'
#' @param x A data frame or tibble.
#' @param y Numeric vector of HHMM times to classify, e.g.
#'   `flights$dep_time`.
#' @return `x` with an additional character column `period`.
MT_period <- function(x, y) {
  # NOTE: inside mutate() `y` is looked up in `x` first, so a column of `x`
  # that happens to be called `y` would shadow this argument.
  x %>%
    mutate(period = case_when(
      499 < y & y < 1200 ~ "Morning",
      1159 < y & y < 1700 ~ "Afternoon",
      1659 < y & y < 2100 ~ "Evening",
      # HHMM data such as flights$dep_time can record midnight as 2400, so
      # the upper bound includes it (2359 left those rows as NA). The
      # parentheses make the intended &/| grouping explicit.
      (2059 < y & y <= 2400) | (0 <= y & y < 500) ~ "Night"
    ))
}
# Show the first rows of flights with the new period column attached.
head(MT_period(flights, flights[["dep_time"]]))
## # A tibble: 6 x 20
##    year month   day dep_time sched_dep_time dep_delay arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>
## 1  2013     1     1      517            515         2      830
## 2  2013     1     1      533            529         4      850
## 3  2013     1     1      542            540         2      923
## 4  2013     1     1      544            545        -1     1004
## 5  2013     1     1      554            600        -6      812
## 6  2013     1     1      554            558        -4      740
## # … with 13 more variables: sched_arr_time <int>, arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>, period <chr>

2b. Explain your reasoning for choosing your function’s name

Because the function converts a military time (MT) into a period of the day, I named the function MT_period.

3. Write a function that calculates the median of all numeric variables in the flights dataset using a for loop.

#' Median of every numeric column, computed with a for loop
#'
#' @param data A data frame or tibble; non-numeric columns are ignored.
#' @return A named list holding one median per numeric column of `data`,
#'   computed with missing values removed.
get_median <- function(data) {
  # vapply() always returns a logical vector, even for a zero-column input.
  is_num <- vapply(data, is.numeric, logical(1))
  numeric_data <- data[is_num]

  # Preallocate the result so the loop fills slots instead of growing a list.
  medians <- vector("list", length(numeric_data))
  names(medians) <- names(numeric_data)
  for (i in seq_along(numeric_data)) {
    medians[[i]] <- median(numeric_data[[i]], na.rm = TRUE)
  }
  medians
}

# Single-bracket column subsetting never collapses to a bare vector.
num_flights <- flights[vapply(flights, is.numeric, logical(1))]
output <- get_median(num_flights)
output
## $year
## [1] 2013
## 
## $month
## [1] 7
## 
## $day
## [1] 16
## 
## $dep_time
## [1] 1401
## 
## $sched_dep_time
## [1] 1359
## 
## $dep_delay
## [1] -2
## 
## $arr_time
## [1] 1535
## 
## $sched_arr_time
## [1] 1556
## 
## $arr_delay
## [1] -5
## 
## $flight
## [1] 1496
## 
## $air_time
## [1] 129
## 
## $distance
## [1] 872
## 
## $hour
## [1] 13
## 
## $minute
## [1] 29

3b. Explain your reasoning for choosing your function’s name

I named the function get_median, using snake_case this time, to reflect the goal of the function: getting the median of each numeric column in the dataset.