Load necessary libraries and datasets

library(jsonlite)
library(dplyr)
library(tidyverse)
# Load the built-in `cars` data frame into the workspace.
data("cars")

Question 1

Finding the median of the first column (speed) of the cars dataset

# Median of the `speed` column of `cars`.
median(cars[["speed"]])
## [1] 15

Question 2

Number of observations in On_Time_Performance.csv (OTP)

# Read the on-time performance CSV from the Desktop into a tibble.
CSV_File <- readr::read_csv(file = "~/Desktop/On_Time_Performance.csv")
## New names:
## Rows: 570131 Columns: 110
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (28): UniqueCarrier, Carrier, TailNum, Origin, OriginCityName, OriginSt... dbl
## (54): Year, Quarter, Month, DayofMonth, DayOfWeek, AirlineID, FlightNum... lgl
## (27): Div2WheelsOff, Div2TailNum, Div3Airport, Div3AirportID, Div3Airpo... date
## (1): FlightDate
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...110`
# Row count (observations) followed by column count.
c(nrow(CSV_File), ncol(CSV_File))
## [1] 570131    110

Question 3

Number of missing values within the column Div2WheelsOff

# Tally the NA entries in the Div2WheelsOff column.
CSV_File$Div2WheelsOff %>%
  is.na() %>%
  sum()
## [1] 570131

Question 4

The average departure delay by carrier

# Per-carrier mean departure delay (NA delays ignored) plus the row count
# for each carrier.
average_delay_time <- summarise(
  group_by(CSV_File, Carrier),
  mean_dep_delay = mean(DepDelay, na.rm = TRUE),
  n = n()
)
print(average_delay_time)
## # A tibble: 18 × 3
##    Carrier mean_dep_delay      n
##    <chr>            <dbl>  <int>
##  1 9E               12.4   18234
##  2 AA                6.93  73598
##  3 AS               -2.25  15312
##  4 B6               20.4   24871
##  5 DL                9.74  71254
##  6 EV               13.6   20166
##  7 F9               16.0    9707
##  8 G4               10.4    6814
##  9 HA                1.72   6627
## 10 MQ                8.82  22502
## 11 NK                5.61  14180
## 12 OH               13.8   22210
## 13 OO               15.1   62207
## 14 UA                5.87  45384
## 15 VX                2.83   5824
## 16 WN                8.03 109676
## 17 YV                8.86  16353
## 18 YX                7.26  25212

The carrier with the largest average departure delay

# Keep only the carrier whose mean departure delay is highest.
# with_ties = FALSE guarantees a single row, equivalent to sorting in
# descending order and taking the first row.
largest_delay <- slice_max(
  average_delay_time,
  order_by = mean_dep_delay,
  n = 1,
  with_ties = FALSE
)
print(largest_delay)
## # A tibble: 1 × 3
##   Carrier mean_dep_delay     n
##   <chr>            <dbl> <int>
## 1 B6                20.4 24871

Question 5

The maximum daily close price for BTC in the dataset

# Download the BTC/USD daily history from the CryptoCompare endpoint below and
# report the highest closing price among the returned records.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
btc_data <- fromJSON(url)
# The price records are read from the nested Data$Data element of the parsed
# response.
btc_data_price <- btc_data$Data$Data
# Fail loudly when no usable close prices came back (e.g. an error payload or
# an empty result): max() over a NULL/empty/all-NA vector with na.rm = TRUE
# would otherwise return -Inf with only a warning and be printed as the answer.
if (length(btc_data_price$close) == 0 || all(is.na(btc_data_price$close))) {
  stop("No BTC close prices found in the API response.", call. = FALSE)
}
max_close_price <- max(btc_data_price$close, na.rm = TRUE)
print(max_close_price)
## [1] 106155.6