Load necessary libraries and datasets
# Packages used by this report.
# jsonlite supplies fromJSON(), used to parse the price API response.
library(jsonlite)
# dplyr supplies the grouping/summarising verbs; attaching tidyverse below
# also attaches dplyr, so this call is redundant but harmless.
library(dplyr)
# tidyverse attaches readr, which supplies read_csv().
library(tidyverse)
# Load the built-in `cars` dataset into the workspace.
# NOTE(review): `cars` is not referenced in the visible part of this file;
# confirm it is needed before keeping it.
data(cars)
Question 2
Number of observations (rows) in the OTP data (On_Time_Performance.csv)
# Read the on-time performance CSV into a tibble. No column types are given,
# so readr guesses them and reports the guessed specification.
CSV_File <- readr::read_csv(
  file = "~/Desktop/On_Time_Performance.csv"
)
## New names:
## Rows: 570131 Columns: 110
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (28): UniqueCarrier, Carrier, TailNum, Origin, OriginCityName, OriginSt... dbl
## (54): Year, Quarter, Month, DayofMonth, DayOfWeek, AirlineID, FlightNum... lgl
## (27): Div2WheelsOff, Div2TailNum, Div3Airport, Div3AirportID, Div3Airpo... date
## (1): FlightDate
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...110`
# Row count (observations) followed by column count of the loaded data.
c(nrow(CSV_File), ncol(CSV_File))
## [1] 570131 110
Question 3
Number of missing values within the column Div2WheelsOff
# Count the missing entries in the Div2WheelsOff column.
sum(is.na(CSV_File[["Div2WheelsOff"]]))
## [1] 570131
Question 4
The average departure delay by carrier
# Per-carrier summary: mean departure delay with missing delays dropped, plus
# the number of rows for the carrier (rows with a missing delay are counted).
average_delay_time <- summarise(
  group_by(CSV_File, Carrier),
  mean_dep_delay = mean(DepDelay, na.rm = TRUE),
  n = n()
)
print(average_delay_time)
## # A tibble: 18 × 3
## Carrier mean_dep_delay n
## <chr> <dbl> <int>
## 1 9E 12.4 18234
## 2 AA 6.93 73598
## 3 AS -2.25 15312
## 4 B6 20.4 24871
## 5 DL 9.74 71254
## 6 EV 13.6 20166
## 7 F9 16.0 9707
## 8 G4 10.4 6814
## 9 HA 1.72 6627
## 10 MQ 8.82 22502
## 11 NK 5.61 14180
## 12 OH 13.8 22210
## 13 OO 15.1 62207
## 14 UA 5.87 45384
## 15 VX 2.83 5824
## 16 WN 8.03 109676
## 17 YV 8.86 16353
## 18 YX 7.26 25212
The carrier with the largest average departure delay
# Sort carriers from highest to lowest mean departure delay and keep the
# first row, i.e. the carrier with the largest mean delay.
largest_delay <- head(
  arrange(average_delay_time, desc(mean_dep_delay)),
  n = 1
)
print(largest_delay)
## # A tibble: 1 × 3
## Carrier mean_dep_delay n
## <chr> <dbl> <int>
## 1 B6 20.4 24871
Question 5
The maximum daily close price for BTC in the dataset
# Fetch BTC/USD history from the CryptoCompare histoday endpoint and report
# the highest closing price among the returned rows.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
btc_data <- fromJSON(url)
# The price rows are nested under Data$Data in the parsed response.
btc_data_price <- btc_data$Data$Data
# Fail loudly when the response carries no price rows (e.g. the API reported
# an error): otherwise max() below would return -Inf with only a warning.
if (!is.data.frame(btc_data_price) || nrow(btc_data_price) == 0L) {
  stop("BTC price request returned no price rows", call. = FALSE)
}
max_close_price <- max(btc_data_price$close, na.rm = TRUE)
print(max_close_price)
## [1] 106155.6