Code:
# Add a speed column: distance divided by air_time, scaled by 60
# (presumably miles per hour - confirm the units of distance and air_time).
datas <- flights %>%
  mutate(speed = distance / air_time * 60)
# Fastest flights first; ties are ordered by destination code.
data_arr <- datas %>%
  arrange(desc(speed), dest)
# Move speed and dest to the front, keeping every other column after them.
Lab1_data <- data_arr %>%
  select(speed, dest, everything())
Answer: The destination is Hartsfield-Jackson
Atlanta International Airport (ATL).
Code:
# Flights scheduled on January 1.
jan1 <- filter(flights, month == 1, day == 1)
# Average of the recorded departure times, stored in every row as avg_dep.
# mean(..., na.rm = TRUE) skips any flight with a missing dep_time (a plain
# sum() over a column containing NA is NA) and divides by the number of
# non-missing values rather than a hard-coded row count.
# NOTE(review): dep_time looks like clock time encoded as HHMM, so this is
# the mean of the encoded values, not of minutes since midnight - confirm
# that this is what the question asks for.
flights_2 <- mutate(jan1, avg_dep = mean(dep_time, na.rm = TRUE))
Answer:
Code:
# Label every diamond with a carat bracket. case_when() uses the first
# condition that is TRUE, so each later bracket only needs its upper bound.
# Note that the "<1 carat" bracket also contains diamonds of exactly 1 carat.
groups_carat <- diamonds %>%
  mutate(
    carat_group = case_when(
      carat <= 1 ~ "<1 carat",
      carat <= 2 ~ "1-2 carat",
      carat <= 3 ~ "2-3 carat",
      carat > 3 ~ ">3 carat"
    )
  )
Code:
# Mean departure delay for each NYC origin airport. The per-airport subsets
# are kept as named objects of their own.
# na.rm = TRUE drops flights with a missing dep_delay; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
NYC_air_jfk <- filter(flights, origin == "JFK")
JFK_delay <- summarise(NYC_air_jfk, delay = mean(dep_delay, na.rm = TRUE))
NYC_air_lga <- filter(flights, origin == "LGA")
LGA_delay <- summarise(NYC_air_lga, delay = mean(dep_delay, na.rm = TRUE))
NYC_air_ewr <- filter(flights, origin == "EWR")
EWR_delay <- summarise(NYC_air_ewr, delay = mean(dep_delay, na.rm = TRUE))
Answer: The average departure delay from JFK is 12.1 minutes, LGA is
10.3 minutes, and EWR is 15.1 minutes. Therefore, EWR had the worst
delay according to the data.
Code:
# Mean departure delay on December 24 for each NYC origin airport, computed
# from the per-airport subsets NYC_air_jfk, NYC_air_lga and NYC_air_ewr.
# na.rm = TRUE drops flights with a missing dep_delay; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
JFK_Christ_eve <- filter(NYC_air_jfk, month == 12, day == 24)
JFK1224 <- summarise(JFK_Christ_eve, delay = mean(dep_delay, na.rm = TRUE))
LGA_Christ_eve <- filter(NYC_air_lga, month == 12, day == 24)
LGA1224 <- summarise(LGA_Christ_eve, delay = mean(dep_delay, na.rm = TRUE))
EWR_Christ_eve <- filter(NYC_air_ewr, month == 12, day == 24)
EWR1224 <- summarise(EWR_Christ_eve, delay = mean(dep_delay, na.rm = TRUE))
Answer: On December 24, 2013, the average departure delay at
JFK was 6.09 minutes, LGA 4.54 minutes, and EWR 9.45 minutes. Therefore,
EWR had the worst average delay among the NYC airports on December 24,
2013.
Code:
# Build a small example vector.
hehe <- 1:20
# mad() returns the median absolute deviation. By default it is scaled:
# constant * median(|x_i - median(x)|), with constant = 1.4826.
mad(hehe)
## [1] 7.413
# For 1:20 the unscaled median(|x_i - median(x)|) is 5, and 5 * 1.4826 is
# the 7.413 shown above; use mad(hehe, constant = 1) for the unscaled value.
# mad() can also be applied to a single column of a data set.
library(nycflights13)
# arr_time contains missing values, so without na.rm = TRUE the result is
# NA; drop them to get a usable number.
mad(flights$arr_time, na.rm = TRUE)
Code:
# Keep only flights that have both a departure and an arrival delay recorded.
not_canceled <- flights %>%
  filter(!is.na(dep_delay), !is.na(arr_delay))
# Count the distinct destinations reached from each origin airport.
my_data <- not_canceled %>%
  group_by(origin)
my_sum <- my_data %>%
  summarise(destss = n_distinct(dest))
# Show the origin with the most destinations first.
my_sum %>%
  arrange(desc(destss))
## # A tibble: 3 × 2
## origin destss
## <chr> <int>
## 1 EWR 85
## 2 JFK 70
## 3 LGA 68
Answer: EWR serves 85 destinations, the highest
number among the three origins, surpassing JFK and LGA.
Original Code:
# Step-by-step version: every verb stores its result before the next runs.
# Keep diamonds under 2 carats, then group them by color.
my_data <- filter(diamonds, carat < 2)
my_data <- group_by(my_data, color)
# Per-color averages of carat and price.
my_summary <- summarise(
  my_data,
  mean_carat = mean(carat),
  mean_price = mean(price)
)
# Ratio of the two averages (mean price divided by mean carat).
my_summary <- mutate(my_summary, mean_unit_price = mean_price / mean_carat)
print(my_summary)
Pipe Code:
# Piped version: diamonds under 2 carats, grouped by color, summarised to
# the mean carat and mean price, plus the ratio of those two means.
# print() returns its input invisibly, so my_data holds the summary table.
my_data <- diamonds %>%
  filter(carat < 2) %>%
  group_by(color) %>%
  summarise(
    mean_carat = mean(carat),
    mean_price = mean(price)
  ) %>%
  mutate(mean_unit_price = mean_price / mean_carat) %>%
  print()
## # A tibble: 7 × 4
## color mean_carat mean_price mean_unit_price
## <ord> <dbl> <dbl> <dbl>
## 1 D 0.646 3063. 4743.
## 2 E 0.643 2944. 4577.
## 3 F 0.716 3547. 4952.
## 4 G 0.737 3720. 5047.
## 5 H 0.828 3765. 4548.
## 6 I 0.884 3814. 4314.
## 7 J 0.980 3838. 3917.
# Count, median and mean of gestation for each pregnancy label.
preg_gest <- babies %>%
  mutate(
    # Recode the parity levels "0" and "1" into readable labels.
    preg_label = fct_recode(
      as.factor(parity),
      "first-pregnancy" = "0",
      "nonfirst-pregnancy" = "1"
    )
  ) %>%
  group_by(preg_label) %>%
  summarise(
    count = n(),
    # na.rm = TRUE ignores rows with a missing gestation; TRUE is spelled out
    # because T is an ordinary variable that can be reassigned.
    median_gest = median(gestation, na.rm = TRUE),
    mean_gest = mean(gestation, na.rm = TRUE)
  ) %>%
  print()
## # A tibble: 2 × 4
## preg_label count median_gest mean_gest
## <fct> <int> <dbl> <dbl>
## 1 first-pregnancy 921 279 279.
## 2 nonfirst-pregnancy 315 282 281.
Answer: According to the data, the median (279 vs. 282) and mean
(279 vs. 281) gestation lengths for first and non-first pregnancies are
very close. These summaries alone therefore do not show a clear
association between parity and gestation length.