NYC Flights Homework
Load the libraries and view the “flights” dataset
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Open the flights data in the data viewer for browsing.
view(flights)
# Print the first six rows of flights to the console.
head(flights)
## # A tibble: 6 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
library(dplyr)
library(ggplot2)
# Recode carrier as a factor limited to five airline codes; any carrier not
# listed in `levels` becomes NA and appears as an NA category in later plots.
flights[["carrier"]] <- factor(
  flights[["carrier"]],
  levels = c("AA", "B6", "DL", "US", "WN")
)
Practice Plot
# Overlaid, semi-transparent histograms of air time (5-unit bins), one per
# carrier level, drawn on top of each other rather than stacked.
ggplot(data = flights, mapping = aes(x = air_time, fill = carrier)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "black"
  ) +
  scale_fill_discrete(
    name = "Airline",
    labels = c("AA", "B6", "DL", "US", "WN")
  )
## Warning: Removed 9430 rows containing non-finite values (stat_bin).

I first tried to plot a bar graph of distance by airline carrier, but because the bars add up the distance of every flight, the numbers were too large, so I needed to plot the average distance instead.
# Bar chart of distance by carrier. Each flight is its own stacked segment,
# so a bar's height is the carrier's total distance, not a typical value.
p <- ggplot(flights, aes(x = carrier, y = distance)) +
  geom_col() +
  theme_minimal()
p

Bar Plot of Average Distance Traveled by Airline Carrier
# Average flight distance per carrier level, drawn as one bar per carrier.
# Group by carrier only: also grouping by distance makes each group's mean
# equal to its own distance, and the stacked bars then show the sum of the
# distinct distances flown instead of an average.
Plot1 <- flights %>%
  group_by(carrier) %>%
  summarize(
    mean_distance = mean(distance, na.rm = TRUE),
    .groups = "drop" # return an ungrouped tibble and silence the message
  ) %>%
  ggplot(aes(x = carrier, y = mean_distance, fill = carrier)) +
  geom_col() +
  ggtitle("Average Distance by Airplane Carrier") +
  theme(
    plot.title = element_text(hjust = .01, size = 15),
    legend.justification = -20,
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  ) +
  xlab("Airplane Carrier") +
  ylab("Average Distance")
Plot1

Bar Plot of Average Airtime by Airplane Carriers.
# Average air time per carrier level, drawn as one bar per carrier.
# Group by carrier only: also grouping by air_time makes each group's mean
# equal to its own air_time (and NaN for the missing-air_time group), so the
# stacked bars showed a sum of distinct values instead of an average.
Plot2 <- flights %>%
  group_by(carrier) %>%
  summarize(
    mean_air_time = mean(air_time, na.rm = TRUE), # skip flights with no air_time
    .groups = "drop" # return an ungrouped tibble and silence the message
  ) %>%
  ggplot(aes(x = carrier, y = mean_air_time, fill = carrier)) +
  geom_col() +
  ggtitle("Average Air Time by Airline Carrier") +
  theme(
    plot.title = element_text(hjust = .01, size = 15),
    legend.justification = -20,
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  ) +
  xlab("AirPlane Carriers") +
  ylab("Average Air Time")
Plot2

Histogram
# Stacked histogram of flight distance in 20 bins, filled by carrier.
# Built with ggplot() + geom_histogram() because qplot() is deprecated in
# ggplot2 3.4.0 and later; the resulting plot is the same.
p2 <- ggplot(flights, aes(x = distance, fill = carrier)) +
  geom_histogram(bins = 20)
p2
