NYC Flights Homework

Load the libraries and view the “flights” dataset

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Browse the full table in the data viewer, then print its first six rows.
view(flights)
head(flights, n = 6L)
## # A tibble: 6 x 19
##    year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
## 1  2013     1     1      517            515         2      830            819
## 2  2013     1     1      533            529         4      850            830
## 3  2013     1     1      542            540         2      923            850
## 4  2013     1     1      544            545        -1     1004           1022
## 5  2013     1     1      554            600        -6      812            837
## 6  2013     1     1      554            558        -4      740            728
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
library(dplyr)
library(ggplot2)
# Recode carrier as a factor limited to five airlines; any carrier code that
# is not one of these levels becomes NA.
flights <- mutate(
  flights,
  carrier = factor(carrier, levels = c("AA", "B6", "DL", "US", "WN"))
)

Practice Plot

# Overlaid, semi-transparent histograms of air time (bin width 5), one colour
# per carrier. position = "identity" keeps the bars overlapping, not stacked.
airline_labels <- c("AA", "B6", "DL", "US", "WN")
ggplot(flights, aes(x = air_time, fill = carrier)) +
  geom_histogram(
    position = "identity",
    alpha = 0.5,
    binwidth = 5,
    color = "black"
  ) +
  scale_fill_discrete(name = "Airline", labels = airline_labels)
## Warning: Removed 9430 rows containing non-finite values (stat_bin).

I tried to plot a bar graph of distance by airline carrier, but the numbers were too large (each bar adds up the distance of every flight for that carrier), so I needed to find the average instead.

# Bar chart of distance by carrier. Bar heights come straight from the y
# values, so every flight's distance is stacked into its carrier's bar and
# each bar shows a total rather than an average.
p <- flights %>%
  ggplot(aes(x = carrier, y = distance)) +
  geom_col() +
  theme_minimal()
p

Bar Plot of Average Distance Airplane Carriers Travel

# Bar plot of the average flight distance for each carrier.
#
# Group by carrier only. Grouping by distance as well makes every group's
# mean equal to its own distance, and the stacked bars then show the sum of
# the distinct distances per carrier instead of an average.
Plot1 <- flights %>%
  group_by(carrier) %>%
  # .groups = "drop" returns an ungrouped summary and silences the
  # regrouping message.
  summarize(mean_distance = mean(distance, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = carrier, y = mean_distance, fill = carrier)) +
  geom_col() +
  ggtitle("Average Distance by Airplane Carrier") +
  theme(
    plot.title = element_text(hjust = .01, size = 15),
    legend.justification = -20,
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  ) +
  xlab("Airplane Carrier") +
  ylab("Average Distance")
Plot1

Bar Plot of Average Airtime by Airplane Carriers.

# Bar plot of the average air time for each carrier.
#
# Group by carrier only. Grouping by air_time as well makes every group's
# mean equal to its own air_time (and NaN for the groups where air_time is
# missing), so the stacked bars show a sum of distinct air times per carrier
# instead of an average.
Plot2 <- flights %>%
  group_by(carrier) %>%
  # na.rm = TRUE skips flights with no recorded air time; .groups = "drop"
  # returns an ungrouped summary and silences the regrouping message.
  summarize(mean_air_time = mean(air_time, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = carrier, y = mean_air_time, fill = carrier)) +
  geom_col() +
  ggtitle("Average Air Time by Airline Carrier") +
  theme(
    plot.title = element_text(hjust = .01, size = 15),
    legend.justification = -20,
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  ) +
  xlab("AirPlane Carriers") +
  ylab("Average Air Time")
Plot2

Histogram

# Histogram of flight distance in 20 bins, with bars filled by carrier.
# Built with ggplot() + geom_histogram() because qplot() is deprecated in
# current ggplot2 releases.
p2 <- ggplot(flights, aes(x = distance, fill = carrier)) +
  geom_histogram(bins = 20)
p2