ggplot

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary statistics (min, quartiles, median, mean, max) of `cars`.
summary(object = cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Loading in library

You can also embed plots, for example:

# Attach every package up front. MASS (which provides `birthwt`) is attached
# BEFORE tidyverse so that dplyr::select() is not masked by MASS::select().
library(MASS)
library(tidyverse)
library(gridExtra)
library(corrplot)
library(nycflights13)

# Built-in example data.
data("mtcars")

# External CSV files.
# NOTE(review): machine-specific absolute directory - adjust when running
# this document on another computer.
data_dir <- "/Users/farre/Documents/data_set"
college <- read_csv(file.path(data_dir, "college_history.csv"))
# This file is read without a header row, so columns get default names.
states <- read_csv(
  file.path(data_dir, "introductory_state_example.csv"),
  col_names = FALSE
)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

simple scatterplot

# Basic scatterplot: engine displacement (x) against highway mileage (y).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

Scatterplot with changing transparency

# Same scatterplot, with point transparency mapped to the cylinder count.
mpg %>%
  ggplot() +
  geom_point(aes(displ, hwy, alpha = cyl))

Scatterplot with data as shapes

# Point shape encodes the cylinder count; shape needs a discrete variable,
# hence the conversion of `cyl` to a factor.
ggplot(mpg, aes(displ, hwy, shape = as.factor(cyl))) +
  geom_point()

Scatter plot with more labels

# Scatterplot coloured by cylinder count, with custom axis labels, title,
# caption and panel tag. (Title spelling corrected: "Efficiency".)
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = cyl)) +
  labs(
    x = "Engine size[liters]",
    y = "Mileage[mpg]",
    title = "Efficiency",
    caption = "(Based on data from mpg)",
    tag = "a)"
  )

Subplots based on class

# One scatterplot panel per vehicle class, wrapped onto two rows.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  facet_wrap(vars(class), nrow = 2)

Subplot with rows(Class), Columns(Cyl)

# Grid of panels: one row per class, one column per cylinder count.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  facet_grid(rows = vars(class), cols = vars(cyl))

Code to insert line of best fit

# Smoothed trend of highway mileage over displacement. The method is left
# unspecified, so geom_smooth() picks it and reports its choice.
ggplot(mpg, aes(displ, hwy)) +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Creates lines of best fit for each drive type of car

Works well with categorical variables in data.

# One smoothed trend line per drive type, distinguished by line type.
ggplot(mpg, aes(displ, hwy, linetype = drv)) +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Code to map points and a line of best fit for data

# Points and smoother share the aesthetics declared once in ggplot().
mpg %>%
  ggplot(aes(displ, hwy)) +
  geom_point() +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Can change the color of only the points by class of car

# The colour mapping is local to the point layer, so the smoother is still
# fit to all cars as a single group.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

`se = FALSE` turns off the shaded standard-error band; here the smoother is fit only to the SUV rows

# All cars are drawn as points; the smoother is fit to the SUV rows only and
# drawn without its standard-error band.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(data = mpg %>% filter(class == "suv"), se = FALSE)

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Creates a bar chart of one variable

# Bar chart counting the cars in each class.
ggplot(mpg, aes(x = class)) +
  geom_bar()

Computes proportions of data

# Bar chart of class proportions instead of counts. `group = 1` makes the
# proportions relative to the whole data set rather than to each bar.
# after_stat() replaces the deprecated stat() helper.
ggplot(data = mpg) +
  geom_bar(mapping = aes(x = class, y = after_stat(prop), group = 1))

Plots a summary of each class(Median)

# Per-class summary of highway mileage: median as the point, min-max as the
# range. Uses `fun`, `fun.min` and `fun.max`; the older `fun.y`, `fun.ymin`
# and `fun.ymax` arguments are deprecated and produced warnings.
ggplot(data = mpg) +
  stat_summary(
    mapping = aes(x = class, y = hwy),
    fun.min = min,
    fun.max = max,
    fun = median
  )

Creates a bar chart with color coded boundaries

# `colour` affects only the outline of each bar (fill is left untouched).
ggplot(mpg, aes(class, colour = class)) +
  geom_bar()

Creates a stacked bar chart comparing class and cylinders

# Stacked bars: each class bar is split by cylinder count via `fill`.
mpg %>%
  ggplot(aes(class, fill = as.factor(cyl))) +
  geom_bar()

Creates bar chart side by side

# Same breakdown as the stacked chart, with the cylinder bars side by side.
ggplot(mpg, aes(class, fill = as.factor(cyl))) +
  geom_bar(position = position_dodge())

Creates histogram charts and can put them side by side

# Four histograms of highway mileage that differ only in bin width, laid out
# in a single row. The common data/aesthetic setup is built once and reused.
hwy_base <- ggplot(mpg, aes(x = hwy))

plot1 <- hwy_base +
  geom_histogram(binwidth = 0.5) +
  labs(title = "binwidth = .5")
plot2 <- hwy_base +
  geom_histogram(binwidth = 1) +
  labs(title = "binwidth = 1")
plot3 <- hwy_base +
  geom_histogram(binwidth = 2) +
  labs(title = "binwidth = 2")
plot4 <- hwy_base +
  geom_histogram(binwidth = 3) +
  labs(title = "binwidth = 3")

grid.arrange(plot1, plot2, plot3, plot4, ncol = 4)

Creates a density plot

# Kernel density estimate of highway mileage.
ggplot(mpg, aes(hwy)) +
  geom_density()

Creates a jitter plot

This type of plot helps you see where points overlap.

# Scatterplot with a small random offset on every point so that overlapping
# observations become visible.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(position = position_jitter())

Creates a flipped boxplot

# Boxplot of highway mileage per class, drawn horizontally via coord_flip().
mpg %>%
  ggplot(aes(class, hwy)) +
  geom_boxplot() +
  coord_flip()

Flipping a histogram, changing the legend name and stacking it

# Semi-transparent histogram of `lwt` filled by race, with the legend
# renamed to "Race" and the axes flipped.
birthwt %>%
  ggplot(aes(lwt, fill = as.factor(race))) +
  geom_histogram(binwidth = 10, alpha = 0.5) +
  scale_fill_discrete(name = "Race") +
  coord_flip()

Using facet wrap to create two histograms based on smokers side by side

# One histogram of `lwt` per value of `smoke`, side by side in a single row;
# the legend is renamed to "Smoker".
birthwt %>%
  ggplot(aes(lwt, fill = as.factor(smoke))) +
  geom_histogram(binwidth = 10) +
  scale_fill_discrete(name = "Smoker") +
  facet_wrap(vars(smoke), nrow = 1)

This function allows you to view the first 6 observations

# Show the first six rows of mtcars (six is head()'s default).
head(mtcars, n = 6)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Stores the row names as a variable

# Keep the car names (stored as row names of mtcars) in their own vector.
carnames <- row.names(mtcars)
print(carnames)

##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"

Creates a tibble of the data

A tibble does not convert strings to factors, and tibble column names are more flexible.

# Replace the mtcars data frame with its tibble version and display it.
mtcars <- mtcars %>% as_tibble()
print(mtcars)

## # A tibble: 32 x 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
##  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
##  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
##  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
##  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
##  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
##  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
##  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
##  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
## 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
## # ... with 22 more rows

Creates a tibble of different currencies

Wrapping a name in backticks (``) allows column headers that are not valid R names, such as `$$` and `1`

# Backticks let a tibble use column names that are not valid R identifiers.
# The single value 20 is recycled to all four rows.
tibble(
  `$$` = c("USD", "AUD", "PES", "Yen"),
  `1` = 20
)

## # A tibble: 4 x 2
##   `$$`    `1`
##   <chr> <dbl>
## 1 USD      20
## 2 AUD      20
## 3 PES      20
## 4 Yen      20

Converts data to a tibble and shows less observations upfront

# Tibble version of iris; printing shows only the first ten rows.
tib_iris <- iris %>% as_tibble()
print(tib_iris)

## # A tibble: 150 x 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ... with 140 more rows

Filters the data by carb, and gears

Assigns the filtered data to a variable and prints the rows that match the criteria. Each test must use == (not =) to compare values.

# Keep only the cars that have one carburetor AND four gears.
car_reduced <- mtcars %>%
  filter(carb == 1 & gear == 4)
print(car_reduced)

## # A tibble: 4 x 11
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
## 2  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
## 3  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
## 4  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1

Filters cars with either 6 cylinders or 5 gears

# Cars with six cylinders OR five gears (either condition is enough).
mtcars %>%
  filter(cyl == 6 | gear == 5)

## # A tibble: 11 x 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
##  2  21       6 160     110  3.9   2.88  17.0     0     1     4     4
##  3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
##  4  18.1     6 225     105  2.76  3.46  20.2     1     0     3     1
##  5  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
##  6  17.8     6 168.    123  3.92  3.44  18.9     1     0     4     4
##  7  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
##  8  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2
##  9  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
## 10  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
## 11  15       8 301     335  3.54  3.57  14.6     0     1     5     8

Filters cars with 6 cylinders and 5 gears

# Cars with six cylinders AND five gears; comma-separated conditions in
# filter() must all hold.
filter(mtcars, cyl == 6, gear == 5)

## # A tibble: 1 x 11
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  19.7     6   145   175  3.62  2.77  15.5     0     1     5     6

Filters cars with mileage between 20 and 25 mpg

# Cars whose mileage lies in the closed interval [20, 25] mpg.
mtcars %>%
  filter(between(mpg, 20, 25))

## # A tibble: 8 x 11
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
## 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
## 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
## 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
## 5  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
## 6  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
## 7  21.5     4  120.    97  3.7   2.46  20.0     1     0     3     1
## 8  21.4     4  121    109  4.11  2.78  18.6     1     1     4     2

Sorts cars by cylinder, then gears, then carburetor

# Sort by cylinders, breaking ties by gears and then by carburetors.
mtcars %>%
  arrange(cyl, gear, carb)

## # A tibble: 32 x 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  21.5     4 120.     97  3.7   2.46  20.0     1     0     3     1
##  2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
##  3  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
##  4  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
##  5  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
##  6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
##  7  22.8     4 141.     95  3.92  3.15  22.9     1     0     4     2
##  8  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
##  9  21.4     4 121     109  4.11  2.78  18.6     1     1     4     2
## 10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
## # ... with 22 more rows

Sorts by above, but gears are in descending order

# Same sort keys, but gears are ordered from highest to lowest.
mtcars %>%
  arrange(cyl, desc(gear), carb)

## # A tibble: 32 x 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
##  2  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2
##  3  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
##  4  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
##  5  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
##  6  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
##  7  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
##  8  22.8     4 141.     95  3.92  3.15  22.9     1     0     4     2
##  9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
## 10  21.4     4 121     109  4.11  2.78  18.6     1     1     4     2
## # ... with 22 more rows

Filters the secular colleges, sorts them by the year they were established, and finds which of them have an original name

The !is.na will display the data that has a value in the cell rather than NA.

# Colleges whose sponsorship is recorded as "Secular".
secular_cols <- college %>%
  filter(sponsorship == "Secular")
print(secular_cols)

## # A tibble: 12 x 6
##    college                  original_name  city    state established sponsorship
##    <chr>                    <chr>          <chr>   <chr>       <dbl> <chr>      
##  1 Georgia, Univ. of        <NA>           Athens  GA           1785 Secular    
##  2 North Carolina, Univ. of <NA>           Chapel~ NC           1789 Secular    
##  3 Tennessee, Univ. of      Blount College Knoxvi~ TN           1794 Secular    
##  4 U.S. Military Academy    <NA>           West P~ NY           1802 Secular    
##  5 Ohio Univ.               <NA>           Athens  OH           1804 Secular    
##  6 Miami Univ.              <NA>           Oxford  OH           1809 Secular    
##  7 Maryland, Univ. of       <NA>           Baltim~ MD           1812 Secular    
##  8 Missouri, Univ. of       <NA>           Columb~ MO           1839 Secular    
##  9 Mississipps, Univ. of    <NA>           Oxford  MI           1844 Secular    
## 10 Louisiana, Univ. of      <NA>           New Or~ LA           1845 Secular    
## 11 U.S. Naval Academy       <NA>           Annapo~ MD           1845 Secular    
## 12 Wisconsin, Univ. of      <NA>           Madison WI           1848 Secular

# Secular colleges ordered by founding year (earliest first).
Secular_cols_srtd <- college %>%
  filter(sponsorship == "Secular") %>%
  arrange(established)
print(Secular_cols_srtd)

## # A tibble: 12 x 6
##    college                  original_name  city    state established sponsorship
##    <chr>                    <chr>          <chr>   <chr>       <dbl> <chr>      
##  1 Georgia, Univ. of        <NA>           Athens  GA           1785 Secular    
##  2 North Carolina, Univ. of <NA>           Chapel~ NC           1789 Secular    
##  3 Tennessee, Univ. of      Blount College Knoxvi~ TN           1794 Secular    
##  4 U.S. Military Academy    <NA>           West P~ NY           1802 Secular    
##  5 Ohio Univ.               <NA>           Athens  OH           1804 Secular    
##  6 Miami Univ.              <NA>           Oxford  OH           1809 Secular    
##  7 Maryland, Univ. of       <NA>           Baltim~ MD           1812 Secular    
##  8 Missouri, Univ. of       <NA>           Columb~ MO           1839 Secular    
##  9 Mississipps, Univ. of    <NA>           Oxford  MI           1844 Secular    
## 10 Louisiana, Univ. of      <NA>           New Or~ LA           1845 Secular    
## 11 U.S. Naval Academy       <NA>           Annapo~ MD           1845 Secular    
## 12 Wisconsin, Univ. of      <NA>           Madison WI           1848 Secular

# Extract the original names of the sorted secular colleges, dropping the
# entries where no original name is recorded (NA).
original_name <- Secular_cols_srtd %>%
  filter(!is.na(original_name)) %>%
  pull(original_name)
print(original_name)

## [1] "Blount College"

Creates a new variable and puts it at the end of the object

# Append a derived column: miles per gallon divided by cylinder count.
# The result is only displayed; mtcars itself is not modified.
mtcars %>%
  mutate(MileagePerCylinder = mpg / cyl)

## # A tibble: 32 x 12
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
##  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
##  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
##  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
##  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
##  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
##  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
##  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
##  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
## 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
## # ... with 22 more rows, and 1 more variable: MileagePerCylinder <dbl>

The lag and lead function

The lag function shifts the values one position later, so the first element becomes NA; the lead function shifts them one position earlier, so the last element becomes NA

# Integer vector 1..10, shifted one position later (the first slot becomes
# NA). The dplyr:: prefix spells out which lag() is meant, since stats also
# has one.
a <- 1:10
dplyr::lag(a)

##  [1] NA  1  2  3  4  5  6  7  8  9

# Shift `a` one position earlier (the last slot becomes NA).
dplyr::lead(a)

##  [1]  2  3  4  5  6  7  8  9 10 NA

Computes a correlation matrix of mpg, disp, hp, drat, wt, qsec

# Keep only the six continuous columns that feed the correlation matrix.
# The dplyr:: prefix is explicit because MASS also defines select().
dplyr::select(mtcars, mpg, disp, hp, drat, wt, qsec)

## # A tibble: 32 x 6
##      mpg  disp    hp  drat    wt  qsec
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  21    160    110  3.9   2.62  16.5
##  2  21    160    110  3.9   2.88  17.0
##  3  22.8  108     93  3.85  2.32  18.6
##  4  21.4  258    110  3.08  3.22  19.4
##  5  18.7  360    175  3.15  3.44  17.0
##  6  18.1  225    105  2.76  3.46  20.2
##  7  14.3  360    245  3.21  3.57  15.8
##  8  24.4  147.    62  3.69  3.19  20  
##  9  22.8  141.    95  3.92  3.15  22.9
## 10  19.2  168.   123  3.92  3.44  18.3
## # ... with 22 more rows

# Correlation matrix of the six selected columns, drawn with corrplot().
my_data <- dplyr::select(mtcars, mpg, disp, hp, drat, wt, qsec)
rc <- cor(x = my_data)
corrplot(rc)

Creates an `efficient` variable that is 1 when a car's mpg is greater than 23 and 0 otherwise

Then goes on to compute the mean mpg of the efficient cars

# Flag cars above 23 mpg with efficient = 1 (others 0), keep the flagged
# rows, and report their mean mpg.
eff_mtcars <- mtcars %>%
  mutate(efficient = if_else(mpg > 23, 1, 0))
eff_cars <- eff_mtcars %>%
  filter(efficient == 1)
eff_cars %>%
  pull(mpg) %>%
  mean()

## [1] 29.25714

Groups the data by cyl,gear, and carb

Then the function will compute the average mpg, min mpg, and max mpg.

# Mean, minimum and maximum mpg for every cyl / gear / carb combination.
# `.groups = "drop_last"` states dplyr's default regrouping explicitly: the
# result stays grouped by cyl and gear, and the informational message about
# grouped output is no longer emitted.
grp_data <- group_by(mtcars, cyl, gear, carb)
summarise(
  grp_data,
  avempg = mean(mpg),
  minmpg = min(mpg),
  maxmpg = max(mpg),
  .groups = "drop_last"
)

## # A tibble: 12 x 6
## # Groups:   cyl, gear [8]
##      cyl  gear  carb avempg minmpg maxmpg
##    <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>
##  1     4     3     1   21.5   21.5   21.5
##  2     4     4     1   29.1   22.8   33.9
##  3     4     4     2   24.8   21.4   30.4
##  4     4     5     2   28.2   26     30.4
##  5     6     3     1   19.8   18.1   21.4
##  6     6     4     4   19.8   17.8   21  
##  7     6     5     6   19.7   19.7   19.7
##  8     8     3     2   17.2   15.2   19.2
##  9     8     3     3   16.3   15.2   17.3
## 10     8     3     4   12.6   10.4   14.7
## 11     8     5     4   15.8   15.8   15.8
## 12     8     5     8   15     15     15

Takes random sample of 5 observations

# Draw five rows at random (no seed is set, so the rows differ on every
# run). slice_sample() supersedes sample_n().
slice_sample(mtcars, n = 5)

## # A tibble: 5 x 11
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3
## 2  14.7     8  440    230  3.23  5.34  17.4     0     0     3     4
## 3  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
## 4  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
## 5  26       4  120.    91  4.43  2.14  16.7     0     1     5     2

Takes random sample of 50 percent of the data

# Draw a random 50 percent of the rows (no seed is set, so the rows differ
# on every run). slice_sample() supersedes sample_frac().
slice_sample(mtcars, prop = 0.5)

## # A tibble: 16 x 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
##  2  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
##  3  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
##  4  21.5     4 120.     97  3.7   2.46  20.0     1     0     3     1
##  5  21.4     4 121     109  4.11  2.78  18.6     1     1     4     2
##  6  14.7     8 440     230  3.23  5.34  17.4     0     0     3     4
##  7  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
##  8  18.1     6 225     105  2.76  3.46  20.2     1     0     3     1
##  9  15.2     8 276.    180  3.07  3.78  18       0     0     3     3
## 10  15.5     8 318     150  2.76  3.52  16.9     0     0     3     2
## 11  10.4     8 472     205  2.93  5.25  18.0     0     0     3     4
## 12  15.2     8 304     150  3.15  3.44  17.3     0     0     3     2
## 13  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
## 14  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
## 15  17.8     6 168.    123  3.92  3.44  18.9     1     0     4     4
## 16  10.4     8 460     215  3     5.42  17.8     0     0     3     4

Filters all flights that occurred on Jan. 1st

# Flights whose month and day are both 1, i.e. January 1st.
jan1flights <- flights %>%
  filter(month == 1 & day == 1)
print(jan1flights)

## # A tibble: 842 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ... with 832 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>