R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Install the packages this document needs only when they are missing.
# An unconditional install.packages() on every knit re-downloads the packages
# and requires network access each time the document is rendered.
# local() keeps the helper variables out of the workspace.
local({
  needed <- c("nycflights13", "dplyr")
  absent <- needed[
    !vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  ]
  if (length(absent) > 0) {
    install.packages(absent)
  }
})
# The former rm(list = ls()) call was removed: it erases whatever workspace the
# document is rendered in, while leaving attached packages and options as they
# were, so it does not give a truly clean session.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
library(dplyr)
# Load the four tables used throughout this document in a single call.
data(flights, airlines, airports, weather)
# Show a preview of the flights table.
flights
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # … with 336,766 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Show the carrier-code-to-name lookup table.
airlines
## # A tibble: 16 × 2
##    carrier name                       
##    <chr>   <chr>                      
##  1 9E      Endeavor Air Inc.          
##  2 AA      American Airlines Inc.     
##  3 AS      Alaska Airlines Inc.       
##  4 B6      JetBlue Airways            
##  5 DL      Delta Air Lines Inc.       
##  6 EV      ExpressJet Airlines Inc.   
##  7 F9      Frontier Airlines Inc.     
##  8 FL      AirTran Airways Corporation
##  9 HA      Hawaiian Airlines Inc.     
## 10 MQ      Envoy Air                  
## 11 OO      SkyWest Airlines Inc.      
## 12 UA      United Air Lines Inc.      
## 13 US      US Airways Inc.            
## 14 VX      Virgin America             
## 15 WN      Southwest Airlines Co.     
## 16 YV      Mesa Airlines Inc.
# Show a preview of the airports table.
airports
## # A tibble: 1,458 × 8
##    faa   name                             lat    lon   alt    tz dst   tzone    
##    <chr> <chr>                          <dbl>  <dbl> <dbl> <dbl> <chr> <chr>    
##  1 04G   Lansdowne Airport               41.1  -80.6  1044    -5 A     America/…
##  2 06A   Moton Field Municipal Airport   32.5  -85.7   264    -6 A     America/…
##  3 06C   Schaumburg Regional             42.0  -88.1   801    -6 A     America/…
##  4 06N   Randall Airport                 41.4  -74.4   523    -5 A     America/…
##  5 09J   Jekyll Island Airport           31.1  -81.4    11    -5 A     America/…
##  6 0A9   Elizabethton Municipal Airport  36.4  -82.2  1593    -5 A     America/…
##  7 0G6   Williams County Airport         41.5  -84.5   730    -5 A     America/…
##  8 0G7   Finger Lakes Regional Airport   42.9  -76.8   492    -5 A     America/…
##  9 0P2   Shoestring Aviation Airfield    39.8  -76.6  1000    -5 U     America/…
## 10 0S9   Jefferson County Intl           48.1 -123.    108    -8 A     America/…
## # … with 1,448 more rows
# Show a preview of the weather table.
weather
## # A tibble: 26,115 × 15
##    origin  year month   day  hour  temp  dewp humid wind_dir wind_speed
##    <chr>  <int> <int> <int> <int> <dbl> <dbl> <dbl>    <dbl>      <dbl>
##  1 EWR     2013     1     1     1  39.0  26.1  59.4      270      10.4 
##  2 EWR     2013     1     1     2  39.0  27.0  61.6      250       8.06
##  3 EWR     2013     1     1     3  39.0  28.0  64.4      240      11.5 
##  4 EWR     2013     1     1     4  39.9  28.0  62.2      250      12.7 
##  5 EWR     2013     1     1     5  39.0  28.0  64.4      260      12.7 
##  6 EWR     2013     1     1     6  37.9  28.0  67.2      240      11.5 
##  7 EWR     2013     1     1     7  39.0  28.0  64.4      240      15.0 
##  8 EWR     2013     1     1     8  39.9  28.0  62.2      250      10.4 
##  9 EWR     2013     1     1     9  39.9  28.0  62.2      260      15.0 
## 10 EWR     2013     1     1    10  41    28.0  59.6      260      13.8 
## # … with 26,105 more rows, and 5 more variables: wind_gust <dbl>, precip <dbl>,
## #   pressure <dbl>, visib <dbl>, time_hour <dttm>
# Keep the date columns, the delay and timing columns, and the carrier code,
# in this order.
flights %>%
  select(
    year, month, day,
    arr_delay, dep_time, sched_dep_time, dep_delay,
    arr_time, sched_arr_time, carrier
  )
## # A tibble: 336,776 × 10
##     year month   day arr_delay dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>     <dbl>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1        11      517            515         2      830
##  2  2013     1     1        20      533            529         4      850
##  3  2013     1     1        33      542            540         2      923
##  4  2013     1     1       -18      544            545        -1     1004
##  5  2013     1     1       -25      554            600        -6      812
##  6  2013     1     1        12      554            558        -4      740
##  7  2013     1     1        19      555            600        -5      913
##  8  2013     1     1       -14      557            600        -3      709
##  9  2013     1     1        -8      557            600        -3      838
## 10  2013     1     1         8      558            600        -2      753
## # … with 336,766 more rows, and 2 more variables: sched_arr_time <int>,
## #   carrier <chr>
# Rows whose month and day are both 1.
flights %>%
  filter(month == 1 & day == 1)
## # A tibble: 842 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # … with 832 more rows, and 11 more variables: arr_delay <dbl>, carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Rows whose arrival delay does not exceed 120. Rows with a missing arr_delay
# are excluded as well, because filter() drops rows whose condition is NA.
flights %>%
  filter(arr_delay <= 120)
## # A tibble: 317,312 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # … with 317,302 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Sort all flights from the largest arrival delay to the smallest.
flights %>%
  arrange(desc(arr_delay))
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     9      641            900      1301     1242           1530
##  2  2013     6    15     1432           1935      1137     1607           2120
##  3  2013     1    10     1121           1635      1126     1239           1810
##  4  2013     9    20     1139           1845      1014     1457           2210
##  5  2013     7    22      845           1600      1005     1044           1815
##  6  2013     4    10     1100           1900       960     1342           2211
##  7  2013     3    17     2321            810       911      135           1020
##  8  2013     7    22     2257            759       898      121           1026
##  9  2013    12     5      756           1700       896     1058           2020
## 10  2013     5     3     1133           2055       878     1250           2215
## # … with 336,766 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Mean arrival delay per carrier (missing delays ignored), largest first.
# NOTE(review): `by_day` is grouped by carrier, not by day; the name is kept
# unchanged in case other code refers to it.
by_day <- flights %>%
  group_by(carrier)
arrdelay <- by_day %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE))
arrdelay %>%
  arrange(desc(delay))
## # A tibble: 16 × 2
##    carrier  delay
##    <chr>    <dbl>
##  1 F9      21.9  
##  2 FL      20.1  
##  3 EV      15.8  
##  4 YV      15.6  
##  5 OO      11.9  
##  6 MQ      10.8  
##  7 WN       9.65 
##  8 B6       9.46 
##  9 9E       7.38 
## 10 UA       3.56 
## 11 US       2.13 
## 12 VX       1.76 
## 13 DL       1.64 
## 14 AA       0.364
## 15 HA      -6.92 
## 16 AS      -9.93
# Mean arrival delay per value of `hour` (missing delays ignored), largest
# first. An hour whose arrival delays are all missing yields NaN, since the
# mean is then taken over no values.
by_hour <- flights %>%
  group_by(hour)
arrivaldelay <- by_hour %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE))
arrivaldelay %>%
  arrange(desc(delay))
## # A tibble: 20 × 2
##     hour   delay
##    <dbl>   <dbl>
##  1    21  18.4  
##  2    20  16.7  
##  3    19  16.7  
##  4    17  16.0  
##  5    22  16.0  
##  6    18  14.8  
##  7    16  12.6  
##  8    15  12.3  
##  9    23  11.8  
## 10    14   9.20 
## 11    13   6.54 
## 12    12   3.49 
## 13    11   1.48 
## 14    10   0.954
## 15     8  -1.11 
## 16     9  -1.45 
## 17     6  -3.38 
## 18     5  -4.80 
## 19     7  -5.30 
## 20     1 NaN
# First 100 flights, reduced to the key columns, with the airline name
# attached. Join keys are spelled out so that the result does not depend on
# whichever column names happen to coincide between the tables.
ans6 <- flights %>%
  head(100) %>%
  select(year, month, day, hour, origin, dest, tailnum, carrier) %>%
  left_join(airlines, by = "carrier")

# Add the weather record for the same origin and hour, then the airport record.
# The previous implicit join to `airports` matched on `name`, i.e. it compared
# airline names with airport names, which are unrelated values; the airport
# columns therefore carried no usable information. Airports are now looked up
# by their FAA code, and the airport's own name is renamed to `airport_name`
# so it does not collide with the airline `name` column.
# NOTE(review): the destination airport is assumed to be the intended lookup;
# use by = c("origin" = "faa") for the departure airport instead.
# Re-knit to refresh the printed table that follows.
ans6 %>%
  left_join(weather, by = c("year", "month", "day", "hour", "origin")) %>%
  left_join(
    rename(airports, airport_name = name),
    by = c("dest" = "faa")
  )
## # A tibble: 100 × 26
##     year month   day  hour origin dest  tailnum carrier name    temp  dewp humid
##    <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>  <dbl> <dbl> <dbl>
##  1  2013     1     1     5 EWR    IAH   N14228  UA      Unite…  39.0  28.0  64.4
##  2  2013     1     1     5 LGA    IAH   N24211  UA      Unite…  39.9  25.0  54.8
##  3  2013     1     1     5 JFK    MIA   N619AA  AA      Ameri…  39.0  27.0  61.6
##  4  2013     1     1     5 JFK    BQN   N804JB  B6      JetBl…  39.0  27.0  61.6
##  5  2013     1     1     6 LGA    ATL   N668DN  DL      Delta…  39.9  25.0  54.8
##  6  2013     1     1     5 EWR    ORD   N39463  UA      Unite…  39.0  28.0  64.4
##  7  2013     1     1     6 EWR    FLL   N516JB  B6      JetBl…  37.9  28.0  67.2
##  8  2013     1     1     6 LGA    IAD   N829AS  EV      Expre…  39.9  25.0  54.8
##  9  2013     1     1     6 JFK    MCO   N593JB  B6      JetBl…  37.9  27.0  64.3
## 10  2013     1     1     6 LGA    ORD   N3ALAA  AA      Ameri…  39.9  25.0  54.8
## # … with 90 more rows, and 14 more variables: wind_dir <dbl>, wind_speed <dbl>,
## #   wind_gust <dbl>, precip <dbl>, pressure <dbl>, visib <dbl>,
## #   time_hour <dttm>, faa <chr>, lat <dbl>, lon <dbl>, alt <dbl>, tz <dbl>,
## #   dst <chr>, tzone <chr>
# Flights whose destination code is ALB, BDL or BTV; keep them for the
# steps below and print a preview.
subflights <- flights %>%
  filter(dest %in% c("ALB", "BDL", "BTV"))
subflights
## # A tibble: 3,471 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      908            910        -2     1020           1027
##  2  2013     1     1     1111           1115        -4     1222           1226
##  3  2013     1     1     1202           1207        -5     1318           1314
##  4  2013     1     1     1315           1317        -2     1413           1423
##  5  2013     1     1     1318           1322        -4     1358           1416
##  6  2013     1     1     1655           1621        34     1804           1724
##  7  2013     1     1     1711           1650        21     1820           1806
##  8  2013     1     1     1842           1422       260     1958           1535
##  9  2013     1     1     2056           2004        52     2156           2112
## 10  2013     1     1     2302           2200        62     2342           2253
## # … with 3,461 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Carrier and destination of each flight in the subset.
subflights %>%
  select(carrier, dest)
## # A tibble: 3,471 × 2
##    carrier dest 
##    <chr>   <chr>
##  1 B6      BTV  
##  2 B6      BTV  
##  3 EV      BTV  
##  4 EV      ALB  
##  5 EV      BDL  
##  6 EV      ALB  
##  7 EV      BTV  
##  8 EV      BTV  
##  9 EV      ALB  
## 10 EV      BDL  
## # … with 3,461 more rows
# Number of flights in the subset per destination.
subflights %>%
  count(dest)
## # A tibble: 3 × 2
##   dest      n
##   <chr> <int>
## 1 ALB     439
## 2 BDL     443
## 3 BTV    2589
# Mean departure delay (missing delays ignored) for every combination of
# carrier, month and origin.
by_com <- flights %>%
  group_by(carrier, month, origin)
# `.groups = "drop_last"` states the grouping of the result explicitly: it is
# the same grouping summarise() applied by default here (carrier, month), but
# naming it removes the informational message about the implicit choice.
by_com %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE), .groups = "drop_last")
## # A tibble: 399 × 4
## # Groups:   carrier, month [185]
##    carrier month origin delay
##    <chr>   <int> <chr>  <dbl>
##  1 9E          1 EWR    12.9 
##  2 9E          1 JFK    17.1 
##  3 9E          1 LGA    17.4 
##  4 9E          2 EWR    -1.18
##  5 9E          2 JFK    18.0 
##  6 9E          2 LGA     6.08
##  7 9E          3 EWR     5.6 
##  8 9E          3 JFK    14.4 
##  9 9E          3 LGA     6.95
## 10 9E          4 EWR     5.87
## # … with 389 more rows
# Summary statistics for the built-in `cars` data set (this appears to be the
# example chunk from the default R Markdown template).
summary(datasets::cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.