1. nycflights13 패키지에 담겨 있는 flights 데이터를 불러와서 구조 확인

library(nycflights13)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
c(str(flights), dim(flights))
## Classes 'tbl_df', 'tbl' and 'data.frame':    336776 obs. of  19 variables:
##  $ year          : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ month         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ day           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ dep_time      : int  517 533 542 544 554 554 555 557 557 558 ...
##  $ sched_dep_time: int  515 529 540 545 600 558 600 600 600 600 ...
##  $ dep_delay     : num  2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
##  $ arr_time      : int  830 850 923 1004 812 740 913 709 838 753 ...
##  $ sched_arr_time: int  819 830 850 1022 837 728 854 723 846 745 ...
##  $ arr_delay     : num  11 20 33 -18 -25 12 19 -14 -8 8 ...
##  $ carrier       : chr  "UA" "UA" "AA" "B6" ...
##  $ flight        : int  1545 1714 1141 725 461 1696 507 5708 79 301 ...
##  $ tailnum       : chr  "N14228" "N24211" "N619AA" "N804JB" ...
##  $ origin        : chr  "EWR" "LGA" "JFK" "JFK" ...
##  $ dest          : chr  "IAH" "IAH" "MIA" "BQN" ...
##  $ air_time      : num  227 227 160 183 116 150 158 53 140 138 ...
##  $ distance      : num  1400 1416 1089 1576 762 ...
##  $ hour          : num  5 5 5 5 6 5 6 6 6 6 ...
##  $ minute        : num  15 29 40 45 0 58 0 0 0 0 ...
##  $ time_hour     : POSIXct, format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
## [1] 336776     19

2. year, month, day 컬럼 선택하기

select(flights, year, month, day)
## # A tibble: 336,776 x 3
##     year month   day
##    <int> <int> <int>
##  1  2013     1     1
##  2  2013     1     1
##  3  2013     1     1
##  4  2013     1     1
##  5  2013     1     1
##  6  2013     1     1
##  7  2013     1     1
##  8  2013     1     1
##  9  2013     1     1
## 10  2013     1     1
## # ... with 336,766 more rows

3. year(연도)부터 day(일)까지 범위에 있는 모든 컬럼 선택하기(inclusive)

select(flights, year:day)
## # A tibble: 336,776 x 3
##     year month   day
##    <int> <int> <int>
##  1  2013     1     1
##  2  2013     1     1
##  3  2013     1     1
##  4  2013     1     1
##  5  2013     1     1
##  6  2013     1     1
##  7  2013     1     1
##  8  2013     1     1
##  9  2013     1     1
## 10  2013     1     1
## # ... with 336,766 more rows

4. year(연도)부터 day(일)까지 범위의 컬럼을 제외한 나머지 컬럼 선택하기(exclusive)

select(flights, -(year:day))
## # A tibble: 336,776 x 16
##    dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay
##       <int>          <int>     <dbl>    <int>          <int>     <dbl>
##  1      517            515      2.00      830            819     11.0 
##  2      533            529      4.00      850            830     20.0 
##  3      542            540      2.00      923            850     33.0 
##  4      544            545     -1.00     1004           1022    -18.0 
##  5      554            600     -6.00      812            837    -25.0 
##  6      554            558     -4.00      740            728     12.0 
##  7      555            600     -5.00      913            854     19.0 
##  8      557            600     -3.00      709            723    -14.0 
##  9      557            600     -3.00      838            846    - 8.00
## 10      558            600     -2.00      753            745      8.00
## # ... with 336,766 more rows, and 10 more variables: carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

5. select() 함수와 everything() 함수를 결합하면 원하는 컬럼을 맨 앞으로 옮기고, 나머지 컬럼은 기존 순서대로 뒤에 배치 가능

select(flights, time_hour, air_time, everything())
## # A tibble: 336,776 x 19
##    time_hour           air_time  year month   day dep_time sched_dep_time
##    <dttm>                 <dbl> <int> <int> <int>    <int>          <int>
##  1 2013-01-01 05:00:00    227    2013     1     1      517            515
##  2 2013-01-01 05:00:00    227    2013     1     1      533            529
##  3 2013-01-01 05:00:00    160    2013     1     1      542            540
##  4 2013-01-01 05:00:00    183    2013     1     1      544            545
##  5 2013-01-01 06:00:00    116    2013     1     1      554            600
##  6 2013-01-01 05:00:00    150    2013     1     1      554            558
##  7 2013-01-01 06:00:00    158    2013     1     1      555            600
##  8 2013-01-01 06:00:00     53.0  2013     1     1      557            600
##  9 2013-01-01 06:00:00    140    2013     1     1      557            600
## 10 2013-01-01 06:00:00    138    2013     1     1      558            600
## # ... with 336,766 more rows, and 12 more variables: dep_delay <dbl>,
## #   arr_time <int>, sched_arr_time <int>, arr_delay <dbl>, carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, distance <dbl>,
## #   hour <dbl>, minute <dbl>

6. select() 함수와 contains() 함수를 같이 사용하여 컬럼 이름에 특정 문자열이 포함된 컬럼 선택 (contains()는 기본적으로 대소문자를 구분하지 않으므로 "TIME"으로도 dep_time 등이 선택됨)

select(flights, contains("TIME"))
## # A tibble: 336,776 x 6
##    dep_time sched_dep_time arr_time sched_arr_time air_time
##       <int>          <int>    <int>          <int>    <dbl>
##  1      517            515      830            819    227  
##  2      533            529      850            830    227  
##  3      542            540      923            850    160  
##  4      544            545     1004           1022    183  
##  5      554            600      812            837    116  
##  6      554            558      740            728    150  
##  7      555            600      913            854    158  
##  8      557            600      709            723     53.0
##  9      557            600      838            846    140  
## 10      558            600      753            745    138  
## # ... with 336,766 more rows, and 1 more variable: time_hour <dttm>

ref: R for Data Science, pp. 51–54