Import your data
# Load the flights data set into the workspace
data("flights")
# Summarise every column; the report is split by variable type
flights %>%
  skimr::skim()
Data summary

|                          |            |
|:-------------------------|:-----------|
| Name                     | Piped data |
| Number of rows           | 336776     |
| Number of columns        | 19         |
| _______________________  |            |
| Column type frequency:   |            |
| character                | 4          |
| numeric                  | 14         |
| POSIXct                  | 1          |
| ________________________ |            |
| Group variables          | None       |
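Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| carrier       |         0 |          1.00 |   2 |   2 |     0 |       16 |          0 |
| tailnum       |      2512 |          0.99 |   5 |   6 |     0 |     4043 |          0 |
| origin        |         0 |          1.00 |   3 |   3 |     0 |        3 |          0 |
| dest          |         0 |          1.00 |   3 |   3 |     0 |      105 |          0 |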
Variable type: numeric

| skim_variable  | n_missing | complete_rate |    mean |      sd |   p0 |  p25 |  p50 |  p75 | p100 | hist  |
|:---------------|----------:|--------------:|--------:|--------:|-----:|-----:|-----:|-----:|-----:|:------|
| year           |         0 |          1.00 | 2013.00 |    0.00 | 2013 | 2013 | 2013 | 2013 | 2013 | ▁▁▇▁▁ |
| month          |         0 |          1.00 |    6.55 |    3.41 |    1 |    4 |    7 |   10 |   12 | ▇▆▆▆▇ |
| day            |         0 |          1.00 |   15.71 |    8.77 |    1 |    8 |   16 |   23 |   31 | ▇▇▇▇▆ |
| dep_time       |      8255 |          0.98 | 1349.11 |  488.28 |    1 |  907 | 1401 | 1744 | 2400 | ▁▇▆▇▃ |
| sched_dep_time |         0 |          1.00 | 1344.25 |  467.34 |  106 |  906 | 1359 | 1729 | 2359 | ▁▇▇▇▃ |
| dep_delay      |      8255 |          0.98 |   12.64 |   40.21 |  -43 |   -5 |   -2 |   11 | 1301 | ▇▁▁▁▁ |
| arr_time       |      8713 |          0.97 | 1502.05 |  533.26 |    1 | 1104 | 1535 | 1940 | 2400 | ▁▃▇▇▇ |
| sched_arr_time |         0 |          1.00 | 1536.38 |  497.46 |    1 | 1124 | 1556 | 1945 | 2359 | ▁▃▇▇▇ |
| arr_delay      |      9430 |          0.97 |    6.90 |   44.63 |  -86 |  -17 |   -5 |   14 | 1272 | ▇▁▁▁▁ |
| flight         |         0 |          1.00 | 1971.92 | 1632.47 |    1 |  553 | 1496 | 3465 | 8500 | ▇▃▃▁▁ |
| air_time       |      9430 |          0.97 |  150.69 |   93.69 |   20 |   82 |  129 |  192 |  695 | ▇▂▂▁▁ |
| distance       |         0 |          1.00 | 1039.91 |  733.23 |   17 |  502 |  872 | 1389 | 4983 | ▇▃▂▁▁ |
| hour           |         0 |          1.00 |   13.18 |    4.66 |    1 |    9 |   13 |   17 |   23 | ▁▇▇▇▅ |
| minute         |         0 |          1.00 |   26.23 |   19.30 |    0 |    8 |   29 |   44 |   59 | ▇▃▆▃▅ |
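Variable type: POSIXct

| skim_variable | n_missing | complete_rate | min                 | max                 | median              | n_unique |
|:--------------|----------:|--------------:|:--------------------|:--------------------|:--------------------|---------:|
| time_hour     |         0 |             1 | 2013-01-01 05:00:00 | 2013-12-31 23:00:00 | 2013-07-03 10:00:00 |     6936 |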
# Read the winners table from the shared data folder into a tibble
Mydata <- read_csv(
  file = "../00_data/tdf_winners.csv"
)
## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its first rows and column types
Mydata
## # A tibble: 106 × 19
## edition start_date winner_name winner_team distance time_overall time_margin
## <dbl> <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 1903-07-01 Maurice Gar… La Françai… 2428 94.6 2.99
## 2 2 1904-07-02 Henri Cornet Conte 2428 96.1 2.27
## 3 3 1905-07-09 Louis Trous… Peugeot–Wo… 2994 NA NA
## 4 4 1906-07-04 René Pottier Peugeot–Wo… 4637 NA NA
## 5 5 1907-07-08 Lucien Peti… Peugeot–Wo… 4488 NA NA
## 6 6 1908-07-13 Lucien Peti… Peugeot–Wo… 4497 NA NA
## 7 7 1909-07-05 François Fa… Alcyon–Dun… 4498 NA NA
## 8 8 1910-07-01 Octave Lapi… Alcyon–Dun… 4734 NA NA
## 9 9 1911-07-02 Gustave Gar… Alcyon–Dun… 5343 NA NA
## 10 10 1912-06-30 Odile Defra… Alcyon–Dun… 5289 NA NA
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## # weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## # nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>
Create data frame functions
Example 1: count columns
code snippets
# Keep only the numeric columns of flights, then count them
ncol_num <- ncol(
  select(flights, where(is.numeric))
)
ncol_num
## [1] 14
Turn them into a function
#' Count the numeric columns of a data frame
#'
#' @param .data A data frame (or tibble).
#' @return The number of columns for which `is.numeric()` is TRUE.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column first ...
  numeric_cols <- select(.data, where(is.numeric))
  # ... so the remaining width is the answer
  ncol(numeric_cols)
}
# Whole data set
count_ncol_numeric(flights)
## [1] 14
# First ten rows with the first thirteen columns removed
count_ncol_numeric(flights[1:10, -(1:13)])
## [1] 4
Adding arguments for details of operation
#' Count the columns of a data frame that are of a given type
#'
#' @param .data A data frame (or tibble).
#' @param type_data A single string naming the kind of column to count:
#'   "numeric" (the default) or "character".
#' @return The number of matching columns.
#' Raises an error if `type_data` is not one of the supported strings.
count_ncol_type <- function(.data, type_data = "numeric") {
  # Reject anything that is not a single string before dispatching on it
  if (!is.character(type_data) || length(type_data) != 1L) {
    stop("`type_data` must be a single string.", call. = FALSE)
  }

  # Map the label to its predicate. An unsupported label fails here with a
  # clear message; previously neither branch ran and the function died at
  # return() with "object 'ncol_type' not found".
  is_type <- switch(
    type_data,
    numeric = is.numeric,
    character = is.character,
    stop(
      "`type_data` must be \"numeric\" or \"character\", not \"",
      type_data, "\".",
      call. = FALSE
    )
  )

  .data %>%
    # Select the columns of the requested type
    select(where(is_type)) %>%
    # Count columns
    ncol()
}
# Default type ("numeric")
count_ncol_type(flights)
## [1] 14
# Character columns of the whole data set
count_ncol_type(flights, type_data = "character")
## [1] 4
# Character columns among the first five columns (first ten rows)
count_ncol_type(flights[1:10, 1:5], type_data = "character")
## [1] 0
Example 2: count rows
code snippets
# Keep the rows whose carrier is "UA", then count them
nrow_num <- nrow(
  filter(flights, carrier == "UA")
)
nrow_num
## [1] 58665
Turn them into a function
#' Count the rows of a data frame that belong to one carrier
#'
#' @param .data A data frame with a `carrier` column.
#' @param carrier_name The carrier code to match, e.g. "UA".
#' @return The number of rows whose `carrier` equals `carrier_name`.
count_num_flights_by_carrier <- function(.data, carrier_name) {
  .data %>%
    # `.data$` pins `carrier` to the column and `.env$` pins `carrier_name`
    # to the function argument. Without the pronouns, a column that happened
    # to be called `carrier_name` would silently take the argument's place.
    filter(.data$carrier == .env$carrier_name) %>%
    # Count rows
    nrow()
}
# Whole data set
count_num_flights_by_carrier(flights, carrier_name = "UA")
## [1] 58665
# First ten rows, carrier column only
count_num_flights_by_carrier(flights[1:10, "carrier"], carrier_name = "AA")
## [1] 2
Example 3: count rows (race winners data)
code snippets
# Keep the rows won by "Lance Armstrong", then count them
nrow_num <- nrow(
  filter(Mydata, winner_name == "Lance Armstrong")
)
nrow_num
## [1] 7
Turn them into a function
#' Count the rows of a data frame whose winner is a given person
#'
#' The function name is spelled "perosn" (sic); it is left unchanged because
#' the calls in this file use that spelling.
#'
#' @param .data A data frame with a `winner_name` column.
#' @param winner_name The name to match, e.g. "Lance Armstrong".
#' @return The number of rows whose `winner_name` column equals the argument.
count_num_wins_by_perosn <- function(.data, winner_name) {
  # The argument shares its name with the column, so each side is qualified:
  # `.data$` reads the column, `.env$` reads the function argument.
  matching_rows <- filter(.data, .data$winner_name == .env$winner_name)
  nrow(matching_rows)
}
count_num_wins_by_perosn(Mydata, winner_name = "Lance Armstrong")
## [1] 7
count_num_wins_by_perosn(Mydata, winner_name = "Philippe Thys")
## [1] 3