Import your data
# Load the two example datasets into the workspace.
# NOTE(review): presumably `flights` comes from nycflights13 and `penguins`
# from palmerpenguins, attached in a setup chunk not shown here; confirm.
data(flights)
data(penguins)
# Preview both datasets
# NOTE(review): skim() looks like skimr::skim(); the tables that follow are
# its per-type summaries of each column.
skim(flights)
Data summary
|                          |         |
|:-------------------------|:--------|
| Name                     | flights |
| Number of rows           | 336776  |
| Number of columns        | 19      |
| _______________________  |         |
| Column type frequency:   |         |
| character                | 4       |
| numeric                  | 14      |
| POSIXct                  | 1       |
| ________________________ |         |
| Group variables          | None    |
Variable type: character
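| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| carrier       |         0 |          1.00 |   2 |   2 |     0 |       16 |          0 |
| tailnum       |      2512 |          0.99 |   5 |   6 |     0 |     4043 |          0 |
| origin        |         0 |          1.00 |   3 |   3 |     0 |        3 |          0 |
| dest          |         0 |          1.00 |   3 |   3 |     0 |      105 |          0 |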
Variable type: numeric
| skim_variable  | n_missing | complete_rate |    mean |      sd |   p0 |  p25 |  p50 |  p75 | p100 | hist  |
|:---------------|----------:|--------------:|--------:|--------:|-----:|-----:|-----:|-----:|-----:|:------|
| year           |         0 |          1.00 | 2013.00 |    0.00 | 2013 | 2013 | 2013 | 2013 | 2013 | ▁▁▇▁▁ |
| month          |         0 |          1.00 |    6.55 |    3.41 |    1 |    4 |    7 |   10 |   12 | ▇▆▆▆▇ |
| day            |         0 |          1.00 |   15.71 |    8.77 |    1 |    8 |   16 |   23 |   31 | ▇▇▇▇▆ |
| dep_time       |      8255 |          0.98 | 1349.11 |  488.28 |    1 |  907 | 1401 | 1744 | 2400 | ▁▇▆▇▃ |
| sched_dep_time |         0 |          1.00 | 1344.25 |  467.34 |  106 |  906 | 1359 | 1729 | 2359 | ▁▇▇▇▃ |
| dep_delay      |      8255 |          0.98 |   12.64 |   40.21 |  -43 |   -5 |   -2 |   11 | 1301 | ▇▁▁▁▁ |
| arr_time       |      8713 |          0.97 | 1502.05 |  533.26 |    1 | 1104 | 1535 | 1940 | 2400 | ▁▃▇▇▇ |
| sched_arr_time |         0 |          1.00 | 1536.38 |  497.46 |    1 | 1124 | 1556 | 1945 | 2359 | ▁▃▇▇▇ |
| arr_delay      |      9430 |          0.97 |    6.90 |   44.63 |  -86 |  -17 |   -5 |   14 | 1272 | ▇▁▁▁▁ |
| flight         |         0 |          1.00 | 1971.92 | 1632.47 |    1 |  553 | 1496 | 3465 | 8500 | ▇▃▃▁▁ |
| air_time       |      9430 |          0.97 |  150.69 |   93.69 |   20 |   82 |  129 |  192 |  695 | ▇▂▂▁▁ |
| distance       |         0 |          1.00 | 1039.91 |  733.23 |   17 |  502 |  872 | 1389 | 4983 | ▇▃▂▁▁ |
| hour           |         0 |          1.00 |   13.18 |    4.66 |    1 |    9 |   13 |   17 |   23 | ▁▇▇▇▅ |
| minute         |         0 |          1.00 |   26.23 |   19.30 |    0 |    8 |   29 |   44 |   59 | ▇▃▆▃▅ |
Variable type: POSIXct
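| skim_variable | n_missing | complete_rate | min                 | max                 | median              | n_unique |
|:--------------|----------:|--------------:|:--------------------|:--------------------|:--------------------|---------:|
| time_hour     |         0 |             1 | 2013-01-01 05:00:00 | 2013-12-31 23:00:00 | 2013-07-03 10:00:00 |     6936 |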
skim(penguins)
Data summary
|                          |          |
|:-------------------------|:---------|
| Name                     | penguins |
| Number of rows           | 344      |
| Number of columns        | 8        |
| _______________________  |          |
| Column type frequency:   |          |
| factor                   | 3        |
| numeric                  | 5        |
| ________________________ |          |
| Group variables          | None     |
Variable type: factor
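| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts                  |
|:--------------|----------:|--------------:|:--------|---------:|:----------------------------|
| species       |         0 |          1.00 | FALSE   |        3 | Ade: 152, Gen: 124, Chi: 68 |
| island        |         0 |          1.00 | FALSE   |        3 | Bis: 168, Dre: 124, Tor: 52 |
| sex           |        11 |          0.97 | FALSE   |        2 | mal: 168, fem: 165          |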
Variable type: numeric
| skim_variable     | n_missing | complete_rate |    mean |     sd |     p0 |     p25 |     p50 |    p75 |   p100 | hist  |
|:------------------|----------:|--------------:|--------:|-------:|-------:|--------:|--------:|-------:|-------:|:------|
| bill_length_mm    |         2 |          0.99 |   43.92 |   5.46 |   32.1 |   39.23 |   44.45 |   48.5 |   59.6 | ▃▇▇▆▁ |
| bill_depth_mm     |         2 |          0.99 |   17.15 |   1.97 |   13.1 |   15.60 |   17.30 |   18.7 |   21.5 | ▅▅▇▇▂ |
| flipper_length_mm |         2 |          0.99 |  200.92 |  14.06 |  172.0 |  190.00 |  197.00 |  213.0 |  231.0 | ▂▇▃▅▂ |
| body_mass_g       |         2 |          0.99 | 4201.75 | 801.95 | 2700.0 | 3550.00 | 4050.00 | 4750.0 | 6300.0 | ▃▇▆▃▂ |
| year              |         0 |          1.00 | 2008.03 |   0.82 | 2007.0 | 2007.00 | 2008.00 | 2009.0 | 2009.0 | ▇▁▇▁▇ |
Create Data frame functions
Example 1: Count numeric columns
Code snippet
# Keep only the numeric columns of `flights`, count them, then print the count.
ncol_num <- ncol(select(flights, where(is.numeric)))
ncol_num
## [1] 14
Function
# Count the numeric columns of a data frame.
#
# .data: the data frame to inspect.
#
# Returns the number of columns for which is.numeric() is TRUE.
count_num_cols <- function(.data) {
  numeric_part <- select(.data, where(is.numeric))
  ncol(numeric_part)
}
# Test
count_num_cols(flights)
## [1] 14
Function with argument for type
# Count the columns of a data frame that have a given type.
#
# .data: the data frame to inspect.
# type:  a single string naming the column type to count; one of
#        "numeric" (the default), "character", "factor" or "logical".
#
# Returns the number of columns of the requested type. An unsupported or
# malformed `type` raises an error instead of silently returning NULL.
count_cols_by_type <- function(.data, type = "numeric") {
  stopifnot(is.character(type), length(type) == 1L, !is.na(type))

  # Map the type name to the predicate that recognises such columns; the
  # unnamed final argument is the fall-through for unknown names.
  is_type <- switch(
    type,
    numeric = is.numeric,
    character = is.character,
    factor = is.factor,
    logical = is.logical,
    stop(
      "Unsupported `type`: \"", type, "\". ",
      "Use one of \"numeric\", \"character\", \"factor\", \"logical\".",
      call. = FALSE
    )
  )

  .data %>%
    select(where(is_type)) %>%
    ncol()
}
# Test
count_cols_by_type(flights)
## [1] 14
count_cols_by_type(flights, type = "character")
## [1] 4
Example 2: Count rows by carrier
Code snippet
# Keep only the United Airlines ("UA") flights, count the rows, then print.
nrow_num <- nrow(filter(flights, carrier == "UA"))
nrow_num
## [1] 58665
Function
# Count the rows of a data frame that belong to one carrier.
#
# .data:        a data frame with a `carrier` column.
# carrier_name: the carrier code to match, e.g. "UA".
#
# Returns the number of rows whose `carrier` equals `carrier_name`.
count_rows_by_carrier <- function(.data, carrier_name) {
  .data %>%
    # `.env$` makes filter() use the function argument even if `.data`
    # happens to contain a column that is also called `carrier_name`.
    filter(carrier == .env$carrier_name) %>%
    nrow()
}
# Test
count_rows_by_carrier(flights, "UA")
## [1] 58665
count_rows_by_carrier(flights, "AA")
## [1] 32729
Example 3: Your own function – Count penguins by species
Code snippet
# Keep only the Adelie penguins and print how many rows remain.
nrow(filter(penguins, species == "Adelie"))
## [1] 152
Function
# Count the rows of a data frame that belong to one species.
#
# .data:        a data frame with a `species` column.
# species_name: the species to match, e.g. "Adelie".
#
# Returns the number of rows whose `species` equals `species_name`.
count_penguins_by_species <- function(.data, species_name) {
  .data %>%
    # `.env$` makes filter() use the function argument even if `.data`
    # happens to contain a column that is also called `species_name`.
    filter(species == .env$species_name) %>%
    nrow()
}
# Test
count_penguins_by_species(penguins, "Adelie")
## [1] 152
count_penguins_by_species(penguins, "Gentoo")
## [1] 124