Import your data
# Load the `flights` data set into the workspace.
# NOTE(review): presumably provided by nycflights13 — confirm the package is
# attached before this line runs.
data(flights)
# Pipe flights into skimr::skim() to print a per-column summary.
flights %>% skimr::skim()
Data summary
Name |
Piped data |
Number of rows |
336776 |
Number of columns |
19 |
_______________________ |
|
Column type frequency: |
|
character |
4 |
numeric |
14 |
POSIXct |
1 |
________________________ |
|
Group variables |
None |
Variable type: character
carrier |
0 |
1.00 |
2 |
2 |
0 |
16 |
0 |
tailnum |
2512 |
0.99 |
5 |
6 |
0 |
4043 |
0 |
origin |
0 |
1.00 |
3 |
3 |
0 |
3 |
0 |
dest |
0 |
1.00 |
3 |
3 |
0 |
105 |
0 |
Variable type: numeric
year |
0 |
1.00 |
2013.00 |
0.00 |
2013 |
2013 |
2013 |
2013 |
2013 |
▁▁▇▁▁ |
month |
0 |
1.00 |
6.55 |
3.41 |
1 |
4 |
7 |
10 |
12 |
▇▆▆▆▇ |
day |
0 |
1.00 |
15.71 |
8.77 |
1 |
8 |
16 |
23 |
31 |
▇▇▇▇▆ |
dep_time |
8255 |
0.98 |
1349.11 |
488.28 |
1 |
907 |
1401 |
1744 |
2400 |
▁▇▆▇▃ |
sched_dep_time |
0 |
1.00 |
1344.25 |
467.34 |
106 |
906 |
1359 |
1729 |
2359 |
▁▇▇▇▃ |
dep_delay |
8255 |
0.98 |
12.64 |
40.21 |
-43 |
-5 |
-2 |
11 |
1301 |
▇▁▁▁▁ |
arr_time |
8713 |
0.97 |
1502.05 |
533.26 |
1 |
1104 |
1535 |
1940 |
2400 |
▁▃▇▇▇ |
sched_arr_time |
0 |
1.00 |
1536.38 |
497.46 |
1 |
1124 |
1556 |
1945 |
2359 |
▁▃▇▇▇ |
arr_delay |
9430 |
0.97 |
6.90 |
44.63 |
-86 |
-17 |
-5 |
14 |
1272 |
▇▁▁▁▁ |
flight |
0 |
1.00 |
1971.92 |
1632.47 |
1 |
553 |
1496 |
3465 |
8500 |
▇▃▃▁▁ |
air_time |
9430 |
0.97 |
150.69 |
93.69 |
20 |
82 |
129 |
192 |
695 |
▇▂▂▁▁ |
distance |
0 |
1.00 |
1039.91 |
733.23 |
17 |
502 |
872 |
1389 |
4983 |
▇▃▂▁▁ |
hour |
0 |
1.00 |
13.18 |
4.66 |
1 |
9 |
13 |
17 |
23 |
▁▇▇▇▅ |
minute |
0 |
1.00 |
26.23 |
19.30 |
0 |
8 |
29 |
44 |
59 |
▇▃▆▃▅ |
Variable type: POSIXct
time_hour |
0 |
1 |
2013-01-01 05:00:00 |
2013-12-31 23:00:00 |
2013-07-03 10:00:00 |
6936 |
# Read the Excel workbook into `Coaster`. The path is relative, so it is
# resolved against the current working directory.
Coaster <- read_xlsx("../00_data/MyData.xlsx")
# Pipe Coaster into skimr::skim() to print a per-column summary.
Coaster %>% skimr::skim()
Data summary
Name |
Piped data |
Number of rows |
8351 |
Number of columns |
23 |
_______________________ |
|
Column type frequency: |
|
character |
16 |
numeric |
6 |
POSIXct |
1 |
________________________ |
|
Group variables |
None |
Variable type: character
acc_state |
0 |
1.00 |
2 |
2 |
0 |
40 |
0 |
acc_city |
118 |
0.99 |
4 |
20 |
0 |
674 |
0 |
fix_port |
0 |
1.00 |
1 |
1 |
0 |
3 |
0 |
source |
0 |
1.00 |
12 |
57 |
0 |
30 |
0 |
bus_type |
0 |
1.00 |
4 |
29 |
0 |
17 |
0 |
industry_sector |
0 |
1.00 |
7 |
14 |
0 |
4 |
0 |
device_category |
0 |
1.00 |
7 |
23 |
0 |
21 |
0 |
device_type |
0 |
1.00 |
4 |
26 |
0 |
91 |
0 |
tradename_or_generic |
0 |
1.00 |
4 |
32 |
0 |
407 |
0 |
manufacturer |
3310 |
0.60 |
2 |
40 |
0 |
253 |
0 |
gender |
728 |
0.91 |
1 |
1 |
0 |
4 |
0 |
acc_desc |
3 |
1.00 |
4 |
1258 |
0 |
8023 |
0 |
injury_desc |
10 |
1.00 |
4 |
367 |
0 |
3985 |
0 |
report |
8273 |
0.01 |
77 |
86 |
0 |
77 |
0 |
category |
0 |
1.00 |
5 |
54 |
0 |
49 |
0 |
notes |
8290 |
0.01 |
9 |
678 |
0 |
41 |
0 |
Variable type: numeric
acc_id |
0 |
1.00 |
1.005e+06 |
3126.04 |
920315 |
1002160 |
1005414 |
1007676 |
1009907 |
▁▁▁▁▇ |
num_injured |
2 |
1.00 |
1.050e+00 |
0.71 |
0 |
1 |
1 |
1 |
30 |
▇▁▁▁▁ |
age_youngest |
684 |
0.92 |
2.460e+01 |
18.28 |
0 |
10 |
18 |
38 |
92 |
▇▃▃▁▁ |
mechanical |
7977 |
0.04 |
1.000e+00 |
0.00 |
1 |
1 |
1 |
1 |
1 |
▁▁▇▁▁ |
op_error |
8192 |
0.02 |
1.000e+00 |
0.00 |
1 |
1 |
1 |
1 |
1 |
▁▁▇▁▁ |
employee |
8306 |
0.01 |
1.000e+00 |
0.00 |
1 |
1 |
1 |
1 |
1 |
▁▁▇▁▁ |
Variable type: POSIXct
acc_date |
0 |
1 |
2010-06-12 |
2017-07-26 |
2014-06-28 |
1845 |
Create Data frame functions
Example 1: count columns
code snippets
# Keep only the numeric columns of flights, then count how many are left.
ncol_num <- ncol(
  select(flights, where(is.numeric))
)
ncol_num
## [1] 14
Turn them into a function
#' Count the numeric columns of a data frame
#'
#' @param x A data frame (or tibble) whose columns are inspected.
#' @return A single integer: the number of columns of `x` for which
#'   `is.numeric()` is `TRUE`.
Number_of_columns <- function(x) {
  stopifnot(is.data.frame(x))
  # Operate on the argument, not on a global object, so the result describes
  # whichever data frame the caller passes in.
  x %>%
    # Select a type of variables
    select(where(is.numeric)) %>%
    # Count columns
    ncol()
}
numerical_columns <- Number_of_columns(flights)
numerical_columns
## [1] 14
Adding arguments for details of operation
Example 2: count rows
code snippets
# Keep the rows of flights whose carrier code is "UA", then count them.
nrow_num <- nrow(
  filter(flights, carrier == "UA")
)
nrow_num
## [1] 58665
Turn them into a function
#' Count the rows of a data frame that belong to one carrier
#'
#' @param x A data frame (or tibble) with a `carrier` column. Defaults to the
#'   `flights` object so that zero-argument calls keep working.
#' @param carrier_code A single string: the carrier code to match.
#'   Defaults to `"UA"`.
#' @return A single integer: the number of rows of `x` whose `carrier` equals
#'   `carrier_code`.
nrows <- function(x = flights, carrier_code = "UA") {
  stopifnot(
    is.data.frame(x),
    is.character(carrier_code),
    length(carrier_code) == 1L
  )
  x %>%
    # filter rows that meet a condition; .data/.env keep the column and the
    # function argument from being confused with each other
    filter(.data$carrier == .env$carrier_code) %>%
    # Count rows
    nrow()
}
# Pass the data explicitly instead of relying on the default.
Number_of_UA_flights <- nrows(flights)
Example 3: count rows
code snippets
# Count the rows of Coaster after keeping only its numeric columns.
# select() drops columns, not rows, so this equals the row count of Coaster.
nrows_num_ex <- Coaster %>%
  select(where(is.numeric)) %>%
  nrow()
# Print the value computed here (not `nrow_num` from the flights example).
nrows_num_ex
## [1] 8351
# Keep the rows of Coaster whose acc_state is "NH", then count them.
nrow_num_ex2 <- nrow(
  filter(Coaster, acc_state == "NH")
)
nrow_num_ex2
## [1] 211
Turn them into a function
#' Count the rows of a data frame recorded for New Hampshire
#'
#' @param x A data frame (or tibble) with an `acc_state` column.
#' @return A single integer: the number of rows of `x` whose `acc_state`
#'   equals `"NH"`.
Number_Accidents_in_NH <- function(x) {
  stopifnot(is.data.frame(x))
  # Operate on the argument, not on a global object, so the result describes
  # whichever data frame the caller passes in.
  x %>%
    filter(acc_state == "NH") %>%
    nrow()
}
Accidents_in_NH <- Number_Accidents_in_NH(Coaster)
Accidents_in_NH
## [1] 211