# Read the Excel workbook at the given relative path into `data`.
data <- read_excel("../00_data/Apply_1.xlsx")
# Print a skimr summary of the data; it is piped in, so skimr labels the
# input "Piped data" in the summary header.
data %>% skimr::skim()
| Name | Piped data |
| Number of rows | 1155 |
| Number of columns | 13 |
| _______________________ | |
| Column type frequency: | |
| character | 8 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| movie_name | 0 | 1 | 2 | 43 | 0 | 830 | 0 |
| director | 0 | 1 | 3 | 31 | 0 | 510 | 0 |
| actor_1_name | 0 | 1 | 6 | 22 | 0 | 567 | 0 |
| actor_2_name | 0 | 1 | 7 | 27 | 0 | 647 | 0 |
| character_1_gender | 0 | 1 | 3 | 5 | 0 | 2 | 0 |
| character_2_gender | 0 | 1 | 3 | 5 | 0 | 2 | 0 |
| actor_1_birthdate | 0 | 1 | 10 | 10 | 0 | 562 | 0 |
| actor_2_birthdate | 0 | 1 | 10 | 10 | 0 | 640 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| release_year | 0 | 1 | 2000.80 | 16.37 | 1935 | 1997 | 2004 | 2012 | 2022 | ▁▁▁▆▇ |
| age_difference | 0 | 1 | 10.42 | 8.51 | 0 | 4 | 8 | 15 | 52 | ▇▃▂▁▁ |
| couple_number | 0 | 1 | 1.40 | 0.75 | 1 | 1 | 1 | 2 | 7 | ▇▁▁▁▁ |
| actor_1_age | 0 | 1 | 40.64 | 10.42 | 18 | 33 | 39 | 47 | 81 | ▂▇▅▂▁ |
| actor_2_age | 0 | 1 | 30.21 | 7.50 | 17 | 25 | 29 | 34 | 68 | ▇▇▂▁▁ |
# Keep only the numeric columns of `data` and store how many there are.
ncol_num <- ncol(select(data, where(is.numeric)))
# Print the count.
ncol_num
## [1] 5
# Count the numeric columns of a data frame.
#
# Args:
#   .data: A data frame or tibble.
#
# Returns:
#   The number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column first ...
  numeric_cols <- select(.data, where(is.numeric))
  # ... then the column count of what is left is the answer.
  ncol(numeric_cols)
}
# Apply the function to the full data set.
count_ncol_numeric(data)
## [1] 5
# Show the first 10 rows and the first 13 columns.
data[1:10, 1:13]
## # A tibble: 10 × 13
## movie_name release_year director age_difference couple_number actor_1_name
## <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Harold and M… 1971 Hal Ash… 52 1 Ruth Gordon
## 2 Venus 2006 Roger M… 50 1 Peter O'Too…
## 3 The Quiet Am… 2002 Phillip… 49 1 Michael Cai…
## 4 The Big Lebo… 1998 Joel Co… 45 1 David Huddl…
## 5 Beginners 2010 Mike Mi… 43 1 Christopher…
## 6 Poison Ivy 1992 Katt Sh… 42 1 Tom Skerritt
## 7 Whatever Wor… 2009 Woody A… 40 1 Larry David
## 8 Entrapment 1999 Jon Ami… 39 1 Sean Connery
## 9 Husbands and… 1992 Woody A… 38 1 Woody Allen
## 10 Magnolia 1999 Paul Th… 38 1 Jason Robar…
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## # character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## # actor_1_age <dbl>, actor_2_age <dbl>
# Count the columns of a data frame that are of a given type.
#
# Args:
#   .data: A data frame or tibble.
#   type_data: Which kind of column to count, either "numeric" (the default)
#     or "character".
#
# Returns:
#   The number of columns of the requested type.
#
# Raises:
#   An error if `type_data` is not a single string, or is a string other than
#   "numeric" or "character". Without this check an unsupported value fell
#   through both branches and failed with "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {
  # switch() dispatches on a single string, so reject anything else up front.
  stopifnot(is.character(type_data), length(type_data) == 1L, !is.na(type_data))

  # Pick the predicate that identifies the requested column type.
  is_type <- switch(
    type_data,
    numeric = is.numeric,
    character = is.character,
    stop(
      "`type_data` must be \"numeric\" or \"character\", not \"",
      type_data, "\".",
      call. = FALSE
    )
  )

  .data %>%
    # Select the columns of the requested type
    select(where(is_type)) %>%
    # Count columns
    ncol()
}
# With no `type_data` supplied, the default ("numeric") is used.
count_ncol_type(data)
## [1] 5
# Ask for the numeric columns explicitly.
count_ncol_type(data, type_data = "numeric")
## [1] 5
# Ask for the character columns.
count_ncol_type(data, type_data = "character")
## [1] 8
# Same question on a subset: first 10 rows, first 6 columns.
count_ncol_type(data[1:10, 1:6], type_data = "character")
## [1] 3
# Count the rows of `data` in which actor 1 was 43 years old.
nrow_num <- data %>%
  # `actor_1_age` is a numeric column, so compare it with the number 43
  # instead of the string "43" (which only matched via implicit coercion).
  filter(actor_1_age == 43) %>%
  # Count rows
  nrow()
# Print the count.
nrow_num
## [1] 27
# Count the rows of a data frame in which actor 1 has a given age.
#
# Args:
#   .data: A data frame or tibble with an `actor_1_age` column.
#   actor_1_age: The age to look for.
#
# Returns:
#   The number of rows whose `actor_1_age` column equals `actor_1_age`.
count_type_actor <- function(.data, actor_1_age) {
  # Start from the data frame passed in, not from the global `data` object.
  .data %>%
    # The argument has the same name as the column, so be explicit about
    # which is which: `.data$` is the column of the data frame being
    # filtered, `.env$` is the value supplied by the caller.
    filter(.data$actor_1_age == .env$actor_1_age) %>%
    # Count rows
    nrow()
}
# Count the rows of the full data set where actor 1 was 43 years old.
# (The earlier subset `.[1.10, "43"]` was invalid -- there is no column named
# "43" -- and went unnoticed only because the function never used its input.)
data %>%
  count_type_actor(actor_1_age = 43)
## [1] 27
Create your own.
Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2 Filter rows with filter()