# Read the Excel workbook (path is relative to the working directory) into `data`
data <- read_excel("../00_data/myData.xlsx")
# Print a skimr summary of every column in `data`
data %>% skimr::skim()
| Name | Piped data |
| Number of rows | 45088 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 6 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| stock_symbol | 0 | 1 | 3 | 5 | 0 | 14 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| open | 0 | 1 | 89.27 | 101.63 | 1.08 | 25.67 | 47.93 | 128.66 | 6.962800e+02 | ▇▂▁▁▁ |
| high | 0 | 1 | 90.37 | 103.00 | 1.11 | 25.93 | 48.46 | 129.85 | 7.009900e+02 | ▇▂▁▁▁ |
| low | 0 | 1 | 88.11 | 100.12 | 1.00 | 25.36 | 47.47 | 127.25 | 6.860900e+02 | ▇▂▁▁▁ |
| close | 0 | 1 | 89.27 | 101.59 | 1.05 | 25.66 | 47.97 | 128.64 | 6.916900e+02 | ▇▂▁▁▁ |
| adj_close | 0 | 1 | 85.21 | 101.00 | 1.05 | 22.08 | 45.38 | 113.67 | 6.916900e+02 | ▇▁▁▁▁ |
| volume | 0 | 1 | 52978130.54 | 93247295.87 | 589200.00 | 9629425.00 | 26463150.00 | 58397675.00 | 1.880998e+09 | ▇▁▁▁▁ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2010-01-04 | 2023-01-24 | 2016-08-09 | 3287 |
# Keep only the numeric columns of `data` and count how many there are
ncol_num <- ncol(select(data, where(is.numeric)))
ncol_num
## [1] 6
#' Count the numeric columns of a data frame
#'
#' @param .data A data frame (or tibble) to inspect.
#' @return The number of columns for which `is.numeric()` is `TRUE`.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column, then report how many columns remain
  numeric_cols <- select(.data, where(is.numeric))
  ncol(numeric_cols)
}
count_ncol_numeric(data)
## [1] 6
count_ncol_numeric(data[1:10, -(1:6)])
## [1] 2
#' Count the columns of a given type in a data frame
#'
#' @param .data A data frame (or tibble) to inspect.
#' @param type_data Which kind of column to count: `"numeric"` (the default)
#'   or `"character"`.
#' @return The number of columns of the requested type.
#' @details Any other `type_data` value raises an error naming the supported
#'   choices.
count_ncol_type <- function(.data, type_data = "numeric") {
  # Validate first so a bad argument fails with a message about `type_data`
  if (length(type_data) != 1L || is.na(type_data)) {
    stop("`type_data` must be a single non-missing value.", call. = FALSE)
  }

  # Map the requested type to its predicate; the unnamed last arm of
  # switch() is the fallback for unsupported values
  is_type <- switch(
    as.character(type_data),
    numeric = is.numeric,
    character = is.character,
    stop(
      "Unsupported `type_data`: \"", type_data,
      "\". Use \"numeric\" or \"character\".",
      call. = FALSE
    )
  )

  .data %>%
    # Select the columns of the requested type
    select(where(is_type)) %>%
    # Count columns
    ncol()
}
count_ncol_type(data)
## [1] 6
count_ncol_type(data, type_data = "character")
## [1] 1
# Keep only the rows whose symbol is INTC and count them
nrow_num <- nrow(filter(data, stock_symbol == "INTC"))
nrow_num
## [1] 3271
#' Count the rows recorded for one stock symbol
#'
#' Despite its name, this function counts rows; it does not read or sum the
#' `volume` column.
#'
#' @param .data A data frame with a `stock_symbol` column.
#' @param stock_symbol_name The symbol to compare against `stock_symbol`.
#' @return The number of rows whose `stock_symbol` equals
#'   `stock_symbol_name`.
count_vol_by_stock_symbol <- function(.data, stock_symbol_name) {
  .data %>%
    # `.env$` pins the comparison to the function argument, so a column that
    # happens to be called `stock_symbol_name` cannot shadow it
    filter(stock_symbol == .env$stock_symbol_name) %>%
    # Count rows
    nrow()
}
count_vol_by_stock_symbol(data, stock_symbol_name = "MSFT")
## [1] 3271
Create your own.
Use the `filter()` function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with `filter()`".