Load libraries
# load the libraries at the beginning of rmd file
library(dplyr)
# library(stringr)
fn_read_data_file
- Note: the `@export` tag “puts the function name in the package NAMESPACE file which means it can be accessed by users after they run `library(<packagename>)`”.
- Reference here.
- Usually, helper functions are not exported
- Note: the difference between `@import` and `@importFrom`: `@import` imports every function in the package into the namespace (which could lead to conflicts in the namespace); `@importFrom` imports only specific functions from the package.
#' @title Read Data File to Summarize
#' @description Reads a tab-delimited file with a header row and computes
#'   summary statistics (row count, mean, median, variance) for one column.
#' @param filePath A single string giving the full file path to the data.
#' @param column The unquoted name of the column to summarise; it is passed
#'   on to `summarise()` with the embrace operator `{{ }}`.
#' @return A one-row data frame with the columns `names` (the file name
#'   without its directory), `N` (number of rows read), `mean`, `median`
#'   and `var`.
#' @details Missing values are not removed, so `mean`, `median` and `var`
#'   are `NA` whenever the column contains an `NA`.
#' @export
#' @importFrom dplyr summarise n
#' @importFrom stats median var
#' @importFrom utils read.table
#' @examples
#' tmp <- tempfile(fileext = ".txt")
#' write.table(data.frame(trait = c(1, 2, 3, 6)), tmp,
#'             sep = "\t", row.names = FALSE)
#' fn_read_data_file(filePath = tmp, column = trait)
#' unlink(tmp)
fn_read_data_file <- function(filePath, column) {
  # Fail early with a clear message: basename() below needs one path string.
  stopifnot(is.character(filePath), length(filePath) == 1L)

  # Load the data into a temporary data frame.
  tmp_df <- read.table(filePath, sep = "\t", header = TRUE)

  # summarise() is called directly instead of through `%>%`: the pipe was
  # never imported, so the piped version fails inside a package namespace.
  # dplyr::n() replaces nrow(.), which only worked with the magrittr pipe.
  dplyr::summarise(
    tmp_df,
    # basename() drops everything up to and including the last path
    # separator, leaving only the dataset file name.
    names = basename(filePath),
    N = dplyr::n(),
    mean = mean({{ column }}),
    median = median({{ column }}),
    var = var({{ column }})
  )
}
Get the summary table
# Directory that holds the data files summarised below.
path_to_dir <- "~/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data"
# Collect the full path of every file found in that directory.
ls_files <- list.files(full.names = TRUE, path = path_to_dir)
print(ls_files)
## [1] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset1.txt"
## [2] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset2.txt"
## [3] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset3.txt"
## [4] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset4.txt"
## [5] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset5.txt"
## [6] "/Users/vincent/Desktop/HUGEN2071/07_R_functions_debug_BuildPackage/data/dataset6.txt"
# Pre-allocate one typed row per input file. Every column is sized from
# length(ls_files), so an empty file list gives an empty table rather than a
# "differing number of rows" error.
df_summary <- data.frame(
  names = rep(NA_character_, length(ls_files)),
  N = rep(NA_integer_, length(ls_files)),
  mean = rep(NA_real_, length(ls_files)),
  median = rep(NA_real_, length(ls_files)),
  var = rep(NA_real_, length(ls_files))
)
# fn_read_data_file() returns a one-row data frame; store each result in the
# row of df_summary that matches its file.
# seq_along() iterates zero times for an empty list, whereas
# seq(length(x)) would yield c(1, 0) and index out of range.
for (i in seq_along(ls_files)) {
  df_summary[i, ] <- fn_read_data_file(filePath = ls_files[i], column = trait)
}
df_summary