Write a function that reads a directory full of files and reports the number of completely observed cases in each data file. The
function should return a data frame where the first column is the name of the file and the second column is the number of
complete cases.
Please save your code to a file named complete.R. To run the test script for this part, make sure your working directory has the
file complete.R in it and the run:
source(“http://spark-public.s3.amazonaws.com/compdata/scripts/complete-test.R”)
complete.testscript()
#
# |------------------------------------------------------------------------------------------|
# | I N I T I A L I Z A T I O N |
# |------------------------------------------------------------------------------------------|
Init <- function(workDirStr = "C:/Users/denbrige/100 FxOption/103 FxOptionVerBack/080 Fx Git/R-source") {
setwd(workDirStr)
}
#
# |------------------------------------------------------------------------------------------|
# | I N T E R N A L F U N C T I O N S |
# |------------------------------------------------------------------------------------------|
complete <- function(directory, id = 1:332) {
# --- Assert 'directory' is a character vector of length 1 indicating the
# location of the CSV files. 'id' is an integer vector indicating the
# monitor ID numbers to be used Return a data frame of the form: id nobs 1
# 117 2 1041 ... where 'id' is the monitor ID number and 'nobs' is the
# number of complete cases
# --- Assert create an empty vector
nobsNum <- numeric(0)
for (cid in id) {
# --- Assert get data frame as ID
cDfr <- getmonitor(cid, directory)
# --- Assert count the number of complete cases and append to numeric
# vector
nobsNum <- c(nobsNum, nrow(na.omit(cDfr)))
}
# --- Assert return value is a data frame with TWO (2) columns
data.frame(id = id, nobs = nobsNum)
}
getmonitor <- function(id, directory, summarize = FALSE) {
# --- Assert 'id' is a vector of length 1 indicating the monitor ID
# number. The user can specify 'id' as either an integer, a character, or
# a numeric. 'directory' is a character vector of length 1 indicating the
# location of the CSV files 'summarize' is a logical indicating whether a
# summary of the data should be printed to the console; the default is
# FALSE
# --- Assert construct file name Directory is pre-appended to file name.
# Use sprintf() to add leading zeroes. E.g. 'specdata/001.csv'
fileStr <- paste(directory, "/", sprintf("%03d", as.numeric(id)), ".csv",
sep = "")
# --- Assert read csv
rawDfr <- read.csv(fileStr)
# --- Assert summary if true
if (summarize) {
print(summary(rawDfr))
}
# --- Return value is a data frame
return(rawDfr)
}
#
# |------------------------------------------------------------------------------------------|
# | M A I N P R O C E D U R E |
# |------------------------------------------------------------------------------------------|
# --- Init set working directory
Init()
# --- Get data frame as ID = 1
complete("specdata", 1)
## id nobs
## 1 1 117
# --- Get data frame as ID = 2,4,6,8,10
complete("specdata", c(2, 4, 8, 10, 12))
## id nobs
## 1 2 1041
## 2 4 474
## 3 8 192
## 4 10 148
## 5 12 96
# --- Get data frame and summary as ID = 30..25
complete("specdata", 30:25)
## id nobs
## 1 30 932
## 2 29 711
## 3 28 475
## 4 27 338
## 5 26 586
## 6 25 463
# --- Get data frame and summary as ID = 3
complete("specdata", 3)
## id nobs
## 1 3 243
#
# |------------------------------------------------------------------------------------------|
# | E N D O F S C R I P T |
# |------------------------------------------------------------------------------------------|