These are my solutions to the Week 2 coding assignment for the “R Programming” course on Coursera. These functions get the job done, but there are obviously ways to make them faster and to avoid the loops.

The first function calculates the mean of a pollutant across the files selected by id:

# Mean of one pollutant column pooled over the selected files.
#
# Args:
#   directory: path of the folder that holds the CSV files.
#   pollutant: name (or index) of the column to average.
#   id:        positions of the files to use within the sorted listing
#              returned by list.files(directory).
# Returns:
#   The mean of all non-NA values of `pollutant` across the selected files.
pollutantmean <- function(directory, pollutant, id = 1:332) {
    # full.names = TRUE makes the paths usable from any working directory.
    files <- list.files(directory, full.names = TRUE)

    # Fail early with a clear message instead of passing NA to read.csv().
    if (!all(id %in% seq_along(files))) {
        stop("'id' must index files in '", directory, "' (1 to ",
             length(files), ")", call. = FALSE)
    }

    # Only the requested column is kept, so no big combined data frame is
    # needed. unique() keeps the one-contribution-per-file behaviour when an
    # id is listed more than once.
    values <- lapply(files[unique(id)], function(path) {
        data <- read.csv(path, header = TRUE)
        data[, pollutant]
    })

    mean(unlist(values), na.rm = TRUE)
}

The second function counts the complete cases for each id. It gave the wrong answer on one of the quiz questions, which indexed the data frame in reverse order; I haven’t figured out the solution yet:

# Count, for each selected file, the rows where columns 2 and 3 are both
# present.
#
# Args:
#   directory: path of the folder that holds the CSV files.
#   id:        positions of the files to use within the sorted listing
#              returned by list.files(directory).
# Returns:
#   A data frame with one row per element of `id`, in the order given:
#   `id` is the requested position and `nobs` the number of rows of that
#   file with no NA in column 2 or column 3.
complete <- function(directory, id = 1:332) {
    # full.names = TRUE makes the paths usable from any working directory.
    files <- list.files(directory, full.names = TRUE)

    # Fail early with a clear message instead of passing NA to read.csv().
    if (!all(id %in% seq_along(files))) {
        stop("'id' must index files in '", directory, "' (1 to ",
             length(files), ")", call. = FALSE)
    }

    # Iterate over files[id] rather than storing results at position `i`:
    # this keeps nobs aligned with `id` for descending, sparse or repeated
    # ids alike.
    nobs <- vapply(files[id], function(path) {
        data <- read.csv(path, header = TRUE)
        sum(!is.na(data[, 2]) & !is.na(data[, 3]))
    }, integer(1), USE.NAMES = FALSE)

    data.frame(id = id, nobs = nobs)
}

The third function calculates the correlations for the ids whose number of complete cases meets a threshold:

# Correlation between columns 2 and 3 for every file in `directory` that
# has enough complete rows.
#
# Args:
#   directory: path of the folder that holds the CSV files.
#   threshold: number of rows with both column 2 and column 3 present that
#              a file must reach to be included.
# Returns:
#   An unnamed numeric vector of correlations in list.files() order. Files
#   whose correlation is NA are dropped; the result is numeric(0) when no
#   file qualifies.
corr <- function(directory, threshold = 0) {
    # full.names = TRUE makes the paths usable from any working directory.
    files <- list.files(directory, full.names = TRUE)

    # Each file is read once: count its complete rows, then correlate only
    # if it qualifies. Non-qualifying files yield NA and are dropped below.
    r <- vapply(files, function(path) {
        data <- read.csv(path, header = TRUE)
        nobs <- sum(!is.na(data[, 2]) & !is.na(data[, 3]))

        # NOTE(review): the comparison is inclusive (>=), so a file with
        # exactly `threshold` complete rows is kept - confirm against the
        # assignment wording.
        # With fewer than two complete pairs the correlation is NA anyway;
        # skipping cor() also avoids its error on empty input.
        if (nobs < threshold || nobs < 2) {
            return(NA_real_)
        }
        cor(data[, 2], data[, 3], use = "pairwise.complete.obs")
    }, numeric(1), USE.NAMES = FALSE)

    r[!is.na(r)]
}