Trying out the script I wrote for uploading files to Owl for the first time in a “production” setting.

## The three lines below are user supplied. dataDir is where the data currently is located (either mounted FTP directory or 
## some temporary directory where you've copied the files to locally), facilityMD5FileName is the MD5 file supplied 
## by the sequencing facility, considered "correct" for the purposes of this script, and owlDir is the directory where files 
## will be copied to in Owl
library(readr)
dataDir <- "~/Documents/Genewiz_hdd/"
facilityMD5FileName <- "md5sum.txt"
owlDir <- "~/Documents/owl/nightingales/P_generosa/RRBS2"
## Main Logic loop, First three if statements are checks to make sure the user has updated the script with directories
## and the MD5 file to ensure correct operation. If any of the criteria are not met, script stops automatically.
if(dataDir == "Input Data Dir Here" )   {
  print("Update dataDir variable with where your data is")
} else if (owlDir == "Input Owl Directory Here")   {
  print("Update owlDir with where you want the files copied to Owl")
} else if (facilityMD5FileName == "File Name Here")   {
  print("Update Script with MD5 file provided by the facility")
## The else loop assumes that everything is correct and proceeds with the checking and copying process
} else {
  # Sets the working directory to dataDir
  setwd(dataDir)
  # Pulls in all of the files which match the .gz file extension. May want to add a user supplied option for compression
  #schemes other than gzip.
  filenames <- list.files(path = dataDir, pattern = "*.gz")
  # Rums MD5 checks on all of the files, saving them to the external file chksum2.txt. This is just temporary and is removed
  # during cleanup
  for(i in 1: length(filenames))   {
    system(paste0("md5sum ", filenames[i], " >> chksum2.txt"))
  }
  # reads in and formats the facility and local MD5 files, removing any NA spaces due to read_delim only using a single whitespace
  # character to delimit, but most MD5 files seem to have two. Then names columns appropriately
  facility.MD5s <- read_delim(paste0(dataDir,facilityMD5FileName), 
                              "  ", escape_double = FALSE, col_names = FALSE, 
                              trim_ws = TRUE)
  
  facility.MD5s <- facility.MD5s[,!apply(is.na(facility.MD5s),2,all)]
  colnames(facility.MD5s) <- c("md5", "name")
  
  file.MD5s <- read_delim(paste0(dataDir,"chksum2.txt"), 
                          "  ", escape_double = FALSE, col_names = FALSE, 
                          trim_ws = TRUE)
  
  file.MD5s <- file.MD5s[,!apply(is.na(file.MD5s),2,all)]
  colnames(file.MD5s) <- c("md5", "name")
  
  ## Logic loop for checking MD5s and initiationg copying if MD5s match.
  for(i in 1:nrow(facility.MD5s))   {
    setwd(owlDir)
    ## Checks if the number of files match between chksum2.txt and the facility file. Stops script if they don't
    if (nrow(facility.MD5s) != nrow(file.MD5s))   {
      print("Number of Facility entries does not match number of files, check if all files are present")
      break
    # This loop is for when MD5s match, and will first copy the file to the supplied Owl directory, then re-run
    # an MD5, comparing it to the facility file again, and if that matches append the MD5 checksum to the existing
    # MD5 file and add the file name to the readme.MD file in Owl. If it fails, then it prints that the copy has failed,
    # removes the file from owl, and then stops the script
    }else if(facility.MD5s$md5[which(facility.MD5s$name == file.MD5s$name[i])] == file.MD5s$md5[i]) {
      system(paste0("scp ", dataDir, file.MD5s$name[i], " ", owlDir))
      tempMD5 <- substr(system(paste0("md5sum ",owlDir, "/", filenames[i]), intern = TRUE),1 , 32)
      if (facility.MD5s$md5[which(facility.MD5s$name == file.MD5s$name[i])] == tempMD5)   {
        system(paste0("echo ", file.MD5s$name[i], " >> readme.MD"))
        system(paste0("echo ", tempMD5, "  ", file.MD5s$name[i] ,">> checksum.MD5"))
        print(paste(file.MD5s$name[i], "copied sucessfully"))
      }else   {
        print("Copy Failure. Produced incorrect MD5")
        system("rm ", owlDir, "/", file.MD5s$name[i], intern = FALSE)
        break
      }
    # This final if statement is for if the inital file checksum and facility checksums do not match, if that's
    # the case then it prints that they've failed, with the file name, and saves the file name to a MD5Mismatch
    # file for further consideration. This does not stop the loop however
    } else if(facility.MD5s$md5[which(facility.MD5s$name == file.MD5s$name[i])] != file.MD5s$md5[i])   {
      print(paste("MD5 mismatch between facility and copied file for file ", file.MD5s$name[i]))
      system(paste0("echo ", file.MD5s$name[i], " >> MD5Mismatch.txt"))
    }
  }  
setwd(dataDir)
system("rm chksum2.txt", intern = FALSE)
}
Parsed with column specification:
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character()
)
Parsed with column specification:
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character()
)
sh: 1: cannot create readme.MD: Permission denied
sh: 1: cannot create checksum.MD5: Permission denied
[1] "EPI-135WG_S42_L005_R1_001.fastq.gz copied sucessfully"
cp: failed to close '/home/sean/Documents/owl/nightingales/P_generosa/RRBS2/EPI-135WG_S42_L005_R2_001.fastq.gz': Input/output error
sh: 1: cannot create readme.MD: Permission denied
sh: 1: cannot create checksum.MD5: Permission denied
[1] "EPI-135WG_S42_L005_R2_001.fastq.gz copied sucessfully"
sh: 1: cannot create readme.MD: Permission denied
sh: 1: cannot create checksum.MD5: Permission denied
[1] "EPI-151_S2_L002_R1_001.fastq.gz copied sucessfully"
[1] "Copy Failure. Produced incorrect MD5"
Error in system("rm ", owlDir, "/", file.MD5s$name[i], intern = FALSE) : 
  'ignore.stdout' must be TRUE or FALSE

Well, that’s a bummer. It looks like there are some permission issues with the nightingales directory on Owl. At least it appears the script worked!

LS0tCnRpdGxlOiAiT3dsIFVwbG9hZGVyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpUcnlpbmcgb3V0IHRoZSBzY3JpcHQgSSB3cm90ZSBmb3IgdXBsb2FkaW5nIGZpbGVzIHRvIE93bCBmb3IgdGhlIGZpcnN0IHRpbWUgaW4gYSAicHJvZHVjdGlvbiIgc2V0dGluZy4gCgoKYGBge3J9CiMjIFRoZSB0aHJlZSBsaW5lcyBiZWxvdyBhcmUgdXNlciBzdXBwbGllZC4gZGF0YURpciBpcyB3aGVyZSB0aGUgZGF0YSBjdXJyZW50bHkgaXMgbG9jYXRlZCAoZWl0aGVyIG1vdW50ZWQgRlRQIGRpcmVjdG9yeSBvciAKIyMgc29tZSB0ZW1wb3JhcnkgZGlyZWN0b3J5IHdoZXJlIHlvdSd2ZSBjb3BpZWQgdGhlIGZpbGVzIHRvIGxvY2FsbHkpLCBmYWNpbGl0eU1ENUZpbGVOYW1lIGlzIHRoZSBNRDUgZmlsZSBzdXBwbGllZCAKIyMgYnkgdGhlIHNlcXVlbmNpbmcgZmFjaWxpdHksIGNvbnNpZGVyZWQgImNvcnJlY3QiIGZvciB0aGUgcHVycG9zZXMgb2YgdGhpcyBzY3JpcHQsIGFuZCBvd2xEaXIgaXMgdGhlIGRpcmVjdG9yeSB3aGVyZSBmaWxlcyAKIyMgd2lsbCBiZSBjb3BpZWQgdG8gaW4gT3dsCmxpYnJhcnkocmVhZHIpCgpkYXRhRGlyIDwtICJ+L0RvY3VtZW50cy9HZW5ld2l6X2hkZC8iCmZhY2lsaXR5TUQ1RmlsZU5hbWUgPC0gIm1kNXN1bS50eHQiCm93bERpciA8LSAifi9Eb2N1bWVudHMvb3dsL25pZ2h0aW5nYWxlcy9QX2dlbmVyb3NhL1JSQlMyIgoKIyMgTWFpbiBMb2dpYyBsb29wLCBGaXJzdCB0aHJlZSBpZiBzdGF0ZW1lbnRzIGFyZSBjaGVja3MgdG8gbWFrZSBzdXJlIHRoZSB1c2VyIGhhcyB1cGRhdGVkIHRoZSBzY3JpcHQgd2l0aCBkaXJlY3RvcmllcwojIyBhbmQgdGhlIE1ENSBmaWxlIHRvIGVuc3VyZSBjb3JyZWN0IG9wZXJhdGlvbi4gSWYgYW55IG9mIHRoZSBjcml0ZXJpYSBhcmUgbm90IG1ldCwgc2NyaXB0IHN0b3BzIGF1dG9tYXRpY2FsbHkuCmlmKGRhdGFEaXIgPT0gIklucHV0IERhdGEgRGlyIEhlcmUiICkgICB7CiAgcHJpbnQoIlVwZGF0ZSBkYXRhRGlyIHZhcmlhYmxlIHdpdGggd2hlcmUgeW91ciBkYXRhIGlzIikKCn0gZWxzZSBpZiAob3dsRGlyID09ICJJbnB1dCBPd2wgRGlyZWN0b3J5IEhlcmUiKSAgIHsKICBwcmludCgiVXBkYXRlIG93bERpciB3aXRoIHdoZXJlIHlvdSB3YW50IHRoZSBmaWxlcyBjb3BpZWQgdG8gT3dsIikKCn0gZWxzZSBpZiAoZmFjaWxpdHlNRDVGaWxlTmFtZSA9PSAiRmlsZSBOYW1lIEhlcmUiKSAgIHsKICBwcmludCgiVXBkYXRlIFNjcmlwdCB3aXRoIE1ENSBmaWxlIHByb3ZpZGVkIGJ5IHRoZSBmYWNpbGl0eSIpCgojIyBUaGUgZWxzZSBsb29wIGFzc3VtZXMgdGhhdCBldmVyeXRoaW5nIGlzIGNvcnJlY3QgYW5kIHByb2NlZWRzIHdpdGggdGhlIGNoZWNraW5nIGFuZCBjb3B5aW5nIHByb2Nlc3MKfSBlbHNlIHsKICAjIFNldHMgdGhlIHdvcmtpbmcgZGlyZWN0b3J5IHRvIGRhdGFEaXIKICBzZXR3ZChkYXRhRGlyKQogICMgUHVsbHMgaW4gYWxsIG9mIHRoZSBmaWxlcyB3aGljaCBtYXRjaCB0aGUgLmd6IGZpbGUgZXh0ZW5zaW9uLiBNYXkgd2FudCB0byBhZGQgYSB1c2VyIHN1cHBsaWVkIG9wdGlvbiBmb3IgY29tcHJlc3Npb24KICAjc2NoZW1lcyBvdGhlciB0aGFuIGd6aXAuCiAgZmlsZW5hbWVzIDwtIGxpc3QuZmlsZXMocGF0aCA9IGRhdGFEaXIsIHBhdHRlcm4gPSAiKi5neiIpCgogICMgUnVtcyBNRDUgY2hlY2tzIG9uIGFsbCBvZiB0aGUgZmlsZXMsIHNhdmluZyB0aGVtIHRvIHRoZSBleHRlcm5hbCBmaWxlIGNoa3N1bTIudHh0LiBUaGlzIGlzIGp1c3QgdGVtcG9yYXJ5IGFuZCBpcyByZW1vdmVkCiAgIyBkdXJpbmcgY2xlYW51cAogIGZvcihpIGluIDE6IGxlbmd0aChmaWxlbmFtZXMpKSAgIHsKICAgIHN5c3RlbShwYXN0ZTAoIm1kNXN1bSAiLCBmaWxlbmFtZXNbaV0sICIgPj4gY2hrc3VtMi50eHQiKSkKICB9CiAgIyByZWFkcyBpbiBhbmQgZm9ybWF0cyB0aGUgZmFjaWxpdHkgYW5kIGxvY2FsIE1ENSBmaWxlcywgcmVtb3ZpbmcgYW55IE5BIHNwYWNlcyBkdWUgdG8gcmVhZF9kZWxpbSBvbmx5IHVzaW5nIGEgc2luZ2xlIHdoaXRlc3BhY2UKICAjIGNoYXJhY3RlciB0byBkZWxpbWl0LCBidXQgbW9zdCBNRDUgZmlsZXMgc2VlbSB0byBoYXZlIHR3by4gVGhlbiBuYW1lcyBjb2x1bW5zIGFwcHJvcHJpYXRlbHkKICBmYWNpbGl0eS5NRDVzIDwtIHJlYWRfZGVsaW0ocGFzdGUwKGRhdGFEaXIsZmFjaWxpdHlNRDVGaWxlTmFtZSksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiICAiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIGNvbF9uYW1lcyA9IEZBTFNFLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJpbV93cyA9IFRSVUUpCiAgCiAgZmFjaWxpdHkuTUQ1cyA8LSBmYWNpbGl0eS5NRDVzWywhYXBwbHkoaXMubmEoZmFjaWxpdHkuTUQ1cyksMixhbGwpXQogIGNvbG5hbWVzKGZhY2lsaXR5Lk1ENXMpIDwtIGMoIm1kNSIsICJuYW1lIikKICAKICBmaWxlLk1ENXMgPC0gcmVhZF9kZWxpbShwYXN0ZTAoZGF0YURpciwiY2hrc3VtMi50eHQiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIiAgIiwgZXNjYXBlX2RvdWJsZSA9IEZBTFNFLCBjb2xfbmFtZXMgPSBGQUxTRSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgdHJpbV93cyA9IFRSVUUpCiAgCiAgZmlsZS5NRDVzIDwtIGZpbGUuTUQ1c1ssIWFwcGx5KGlzLm5hKGZpbGUuTUQ1cyksMixhbGwpXQogIGNvbG5hbWVzKGZpbGUuTUQ1cykgPC0gYygibWQ1IiwgIm5hbWUiKQogIAogICMjIExvZ2ljIGxvb3AgZm9yIGNoZWNraW5nIE1ENXMgYW5kIGluaXRpYXRpb25nIGNvcHlpbmcgaWYgTUQ1cyBtYXRjaC4KICBmb3IoaSBpbiAxOm5yb3coZmFjaWxpdHkuTUQ1cykpICAgewogICAgc2V0d2Qob3dsRGlyKQogICAgIyMgQ2hlY2tzIGlmIHRoZSBudW1iZXIgb2YgZmlsZXMgbWF0Y2ggYmV0d2VlbiBjaGtzdW0yLnR4dCBhbmQgdGhlIGZhY2lsaXR5IGZpbGUuIFN0b3BzIHNjcmlwdCBpZiB0aGV5IGRvbid0CiAgICBpZiAobnJvdyhmYWNpbGl0eS5NRDVzKSAhPSBucm93KGZpbGUuTUQ1cykpICAgewogICAgICBwcmludCgiTnVtYmVyIG9mIEZhY2lsaXR5IGVudHJpZXMgZG9lcyBub3QgbWF0Y2ggbnVtYmVyIG9mIGZpbGVzLCBjaGVjayBpZiBhbGwgZmlsZXMgYXJlIHByZXNlbnQiKQogICAgICBicmVhawogICAgIyBUaGlzIGxvb3AgaXMgZm9yIHdoZW4gTUQ1cyBtYXRjaCwgYW5kIHdpbGwgZmlyc3QgY29weSB0aGUgZmlsZSB0byB0aGUgc3VwcGxpZWQgT3dsIGRpcmVjdG9yeSwgdGhlbiByZS1ydW4KICAgICMgYW4gTUQ1LCBjb21wYXJpbmcgaXQgdG8gdGhlIGZhY2lsaXR5IGZpbGUgYWdhaW4sIGFuZCBpZiB0aGF0IG1hdGNoZXMgYXBwZW5kIHRoZSBNRDUgY2hlY2tzdW0gdG8gdGhlIGV4aXN0aW5nCiAgICAjIE1ENSBmaWxlIGFuZCBhZGQgdGhlIGZpbGUgbmFtZSB0byB0aGUgcmVhZG1lLk1EIGZpbGUgaW4gT3dsLiBJZiBpdCBmYWlscywgdGhlbiBpdCBwcmludHMgdGhhdCB0aGUgY29weSBoYXMgZmFpbGVkLAogICAgIyByZW1vdmVzIHRoZSBmaWxlIGZyb20gb3dsLCBhbmQgdGhlbiBzdG9wcyB0aGUgc2NyaXB0CiAgICB9ZWxzZSBpZihmYWNpbGl0eS5NRDVzJG1kNVt3aGljaChmYWNpbGl0eS5NRDVzJG5hbWUgPT0gZmlsZS5NRDVzJG5hbWVbaV0pXSA9PSBmaWxlLk1ENXMkbWQ1W2ldKSB7CiAgICAgIHN5c3RlbShwYXN0ZTAoInNjcCAiLCBkYXRhRGlyLCBmaWxlLk1ENXMkbmFtZVtpXSwgIiAiLCBvd2xEaXIpKQogICAgICB0ZW1wTUQ1IDwtIHN1YnN0cihzeXN0ZW0ocGFzdGUwKCJtZDVzdW0gIixvd2xEaXIsICIvIiwgZmlsZW5hbWVzW2ldKSwgaW50ZXJuID0gVFJVRSksMSAsIDMyKQogICAgICBpZiAoZmFjaWxpdHkuTUQ1cyRtZDVbd2hpY2goZmFjaWxpdHkuTUQ1cyRuYW1lID09IGZpbGUuTUQ1cyRuYW1lW2ldKV0gPT0gdGVtcE1ENSkgICB7CiAgICAgICAgc3lzdGVtKHBhc3RlMCgiZWNobyAiLCBmaWxlLk1ENXMkbmFtZVtpXSwgIiA+PiByZWFkbWUuTUQiKSkKICAgICAgICBzeXN0ZW0ocGFzdGUwKCJlY2hvICIsIHRlbXBNRDUsICIgICIsIGZpbGUuTUQ1cyRuYW1lW2ldICwiPj4gY2hlY2tzdW0uTUQ1IikpCiAgICAgICAgcHJpbnQocGFzdGUoZmlsZS5NRDVzJG5hbWVbaV0sICJjb3BpZWQgc3VjZXNzZnVsbHkiKSkKICAgICAgfWVsc2UgICB7CiAgICAgICAgcHJpbnQoIkNvcHkgRmFpbHVyZS4gUHJvZHVjZWQgaW5jb3JyZWN0IE1ENSIpCiAgICAgICAgc3lzdGVtKCJybSAiLCBvd2xEaXIsICIvIiwgZmlsZS5NRDVzJG5hbWVbaV0sIGludGVybiA9IEZBTFNFKQogICAgICAgIGJyZWFrCiAgICAgIH0KICAgICMgVGhpcyBmaW5hbCBpZiBzdGF0ZW1lbnQgaXMgZm9yIGlmIHRoZSBpbml0YWwgZmlsZSBjaGVja3N1bSBhbmQgZmFjaWxpdHkgY2hlY2tzdW1zIGRvIG5vdCBtYXRjaCwgaWYgdGhhdCdzCiAgICAjIHRoZSBjYXNlIHRoZW4gaXQgcHJpbnRzIHRoYXQgdGhleSd2ZSBmYWlsZWQsIHdpdGggdGhlIGZpbGUgbmFtZSwgYW5kIHNhdmVzIHRoZSBmaWxlIG5hbWUgdG8gYSBNRDVNaXNtYXRjaAogICAgIyBmaWxlIGZvciBmdXJ0aGVyIGNvbnNpZGVyYXRpb24uIFRoaXMgZG9lcyBub3Qgc3RvcCB0aGUgbG9vcCBob3dldmVyCiAgICB9IGVsc2UgaWYoZmFjaWxpdHkuTUQ1cyRtZDVbd2hpY2goZmFjaWxpdHkuTUQ1cyRuYW1lID09IGZpbGUuTUQ1cyRuYW1lW2ldKV0gIT0gZmlsZS5NRDVzJG1kNVtpXSkgICB7CiAgICAgIHByaW50KHBhc3RlKCJNRDUgbWlzbWF0Y2ggYmV0d2VlbiBmYWNpbGl0eSBhbmQgY29waWVkIGZpbGUgZm9yIGZpbGUgIiwgZmlsZS5NRDVzJG5hbWVbaV0pKQogICAgICBzeXN0ZW0ocGFzdGUwKCJlY2hvICIsIGZpbGUuTUQ1cyRuYW1lW2ldLCAiID4+IE1ENU1pc21hdGNoLnR4dCIpKQogICAgfQogIH0gIApzZXR3ZChkYXRhRGlyKQpzeXN0ZW0oInJtIGNoa3N1bTIudHh0IiwgaW50ZXJuID0gRkFMU0UpCn0KCmBgYAoKV2VsbCwgdGhhdCdzIGEgYnVtbWVyLiBJdCBsb29rcyBsaWtlIHRoZXJlIGFyZSBzb21lIHBlcm1pc3Npb24gaXNzdWVzIHdpdGggdGhlIG5pZ2h0aW5nYWxlcyBkaXJlY3Rvcnkgb24gT3dsLiBBdCBsZWFzdCBpdCBhcHBlYXJzIHRoZSBzY3JpcHQgd29ya2VkIQo=