This file will take Jeff’s latest CDBN experiment data (V1.3) and do the following:
wb <- loadWorkbook("All_experiments_V1.3.1.xlsx")
lst = readWorksheet(wb, sheet = getSheets(wb))
metadata1 <- loadWorkbook("../../CDBN Variety Info/CDBN_Metadata_sequencing_2017-07-25.xlsx")
metadata = readWorksheet(metadata1, sheet = getSheets(metadata1))
lst$GermplasmI added the echo = FALSE parameter to most of the code for this step, so the html file does not show several hundred regular expression substitutions that generated the CDBN_ID’s from the Genotypes. If you want to see these, refer to the .Rmd document.
Here are three punctuation fixes, as examples:
Germplasm$CDBN_ID <- gsub("-","_", Germplasm$CDBN_ID)
Germplasm$CDBN_ID <- gsub(" ","_", Germplasm$CDBN_ID)
Germplasm$CDBN_ID <- gsub("Long's_Peak","Longs_Peak", Germplasm$CDBN_ID)
The table below is empty, which means that there are no remaining CDBN_ID’s in my metadata that are unaccounted for.
metadata %>%
anti_join(Germplasm, by = "CDBN_ID") %>%
select(CDBN_ID)
## [1] CDBN_ID
## <0 rows> (or 0-length row.names)
Germplasm_ahm <- Germplasm %>%
left_join(metadata, by = "CDBN_ID") %>%
select(Genotype:GBS_barcode_length.2, MDP_ID, Flower_color:Stem_thickness_below_cotyledons_width.1, Hue:Width_10..cm., Gene_pool:Market.class, Seed.From.1:Seed.From.2, BEANCAP_ID:Synonym.10)
Germplasm_small <- Germplasm_ahm %>%
select(Genotype, CDBN_ID, Seq_ID, Gene_pool, Race, Market.class)
Phenotypes <- lst$Phenotypes
Phenotypes_ahm <- Phenotypes %>%
left_join(Germplasm_small, by = "Genotype") %>%
select(Genotype, CDBN_ID, Seq_ID, Location_code, Year, Gene_pool, Race, Market.class, everything())
loc_transl_wb <- loadWorkbook("Location_code_translations_w_JW_check.xlsx")
loc_transl = readWorksheet(loc_transl_wb, sheet = getSheets(loc_transl_wb))
Locations <- lst$Locations
loc_transl <- loc_transl %>%
mutate(Location_code_key = coalesce(Location_code, Location_code_v2))
Locations_ahm <- Locations %>%
left_join(loc_transl, by = c("Location_code" = "Location_code_key", "State")) %>%
select(Location_code, Climate_bin, State, Latitude, Longitude, everything()) %>%
mutate_if(is.numeric, funs(na_if(., -99)))
lst$Germplasm, Phenotypes_ahm to lst$Phenotypes, Locations_ahm to lst$Locations, and save a new All_Experiments_V1.3.1.xlsx file.NB: I commented out this code because it only needed to run once to generate the new Excel file.
# createSheet(wb, name = "Germplasm_ahm")
# createSheet(wb, name = "Phenotypes_ahm")
# createSheet(wb, name = "Locations_ahm")
# writeWorksheet(wb, Germplasm_ahm, sheet = "Germplasm_ahm")
# writeWorksheet(wb, Phenotypes_ahm, sheet = "Phenotypes_ahm")
# writeWorksheet(wb, Locations_ahm, sheet = "Locations_ahm")
# saveWorkbook(wb)