Lets bring some data in!
PD <- read_csv("~/Documents/Roberts Lab/Primer Databse/PrimerDatabase - primers.csv")
Missing column names filled in: 'X18' [18], 'X19' [19], 'X20' [20], 'X21' [21], 'X22' [22], 'X23' [23], 'X24' [24], 'X25' [25]Parsed with column specification:
cols(
.default = col_character(),
sr_ID = col_integer(),
`#bp` = col_integer(),
`GC%` = col_double(),
`melting temp` = col_double(),
`IDT #` = col_integer()
)
See spec(...) for full column specifications.
5 parsing failures.
row col expected actual
1365 melting temp a double .
1457 sr_ID no trailing characters .2
1458 sr_ID no trailing characters .1
1461 sr_ID no trailing characters .1
1463 sr_ID no trailing characters .1
primerbox <- read_csv("~/Documents/Roberts Lab/Primer Databse/primerbox.csv")
Parsed with column specification:
cols(
Position = col_character(),
Name = col_character(),
`SR ID` = col_integer(),
`IDT #` = col_integer(),
Organism = col_character(),
Date = col_character(),
Notes = col_character(),
`Box Name` = col_character()
)
Well, that’s annoying. Looks lik there’s some leftover formatting stuff from pulling it down from Sheets.
PD <- PD[25:length(PD$`Primer Sequence`),1:17]
for( i in 1:ncol(PD)) {
for(j in 1:nrow(PD)) {
PD[j,i] <- tolower(PD[j,i])
PD[j,i] <- gsub(" ", "_", PD[j,i])
}
}
for( i in 1:ncol(primerbox)) {
for(j in 1:nrow(primerbox)) {
primerbox[j,i] <- tolower(primerbox[j,i])
primerbox[j,i] <- gsub(" ", "_", primerbox[j,i])
}
}
primerbox <- primerbox[-c(which(is.na(primerbox$Name) == TRUE)),]
Sufficiently sanitized, lets try a “join”
PD.test <- PD
PB.test <- primerbox
leftover = PB.test[1,]
counter = 1
for(i in 1:nrow(PB.test)) {
for(j in 1:nrow(PD.test)) {
if((!is.na(PD.test$sr_ID[j]) && !is.na(PB.test$`SR ID`[i])) && PD.test$sr_ID[j] == PB.test$`SR ID`[i]) {
PD.test[j,18] <- PB.test$`Box Name`[i]
PD.test[j,19] <- PB.test$Position[i]
PD.test[j,20] <- PB.test$`IDT #`[i]
PD.test[j,21] <- PB.test$Date[i]
PD.test[j,22] <- PB.test$Notes[i]
} else if(PD.test$`Primer name`[j] == PB.test$Name[i]) {
PD.test[j,18] <- PB.test$`Box Name`[i]
PD.test[j,19] <- PB.test$Position[i]
PD.test[j,20] <- PB.test$`IDT #`[i]
PD.test[j,21] <- PB.test$Date[i]
PD.test[j,22] <- PB.test$Notes[i]
}
}
}
colnames(PD.test)[18:22] <- c("Box_Name", "Box_Position", "IDT #", "Date", "Box_Notes")
The code below combines the two IDT # columns, setting column 17 to be the “master” column, but retains any values in column 20 that do not match.
for(i in 1:nrow(PD.test)) {
if(!is.na(as.numeric(PD.test[i,17])) == TRUE && !is.na((as.numeric(PD.test[i,17]) != as.numeric(PD.test[i,20]))) && (as.numeric(PD.test[i,17]) == as.numeric(PD.test[i,20])) == TRUE ) {
PD.test[i,20] <- NA
}
}
NAs introduced by coercion
for(i in 1:nrow(PD.test)) {
if(!is.na(as.numeric(PD.test[i,20])) == TRUE && (is.na(PD.test[i,17]) == TRUE)) {
PD.test[i, 17] <- PD.test[i,20]
PD.test[i,20] <- NA
}
}
#write.csv(PD.test, "Combined_primer_database.csv")