This R Markdown document shows how to read in a raw, tab-delimited .txt file
and write it back out in a human-readable format.
# Read the raw tab-delimited .txt export of the American Gut Project survey.
# - header = FALSE: do not use the first line as column names; the columns
#   are named V1 and V2 instead.
# - fill = TRUE: pad rows that have fewer fields than the others.
# - quote = "": treat quote characters in the answers as ordinary text.
# - comment.char = "": read.table() treats "#" as a comment marker by
#   default and would silently drop the rest of any line containing one.
biosample <- read.table(
  "biosample_result (1).txt",
  sep = "\t",
  header = FALSE,
  fill = TRUE,
  quote = "",
  comment.char = ""
)
# Preview the first rows to check how the file was parsed.
head(biosample)
## V1 V2
## 1 1: american gut project; 10317.X00185902
## 2 Identifiers: BioSample: SAMEA112990854; SRA: ERS14985877
## 3 Organism: human gut metagenome
## 4 Attributes:
## 5 /ENA-CHECKLIST ERC000011
## 6 /ENA-FIRST-PUBLIC 4/28/2023
# Strip the "/" that prefixes the survey question names in the first column.
# sub() replaces only the first "/" found in each value.
biosample[["V1"]] <- sub("/", "", biosample[["V1"]])
# Name the two columns: the survey questions and the submitted answers.
headers <- c("Survey", "Submission")
biosample <- setNames(biosample, headers)
# Print the first rows to confirm the new column names and cleaned values.
head(biosample)
## Survey Submission
## 1 1: american gut project; 10317.X00185902
## 2 Identifiers: BioSample: SAMEA112990854; SRA: ERS14985877
## 3 Organism: human gut metagenome
## 4 Attributes:
## 5 ENA-CHECKLIST ERC000011
## 6 ENA-FIRST-PUBLIC 4/28/2023
# Print the last six rows to confirm the end of the file was parsed as well.
tail(biosample)
## Survey Submission
## 326 weight_change increased more than 10 pounds
## 327 whole_eggs rarely (less than once/week)
## 328 whole_grain_frequency occasionally (1-2 times/week)
## 329 Description:
## 330 american gut project stool sample
## 331 Accession: SAMEA112990854 ID: 34470136
# Save the cleaned survey/submission table as a .csv file for easier reading.
# With the write.csv() defaults, the row names are written as an unnamed
# first column of the file.
write.csv(x = biosample, file = "biosample.csv")