read genotypes

How to read genotypes stored in the Hypor format

This is based on code provided by Austin Putz, with a minor modification.

library(tidyverse) #it includes readr
# NOTE(review): setwd() ties the script to one machine's directory layout;
# the relative file name below is resolved against this directory.
setwd("D:/Breed composition hypore")

# Read the space-delimited, header-less genotype file. With col_names = FALSE
# readr names the columns X1, X2, X3. The third column holds each animal's
# genotype as one long string of digits, so it is pinned to character: type
# guessing must never turn it into a number, or the per-character split done
# later would no longer work. The other columns keep readr's default guessing.
data.geno <- read_delim(
  "genotypes_Hypor_JP_9010_2012-09-29.dat",
  delim = " ",
  col_names = FALSE,
  col_types = cols(X3 = col_character())
)

# Report rows (animals) x columns as a quick sanity check of the import.
dim(data.geno)

## [1] 9010    3

# Label the three imported columns: animal identifier, breed code, and the
# genotype string.
names(data.geno) <- c("Animal_ID", "Breed", "Genotype")

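Now split the genotype string into one column per marker and attach those columns back to the ID and breed columns. Note the use of the matrix function (filled by row) to rearrange the genotype dosages into one row per animal.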

# Split each genotype string into its individual characters; the result is a
# list holding one character vector per animal.
temp <- str_split(data.geno$Genotype, "")

# Every animal must carry the same number of markers. Without this check,
# matrix() would misalign or recycle dosages across rows when a genotype
# string is short, long, or missing.
n_markers <- unique(lengths(temp))
stopifnot(
  "all genotype strings must have the same length" = length(n_markers) == 1L
)

# Flatten the per-animal vectors and refill them row by row, giving one row
# per animal and one numeric dosage column per marker.
data.split <- matrix(
  as.numeric(unlist(temp)),
  nrow = nrow(data.geno),
  ncol = n_markers,
  byrow = TRUE
)

# Keep the identifier and breed columns and append the per-marker dosage
# matrix to their right; the original genotype string column is dropped.
data.geno <- cbind(
  data.geno[, c("Animal_ID", "Breed")],
  data.split
)
dim(data.geno)

## [1]  9010 45438

# Preview the first ten rows and the first five columns
# (ID, breed, and the first three marker dosages).
data.geno[seq_len(10), seq_len(5)]

##    Animal_ID Breed 1 2 3
## 1   15377830     C 2 0 2
## 2   15381905     C 2 1 1
## 3   15389253     C 2 1 2
## 4   15394976     C 2 2 2
## 5   15404794     C 2 2 2
## 6   15404860     C 2 1 2
## 7   15405068     C 2 1 2
## 8   15426179     C 2 1 2
## 9   15428620     C 2 2 2
## 10  15505425     C 2 0 2