Use GWBC_ref_C as the reference data

library(breedTools)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reference data set that is passed to the solve_composition() calls below.
data(GWBC_ref_C)
# Additional objects stored in a local .RData file.
# NOTE(review): presumably this supplies the *MarcGenoDose objects used
# further down -- confirm against the contents of the file.
load(file = "~/Documents/rproject/statcourse/additional_ref_geno.RData")

Use breedTools

duroc

# Estimate breed composition for the animals in durocMarcGenoDose against the
# GWBC_ref_C reference data. No pedigree or grouping information is supplied
# (ped/groups are NULL; mia, sire and dam are FALSE).
breed_duroc <- solve_composition(durocMarcGenoDose, GWBC_ref_C,
                                 ped = NULL, groups = NULL, mia = FALSE,
                                 sire = FALSE, dam = FALSE)

# Keep only the first result column as a one-column data frame named "ratio".
# NOTE(review): assumes column 1 is the Duroc component -- confirm with
# colnames(breed_duroc).
breed_duroc1 <- as.data.frame(breed_duroc[, 1])
names(breed_duroc1) <- "ratio"

# Count animals at or above the 95% threshold. Rows with a missing ratio are
# not counted as passing but still count towards the total, and no variable
# named `sum` is created, so base::sum() is not shadowed.
n_duroc_pass <- sum(breed_duroc1$ratio >= 0.95, na.rm = TRUE)
n_duroc_total <- nrow(breed_duroc1)

# proportion of animals with ratio >= 95%
n_duroc_pass / n_duroc_total
## [1] 0.8409091

hampshire

# Estimate breed composition for the animals in hampshireMarcGenoDose against
# the GWBC_ref_C reference data. No pedigree or grouping information is
# supplied (ped/groups are NULL; mia, sire and dam are FALSE).
breed_hampshire <- solve_composition(hampshireMarcGenoDose, GWBC_ref_C,
                                     ped = NULL, groups = NULL, mia = FALSE,
                                     sire = FALSE, dam = FALSE)

# Keep only the second result column as a one-column data frame named "ratio".
# NOTE(review): assumes column 2 is the Hampshire component -- confirm with
# colnames(breed_hampshire).
breed_hampshire1 <- as.data.frame(breed_hampshire[, 2])
names(breed_hampshire1) <- "ratio"

# Count animals at or above the 95% threshold. Rows with a missing ratio are
# not counted as passing but still count towards the total, and no variable
# named `sum` is created, so base::sum() is not shadowed.
n_hampshire_pass <- sum(breed_hampshire1$ratio >= 0.95, na.rm = TRUE)
n_hampshire_total <- nrow(breed_hampshire1)

# proportion of animals with ratio >= 95%
n_hampshire_pass / n_hampshire_total
## [1] 1

landrace

# Estimate breed composition for the animals in landraceMarcGenoDose against
# the GWBC_ref_C reference data. No pedigree or grouping information is
# supplied (ped/groups are NULL; mia, sire and dam are FALSE).
breed_landrace <- solve_composition(landraceMarcGenoDose, GWBC_ref_C,
                                    ped = NULL, groups = NULL, mia = FALSE,
                                    sire = FALSE, dam = FALSE)

# Keep only the third result column as a one-column data frame named "ratio".
# NOTE(review): assumes column 3 is the Landrace component -- confirm with
# colnames(breed_landrace).
breed_landrace1 <- as.data.frame(breed_landrace[, 3])
names(breed_landrace1) <- "ratio"

# Count animals at or above the 95% threshold. Rows with a missing ratio are
# not counted as passing but still count towards the total, and no variable
# named `sum` is created, so base::sum() is not shadowed.
n_landrace_pass <- sum(breed_landrace1$ratio >= 0.95, na.rm = TRUE)
n_landrace_total <- nrow(breed_landrace1)

# proportion of animals with ratio >= 95%
n_landrace_pass / n_landrace_total
## [1] 0.6615385

yorkshire

# Estimate breed composition for the animals in yorkshireMarcGenoDose against
# the GWBC_ref_C reference data. No pedigree or grouping information is
# supplied (ped/groups are NULL; mia, sire and dam are FALSE).
breed_yorkshire <- solve_composition(yorkshireMarcGenoDose, GWBC_ref_C,
                                     ped = NULL, groups = NULL, mia = FALSE,
                                     sire = FALSE, dam = FALSE)

# Keep only the fourth result column as a one-column data frame named "ratio".
# NOTE(review): assumes column 4 is the Yorkshire component -- confirm with
# colnames(breed_yorkshire).
breed_yorkshire1 <- as.data.frame(breed_yorkshire[, 4])
names(breed_yorkshire1) <- "ratio"

# Count animals at or above the 95% threshold. Rows with a missing ratio are
# not counted as passing but still count towards the total, and no variable
# named `sum` is created, so base::sum() is not shadowed.
n_yorkshire_pass <- sum(breed_yorkshire1$ratio >= 0.95, na.rm = TRUE)
n_yorkshire_total <- nrow(breed_yorkshire1)

# proportion of animals with ratio >= 95%
n_yorkshire_pass / n_yorkshire_total
## [1] 0.7256637

Genotype data

library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.1     ✔ forcats 0.5.2
## ✔ readr   2.1.3     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read the space-delimited genotype file. It has no header row, so readr
# assigns the placeholder column names X1..X3.
data.geno <- read_delim(
  file = "/Users/xiaohanj/Desktop/博一开学/animalscience/genotypes_Hypor_JP_9010_2012-09-29.dat",
  delim = " ",
  col_names = FALSE
)
## Rows: 9010 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (2): X2, X3
## dbl (1): X1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sanity check on the size of the raw table (rows x columns).
dim(data.geno)
## [1] 9010    3
# Replace the placeholder names with descriptive ones.
names(data.geno) <- c("Animal_ID", "Breed", "Genotype")
# Split every genotype string into its individual characters
# (one list element per animal).
temp <- str_split(data.geno$Genotype, "")

# Number of markers, taken from the first animal's string. The column is
# indexed as a plain character vector rather than a 1x1 tibble, so nchar()
# does not depend on implicit list-to-character coercion.
n_markers <- nchar(data.geno$Genotype[1])

# All animals must have the same number of markers; otherwise matrix() would
# fill rows from the wrong animal's values without a clear error.
stopifnot(all(lengths(temp) == n_markers))

# Arrange the characters as a numeric matrix: one row per animal, one column
# per marker. byrow = TRUE keeps each animal's markers together in a row.
data.split <- matrix(as.numeric(unlist(temp)),
                     nrow = nrow(data.geno),
                     ncol = n_markers,
                     byrow = TRUE)

# Combine the ID and breed columns with the per-marker genotype columns.
data.geno <- cbind(data.geno[, 1:2], data.split)
dim(data.geno)
## [1]  9010 45438

breedtools

# Drop the first two columns of data.geno, keeping the remaining columns.
breed_geno <- data.geno[, -(1:2)]
# Small subset (first 10 rows, first 5 columns) for a quick trial.
breed_geno2 <- breed_geno[seq_len(10), seq_len(5)]
# The trial call itself is left disabled:
# breed_output<-solve_composition(breed_geno2,GWBC_ref_C, ped = NULL, groups = NULL, mia = FALSE,sire = FALSE, dam = FALSE)