Prepare packages and data
Read data
Data are read from a folder containing the provided FinalReport files. The folder path, the file names, and the sample IDs found in each file are listed below.
[1] "C:/Users/jsteibel/OneDrive/Documents/Berkshire/ABA/analyze"
[1] "American_Berkshire_Assn_GGPPORHDAFFY_20230822_FinalReport.txt"
[2] "American_Berkshire_Assn_GGPPORHDAFFY_20231011_FinalReport.txt"
[3] "American_Berkshire_Assn_GGPPORHDAFFY_20231012_FinalReport.txt"
[4] "American_Berkshire_Assn_GGPPORHDAFFY_20231019_FinalReport.txt"
$American_Berkshire_Assn_GGPPORHDAFFY_20230822_FinalReport.txt
[1] "00-1" "00-2" "1--1" "1--2" "102-9" "104-5" "105-6" "105-7" "106-1"
[10] "110-5" "14-2" "18-5" "2--2" "20-3" "21-4" "23-1" "23-3" "25--1"
[19] "3--50" "3--6" "32-2" "33-4" "34-1" "35-8" "36-8" "36-9" "37-3"
[28] "40-4" "54-3" "6--3" "6--7" "66-2" "7--3" "9--4" "9--9" "94-4"
$American_Berkshire_Assn_GGPPORHDAFFY_20231011_FinalReport.txt
[1] "42721048407" "31318044113" "10521036832" "10521044861" "21021065339"
[6] "GS513559" "10521044731" "10521044498" "GS513558" "52919014981"
[11] "GS513537" "GS513581" "GS513579" "GS513560" "12422056984"
[16] "72214038838" "GS513575" "GS517680" "GS513574" "GS512903"
[21] "GS517683" "GS512867" "GS519661" "GS519640" "GS512894"
[26] "173655D" "GS513562" "GS519664" "72214026632" "GS500917"
[31] "GS513555" "12422056997" "GS513416" "GS501329" "GS513485"
[36] "GS513484" "GS513483" "GS513486" "GS532083" "GS513564"
[41] "GS501391" "12422056970" "GS513421" "GS500921" "GS532074"
[46] "GS513488" "GS532080" "GS523792" "GS532075" "GS532078"
[51] "GS513568" "GS517679" "GS513541" "GS519639" "GS532079"
[56] "GS513487" "GS532073" "GS532077" "GS532076" "GS513482"
[61] "GS532081" "GS532082" "GS532084" "GS532085" "GS513478"
[66] "GS513479" "GS513477" "GS513476" "GS513475" "GS513420"
[71] "GS512896" "31815028661" "GS501330" "GS513023" "42721065142"
[76] "GS512715" "10521038099" "31815028579" "GS513417" "35796002"
[81] "GS517637" "12422023120" "11300952817" "GS513422" "82115084125"
[86] "GS513534" "GS501318" "GS513556" "GS513585" "GS501153"
[91] "GS513525" "GS513418" "GS513557" "GS519642" "GS512899"
[96] "10521014497" "10521038100" "9161325807" "12422056972" "GS513419"
[101] "GS513577" "9161325809" "GS513552" "8161326100" "GS513573"
[106] "GS513536" "GS517677" "GS513563" "72214038759" "102120046574"
[111] "29718" "GS481971" "GS513538" "GS514325" "GS513507"
[116] "31815026907" "31815026931" "GS519669" "GS515603" "GS500916"
[121] "42721065166" "GS517960" "GS513572" "GS512362" "72214038847"
[126] "12422023137" "GS519709" "50603" "GS513580" "GS513569"
[131] "GS513533" "101019016842" "GS512892" "GS513551" "GS513523"
[136] "GS412923" "GS505882" "10521036892" "GS513542" "GS512904"
[141] "43804" "GS517964" "GS513535" "82115084190" "1111226287"
[146] "GS514324" "12422025566" "GS513554" "GS513582" "12422057415"
[151] "42721065180" "GS500920" "22717077188" "GS512847" "GS512897"
[156] "GS513561" "GS517967" "GS517963" "GS513583" "121120012013"
[161] "GS513020" "GS500691" "9161325631" "GS509137" "GS513565"
[166] "GS486533" "GS513570" "GS517945" "72214026673" "12422023375"
[171] "GS500918" "GS507968" "11300921323" "GS513508" "GS513578"
[176] "31815019192" "GS513543" "GS513566" "GS513509" "GS513586"
[181] "GS513539" "GS513567" "6315110233" "GS513571" "GS513540"
[186] "GS513576" "GS513520" "8161241419"
$American_Berkshire_Assn_GGPPORHDAFFY_20231012_FinalReport.txt
[1] "GS513437" "GS513438" "GS513439" "62521089551" "62521089554"
[6] "GS513443" "GS517958" "GS519706" "2219051047" "51927001"
[11] "GS513492" "165049D" "GS512921" "GS513431" "GS513495"
[16] "GS512987" "GS501338" "GS517925" "1111226193" "GS507934"
[21] "GS513489" "GS513493" "GS513444" "173659D" "GS513021"
[26] "GS513025" "90514015203" "31318044376" "GS500913" "GS513099"
[31] "GS501331" "171504D" "61165001" "GS501332" "6315110266"
[36] "101019016841" "GS513480" "82321065367" "135667D" "GS501277"
[41] "GS517917" "GS512363" "GS501275" "16268" "GS513434"
[46] "GS513432" "GS513430" "GS517926" "GS513447" "12422056962"
[51] "GS513445" "173661D" "GS513425" "GS500457" "GS512886"
[56] "GS515606" "GS520794" "GS513448" "GS513440" "1213001"
[61] "GS513435" "GS507933" "GS513428" "GS501273" "GS501385"
[66] "GS513426" "GS513423" "165052D" "GS520665" "GS513442"
[71] "GS513429" "GS513441" "GS501274" "GS507973" "GS517928"
[76] "GS512361" "GS513424" "GS513494" "GS513427" "165050D"
[81] "GS512905" "GS507987" "GS501413" "GS513491" "GS513024"
[86] "GS520890" "GS517784" "GS512999" "GS501327" "GS501387"
[91] "171512D" "GS513490" "GS517969" "GS513433" "GS501276"
[96] "GS512997" "509855" "GS517634" "148117001" "22717077633"
[101] "GS512996" "GS475584" "42721064989" "52919014982" "GS513446"
[106] "GS513019" "GS512882" "GS513497" "GS501278" "GS513498"
[111] "GS513022" "173660D" "31318044392"
$American_Berkshire_Assn_GGPPORHDAFFY_20231019_FinalReport.txt
[1] "GS513513" "GS513521" "GS513528" "GS513524" "8161200483"
[6] "GS513587" "GS513546" "GS513526" "GS532227" "GS513549"
[11] "GS513548" "GS513510" "GS513522" "GS513584" "9161325801"
[16] "GS513511" "GS513514" "GS513550" "GS513502" "GS513503"
[21] "GS513512" "GS513496" "GS513518" "GS513504" "GS513519"
[26] "GS513530" "GS513547" "GS513500" "GS513532" "GS513499"
[31] "GS513531" "GS513515" "GS513544" "GS513545" "GS513501"
[36] "GS513553" "GS513529" "GS513505" "GS513517" "GS513506"
Compute allele frequencies for this population and compare to another (publicly available) reference panel
10000235 10000345 10004575 10007074 10007097 10007113
0.3556150 0.1818182 0.9518950 0.5642633 0.3480000 0.9369973
After matching SNPs between the two panels, let’s explore the allele frequencies
library (ggplot2)
library (GGally)
ggpairs (GWBC_spec)
The correlation of Berkshire allele frequencies is highest with Hampshire and lowest with Duroc.
This means that Hampshire will be the hardest breed to separate from Berkshire. However, in my experience, this correlation is weak enough that the breeds can still be separated well.
Specifically, a similar correlation between Yorkshire and Landrace allele frequencies does not prevent those two breeds from being separated.
Next: we need crossbred genotypes to test this procedure [not included here]
Apply breed composition algorithm
Test on all samples for now
Visualize and summarize some results
Proportion of all samples with > 90% estimated Berkshire contribution
Samples with < 90% estimated Berkshire contribution (sample IDs listed below)
[1] "00-1" "00-2" "104-5" "20-3" "23-3"
[6] "3--6" "32-2" "6--7" "66-2" "94-4"
[11] "GS513575" "GS517683" "GS513416" "GS501329" "GS513485"
[16] "GS513484" "GS513483" "GS513486" "GS513564" "GS532078"
[21] "GS532073" "GS513482" "10521038099" "GS513422" "82115084125"
[26] "GS513525" "GS513557" "GS519642" "GS513419" "8161326100"
[31] "GS517677" "29718" "31815026907" "31815026931" "72214038847"
[36] "GS519709" "12422057415" "42721065180" "GS500920" "GS512897"
[41] "GS513561" "GS517963" "GS513570" "12422023375" "11300921323"
[46] "31815019192" "6315110233" "GS513540" "GS513437" "GS513438"
[51] "GS513439" "51927001" "GS513492" "GS512921" "GS501338"
[56] "GS513444" "GS513099" "GS501332" "82321065367" "135667D"
[61] "GS512363" "GS517926" "173661D" "GS520794" "GS513440"
[66] "GS501274" "GS512361" "165050D" "GS507987" "GS501387"
[71] "GS517969" "GS513433" "GS517634" "GS513497" "173660D"
[76] "GS513528" "8161200483" "GS513587" "GS532227" "GS513496"
[81] "GS513547" "GS513515" "GS513505"
Proportion of all samples with < 50% estimated Berkshire contribution
Proportion of all samples with < 75% estimated Berkshire contribution, followed by the IDs of those samples and their estimated breed composition
[1] "00-1" "00-2" "GS513416" "GS517677" "12422057415"
[6] "31815019192" "GS501338" "GS513099" "GS512363" "GS520794"
Duroc Hampshire Landrace Yorkshire Berk R2
00-1 0.0082 0.2692 0.0000 0.5639 0.1587 0.3402
00-2 0.0076 0.2694 0.0000 0.5652 0.1578 0.3396
GS513416 0.0000 0.1572 0.0097 0.2035 0.6296 0.4096
GS517677 0.0000 0.1030 0.0069 0.2809 0.6091 0.4486
12422057415 0.0000 0.0449 0.0000 0.2306 0.7245 0.4631
31815019192 0.0806 0.0946 0.0738 0.0080 0.7429 0.3998
GS501338 0.0041 0.1845 0.0069 0.2868 0.5178 0.4371
GS513099 0.0747 0.0714 0.0799 0.0268 0.7473 0.4104
GS512363 0.0000 0.1111 0.0000 0.2475 0.6414 0.4780
GS520794 0.0000 0.1018 0.0000 0.2274 0.6708 0.4531
[1] "C:/Users/jsteibel/OneDrive/Documents/Berkshire/ABA/analyze/output_composition.csv"