ABA genotype analyses

breed composition

Author

Juan Steibel

Published

January 1, 2026

Setup Code

Prepare packages and data

Read data

Data are read from a folder containing the provided final report files, which are listed below together with the sample names found in each file.

[1] "C:/Users/jsteibel/OneDrive/Documents/Berkshire/ABA/analyze"
[1] "File names"
[1] "----------"
[1] "American_Berkshire_Assn_GGPPORHDAFFY_20230822_FinalReport.txt"
[2] "American_Berkshire_Assn_GGPPORHDAFFY_20231011_FinalReport.txt"
[3] "American_Berkshire_Assn_GGPPORHDAFFY_20231012_FinalReport.txt"
[4] "American_Berkshire_Assn_GGPPORHDAFFY_20231019_FinalReport.txt"
[1] "----------"
[1] "sample names"
[1] "----------"
$American_Berkshire_Assn_GGPPORHDAFFY_20230822_FinalReport.txt
 [1] "00-1"  "00-2"  "1--1"  "1--2"  "102-9" "104-5" "105-6" "105-7" "106-1"
[10] "110-5" "14-2"  "18-5"  "2--2"  "20-3"  "21-4"  "23-1"  "23-3"  "25--1"
[19] "3--50" "3--6"  "32-2"  "33-4"  "34-1"  "35-8"  "36-8"  "36-9"  "37-3" 
[28] "40-4"  "54-3"  "6--3"  "6--7"  "66-2"  "7--3"  "9--4"  "9--9"  "94-4" 

$American_Berkshire_Assn_GGPPORHDAFFY_20231011_FinalReport.txt
  [1] "42721048407"  "31318044113"  "10521036832"  "10521044861"  "21021065339" 
  [6] "GS513559"     "10521044731"  "10521044498"  "GS513558"     "52919014981" 
 [11] "GS513537"     "GS513581"     "GS513579"     "GS513560"     "12422056984" 
 [16] "72214038838"  "GS513575"     "GS517680"     "GS513574"     "GS512903"    
 [21] "GS517683"     "GS512867"     "GS519661"     "GS519640"     "GS512894"    
 [26] "173655D"      "GS513562"     "GS519664"     "72214026632"  "GS500917"    
 [31] "GS513555"     "12422056997"  "GS513416"     "GS501329"     "GS513485"    
 [36] "GS513484"     "GS513483"     "GS513486"     "GS532083"     "GS513564"    
 [41] "GS501391"     "12422056970"  "GS513421"     "GS500921"     "GS532074"    
 [46] "GS513488"     "GS532080"     "GS523792"     "GS532075"     "GS532078"    
 [51] "GS513568"     "GS517679"     "GS513541"     "GS519639"     "GS532079"    
 [56] "GS513487"     "GS532073"     "GS532077"     "GS532076"     "GS513482"    
 [61] "GS532081"     "GS532082"     "GS532084"     "GS532085"     "GS513478"    
 [66] "GS513479"     "GS513477"     "GS513476"     "GS513475"     "GS513420"    
 [71] "GS512896"     "31815028661"  "GS501330"     "GS513023"     "42721065142" 
 [76] "GS512715"     "10521038099"  "31815028579"  "GS513417"     "35796002"    
 [81] "GS517637"     "12422023120"  "11300952817"  "GS513422"     "82115084125" 
 [86] "GS513534"     "GS501318"     "GS513556"     "GS513585"     "GS501153"    
 [91] "GS513525"     "GS513418"     "GS513557"     "GS519642"     "GS512899"    
 [96] "10521014497"  "10521038100"  "9161325807"   "12422056972"  "GS513419"    
[101] "GS513577"     "9161325809"   "GS513552"     "8161326100"   "GS513573"    
[106] "GS513536"     "GS517677"     "GS513563"     "72214038759"  "102120046574"
[111] "29718"        "GS481971"     "GS513538"     "GS514325"     "GS513507"    
[116] "31815026907"  "31815026931"  "GS519669"     "GS515603"     "GS500916"    
[121] "42721065166"  "GS517960"     "GS513572"     "GS512362"     "72214038847" 
[126] "12422023137"  "GS519709"     "50603"        "GS513580"     "GS513569"    
[131] "GS513533"     "101019016842" "GS512892"     "GS513551"     "GS513523"    
[136] "GS412923"     "GS505882"     "10521036892"  "GS513542"     "GS512904"    
[141] "43804"        "GS517964"     "GS513535"     "82115084190"  "1111226287"  
[146] "GS514324"     "12422025566"  "GS513554"     "GS513582"     "12422057415" 
[151] "42721065180"  "GS500920"     "22717077188"  "GS512847"     "GS512897"    
[156] "GS513561"     "GS517967"     "GS517963"     "GS513583"     "121120012013"
[161] "GS513020"     "GS500691"     "9161325631"   "GS509137"     "GS513565"    
[166] "GS486533"     "GS513570"     "GS517945"     "72214026673"  "12422023375" 
[171] "GS500918"     "GS507968"     "11300921323"  "GS513508"     "GS513578"    
[176] "31815019192"  "GS513543"     "GS513566"     "GS513509"     "GS513586"    
[181] "GS513539"     "GS513567"     "6315110233"   "GS513571"     "GS513540"    
[186] "GS513576"     "GS513520"     "8161241419"  

$American_Berkshire_Assn_GGPPORHDAFFY_20231012_FinalReport.txt
  [1] "GS513437"     "GS513438"     "GS513439"     "62521089551"  "62521089554" 
  [6] "GS513443"     "GS517958"     "GS519706"     "2219051047"   "51927001"    
 [11] "GS513492"     "165049D"      "GS512921"     "GS513431"     "GS513495"    
 [16] "GS512987"     "GS501338"     "GS517925"     "1111226193"   "GS507934"    
 [21] "GS513489"     "GS513493"     "GS513444"     "173659D"      "GS513021"    
 [26] "GS513025"     "90514015203"  "31318044376"  "GS500913"     "GS513099"    
 [31] "GS501331"     "171504D"      "61165001"     "GS501332"     "6315110266"  
 [36] "101019016841" "GS513480"     "82321065367"  "135667D"      "GS501277"    
 [41] "GS517917"     "GS512363"     "GS501275"     "16268"        "GS513434"    
 [46] "GS513432"     "GS513430"     "GS517926"     "GS513447"     "12422056962" 
 [51] "GS513445"     "173661D"      "GS513425"     "GS500457"     "GS512886"    
 [56] "GS515606"     "GS520794"     "GS513448"     "GS513440"     "1213001"     
 [61] "GS513435"     "GS507933"     "GS513428"     "GS501273"     "GS501385"    
 [66] "GS513426"     "GS513423"     "165052D"      "GS520665"     "GS513442"    
 [71] "GS513429"     "GS513441"     "GS501274"     "GS507973"     "GS517928"    
 [76] "GS512361"     "GS513424"     "GS513494"     "GS513427"     "165050D"     
 [81] "GS512905"     "GS507987"     "GS501413"     "GS513491"     "GS513024"    
 [86] "GS520890"     "GS517784"     "GS512999"     "GS501327"     "GS501387"    
 [91] "171512D"      "GS513490"     "GS517969"     "GS513433"     "GS501276"    
 [96] "GS512997"     "509855"       "GS517634"     "148117001"    "22717077633" 
[101] "GS512996"     "GS475584"     "42721064989"  "52919014982"  "GS513446"    
[106] "GS513019"     "GS512882"     "GS513497"     "GS501278"     "GS513498"    
[111] "GS513022"     "173660D"      "31318044392" 

$American_Berkshire_Assn_GGPPORHDAFFY_20231019_FinalReport.txt
 [1] "GS513513"   "GS513521"   "GS513528"   "GS513524"   "8161200483"
 [6] "GS513587"   "GS513546"   "GS513526"   "GS532227"   "GS513549"  
[11] "GS513548"   "GS513510"   "GS513522"   "GS513584"   "9161325801"
[16] "GS513511"   "GS513514"   "GS513550"   "GS513502"   "GS513503"  
[21] "GS513512"   "GS513496"   "GS513518"   "GS513504"   "GS513519"  
[26] "GS513530"   "GS513547"   "GS513500"   "GS513532"   "GS513499"  
[31] "GS513531"   "GS513515"   "GS513544"   "GS513545"   "GS513501"  
[36] "GS513553"   "GS513529"   "GS513505"   "GS513517"   "GS513506"  
[1] "----------"

Compute allele frequencies for this population and compare them to those of a publicly available reference panel

 10000235  10000345  10004575  10007074  10007097  10007113 
0.3556150 0.1818182 0.9518950 0.5642633 0.3480000 0.9369973 

After matching SNPs between the two panels, let’s explore the allele frequencies

library(ggplot2)
library(GGally)
ggpairs(GWBC_spec)

The correlation of Berkshire allele frequencies with those of the reference breeds is highest for Hampshire and lowest for Duroc.

This means that the hardest breed to separate from Berkshire will be Hampshire. However, in my experience, this correlation is weak enough that the breeds can still be separated well.

Specifically, a similar correlation between Yorkshire and Landrace allele frequencies does not prevent those two breeds from being separated.

Next: we need crossbred genotypes to test this procedure [not included here]

Apply breed composition algorithm

Test on all samples for now

[1]  377 8791

visualize and summarize some results

proportion of all samples with > 90% estimated Berk contribution
[1] 0.7798408
Samples with < 90% estimated Berk contribution
 [1] "00-1"        "00-2"        "104-5"       "20-3"        "23-3"       
 [6] "3--6"        "32-2"        "6--7"        "66-2"        "94-4"       
[11] "GS513575"    "GS517683"    "GS513416"    "GS501329"    "GS513485"   
[16] "GS513484"    "GS513483"    "GS513486"    "GS513564"    "GS532078"   
[21] "GS532073"    "GS513482"    "10521038099" "GS513422"    "82115084125"
[26] "GS513525"    "GS513557"    "GS519642"    "GS513419"    "8161326100" 
[31] "GS517677"    "29718"       "31815026907" "31815026931" "72214038847"
[36] "GS519709"    "12422057415" "42721065180" "GS500920"    "GS512897"   
[41] "GS513561"    "GS517963"    "GS513570"    "12422023375" "11300921323"
[46] "31815019192" "6315110233"  "GS513540"    "GS513437"    "GS513438"   
[51] "GS513439"    "51927001"    "GS513492"    "GS512921"    "GS501338"   
[56] "GS513444"    "GS513099"    "GS501332"    "82321065367" "135667D"    
[61] "GS512363"    "GS517926"    "173661D"     "GS520794"    "GS513440"   
[66] "GS501274"    "GS512361"    "165050D"     "GS507987"    "GS501387"   
[71] "GS517969"    "GS513433"    "GS517634"    "GS513497"    "173660D"    
[76] "GS513528"    "8161200483"  "GS513587"    "GS532227"    "GS513496"   
[81] "GS513547"    "GS513515"    "GS513505"   
proportion of all samples with < 50% estimated Berk contribution
[1] 0.00530504
[1] "00-1" "00-2"
proportion of all samples with < 75% estimated Berk contribution
[1] 0.0265252
 [1] "00-1"        "00-2"        "GS513416"    "GS517677"    "12422057415"
 [6] "31815019192" "GS501338"    "GS513099"    "GS512363"    "GS520794"   
             Duroc Hampshire Landrace Yorkshire   Berk     R2
00-1        0.0082    0.2692   0.0000    0.5639 0.1587 0.3402
00-2        0.0076    0.2694   0.0000    0.5652 0.1578 0.3396
GS513416    0.0000    0.1572   0.0097    0.2035 0.6296 0.4096
GS517677    0.0000    0.1030   0.0069    0.2809 0.6091 0.4486
12422057415 0.0000    0.0449   0.0000    0.2306 0.7245 0.4631
31815019192 0.0806    0.0946   0.0738    0.0080 0.7429 0.3998
GS501338    0.0041    0.1845   0.0069    0.2868 0.5178 0.4371
GS513099    0.0747    0.0714   0.0799    0.0268 0.7473 0.4104
GS512363    0.0000    0.1111   0.0000    0.2475 0.6414 0.4780
GS520794    0.0000    0.1018   0.0000    0.2274 0.6708 0.4531
results saved to:
[1] "C:/Users/jsteibel/OneDrive/Documents/Berkshire/ABA/analyze/output_composition.csv"