Constructing Multi-Family Genetic Maps with MAPpoly2: A Simulation Example

1 Introduction

This document presents a genetic mapping analysis using the mappoly2 package in R.

2 Setup

First, we’ll clear the workspace and load the required packages.

rm(list = ls())
require(mappoly2)

3 Data Preparation

We’ll start by setting up the necessary data for our analysis.

ploidy.vec <- c(4, 2, 4, 2, 4, 6) # Six parents
names(ploidy.vec) <- c("P1", "P2", "P3", "P4", "P5", "P6")

parents.mat <- matrix(c("P1", "P2", 
                        "P1", "P3", 
                        "P2", "P2", 
                        "P3", "P4", 
                        "P4", "P5", 
                        "P5", "P6", 
                        "P5", "P2"),
                      ncol = 2, byrow = TRUE)

n.mrk <- rep(150, length(ploidy.vec)) # Number of markers
alleles <- list(P1 = 0:1, P2 = 0:1, P3 = 0:1, P4 = 0:1, P5 = 0:1, P6 = 0:1)
n.ind <- rep(200, nrow(parents.mat)) # Number of individuals
n.chrom <- 8 # Number of chromosomes
map.length <- round(runif(n.chrom, 60, 100)) # Length of maps

4 Simulation

Simulating multiple crosses for each chromosome.

source(file = "https://raw.githubusercontent.com/mmollina/MAPpoly_vignettes/master/misc_scripts/multiallelic_multiparental_simulation.R")
x <- vector("list", n.chrom)
for (i in 1:n.chrom) {
  cat("chrom: ", i, "\n")
  x[[i]] <- simulate_multiple_crosses(ploidy.vec, parents.mat, n.ind, n.mrk, alleles, map.length[i])
}
#> chrom:  1 
#> chrom:  2 
#> chrom:  3 
#> chrom:  4 
#> chrom:  5 
#> chrom:  6 
#> chrom:  7 
#> chrom:  8

5 Data Conversion

Converting simulation output to mappoly2 format.

source(file = "https://raw.githubusercontent.com/mmollina/MAPpoly_vignettes/master/misc_scripts/mappoly_mp_simulation_to_mappoly2.R")
dir.create(path = "~/temp/")
#> Warning in dir.create(path = "~/temp/"): '/Users/mmollin/temp' already exists
system("rm ~/temp/*")
mappoly_mp_simulation_to_mappoly2(x, file_path = "~/temp")

6 Full-sib map construction

# Rename parents for visualization
pm <- parents.mat
pm[parents.mat == "P1"] <- "Plant_A"
pm[parents.mat == "P2"] <- "Plant_B"
pm[parents.mat == "P3"] <- "Plant_C"
pm[parents.mat == "P4"] <- "Plant_D"
pm[parents.mat == "P5"] <- "Plant_E"
pm[parents.mat == "P6"] <- "Plant_F"

# Process and visualize the maps for each population
MAPs <- vector("list", length(list))
for (i in 1:nrow(parents.mat)) {
  p1 <- pm[i,1]
  p2 <- pm[i,2]
  fl <- paste0("~/temp/",parents.mat[i,1],"x",parents.mat[i,2],".csv")
  dat <- read_geno_csv(file.in = fl,
                       ploidy.p1 = ploidy.vec[parents.mat[i,1]],
                       ploidy.p2 = ploidy.vec[parents.mat[i,2]],
                       name.p1 = p1, name.p2 = p2)
  dat <- filter_data(dat)
  dat <- pairwise_rf(dat, mrk.scope = "per.chrom")
  dat <- rf_filter(dat, probs = c(0, 1))
  g <- group(x = dat, expected.groups = n.chrom, comp.mat = TRUE, inter = FALSE)
  s <- make_sequence(g)
  s <- order_sequence(s, type = "genome")
  s <- pairwise_phasing(s, type = "genome", parent = "p1p2")
  s <- mapping(s, type = "genome", parent = "p1p2", ncpus = n.chrom)
  s <- calc_haplotypes(s, type = "genome", ncpus = n.chrom)
  
  # Append the processed maps to the MAPs list
  MAPs[[i]] <- s
}
#> Reading the following data:
#>      Ploidy level of Plant_A:  4
#>      Ploidy level of Plant_B:  2
#>      No. individuals:          200
#>      No. markers:              1890
#>      No. informative markers:  1830 (96.8%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 223 phased markers out of 223: (100%)
#>   --> 2 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 216 phased markers out of 216: (100%)
#>   --> 3 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 228 phased markers out of 228: (100%)
#>   --> 4 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 222 phased markers out of 223: (99.6%)
#>   --> 5 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 203 phased markers out of 203: (100%)
#>   --> 6 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 221 phased markers out of 221: (100%)
#>   --> 7 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 231 phased markers out of 231: (100%)
#>   --> 8 
#> Phasing parent Plant_A 
#> Phasing parent Plant_B 
#> 217 phased markers out of 217: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_A:  4
#>      Ploidy level of Plant_C:  4
#>      No. individuals:          200
#>      No. markers:              2083
#>      No. informative markers:  2083 (100%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 266 phased markers out of 267: (99.6%)
#>   --> 2 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 259 phased markers out of 259: (100%)
#>   --> 3 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 253 phased markers out of 253: (100%)
#>   --> 4 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 257 phased markers out of 257: (100%)
#>   --> 5 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 260 phased markers out of 260: (100%)
#>   --> 6 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 252 phased markers out of 252: (100%)
#>   --> 7 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 255 phased markers out of 256: (99.6%)
#>   --> 8 
#> Phasing parent Plant_A 
#> Phasing parent Plant_C 
#> 254 phased markers out of 256: (99.2%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_B:  2
#>      Ploidy level of Plant_B:  2
#>      No. individuals:          200
#>      No. markers:              1834
#>      No. informative markers:  1834 (100%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 177 phased markers out of 177: (100%)
#>   --> 2 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 163 phased markers out of 163: (100%)
#>   --> 3 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 178 phased markers out of 178: (100%)
#>   --> 4 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 163 phased markers out of 163: (100%)
#>   --> 5 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 145 phased markers out of 145: (100%)
#>   --> 6 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 158 phased markers out of 158: (100%)
#>   --> 7 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 176 phased markers out of 176: (100%)
#>   --> 8 
#> Phasing parent Plant_B 
#> Phasing parent Plant_B 
#> 175 phased markers out of 175: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_C:  4
#>      Ploidy level of Plant_D:  2
#>      No. individuals:          200
#>      No. markers:              1877
#>      No. informative markers:  1825 (97.2%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 220 phased markers out of 220: (100%)
#>   --> 2 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 228 phased markers out of 228: (100%)
#>   --> 3 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 225 phased markers out of 225: (100%)
#>   --> 4 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 226 phased markers out of 226: (100%)
#>   --> 5 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 207 phased markers out of 208: (99.5%)
#>   --> 6 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 213 phased markers out of 213: (100%)
#>   --> 7 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 213 phased markers out of 213: (100%)
#>   --> 8 
#> Phasing parent Plant_C 
#> Phasing parent Plant_D 
#> 218 phased markers out of 218: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_D:  2
#>      Ploidy level of Plant_E:  4
#>      No. individuals:          200
#>      No. markers:              1894
#>      No. informative markers:  1838 (97%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 223 phased markers out of 223: (100%)
#>   --> 2 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 222 phased markers out of 222: (100%)
#>   --> 3 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 237 phased markers out of 237: (100%)
#>   --> 4 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 222 phased markers out of 222: (100%)
#>   --> 5 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 214 phased markers out of 214: (100%)
#>   --> 6 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 221 phased markers out of 221: (100%)
#>   --> 7 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 225 phased markers out of 225: (100%)
#>   --> 8 
#> Phasing parent Plant_D 
#> Phasing parent Plant_E 
#> 220 phased markers out of 220: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_E:  4
#>      Ploidy level of Plant_F:  4
#>      No. individuals:          200
#>      No. markers:              2081
#>      No. informative markers:  2081 (100%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 265 phased markers out of 265: (100%)
#>   --> 2 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 254 phased markers out of 254: (100%)
#>   --> 3 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 257 phased markers out of 260: (98.8%)
#>   --> 4 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 259 phased markers out of 259: (100%)
#>   --> 5 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 251 phased markers out of 254: (98.8%)
#>   --> 6 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 260 phased markers out of 260: (100%)
#>   --> 7 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 251 phased markers out of 252: (99.6%)
#>   --> 8 
#> Phasing parent Plant_E 
#> Phasing parent Plant_F 
#> 258 phased markers out of 258: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#>      Ploidy level of Plant_E:  4
#>      Ploidy level of Plant_B:  2
#>      No. individuals:          200
#>      No. markers:              1886
#>      No. informative markers:  1810 (96%)
#>      ---
#>      This dataset contains chromosome information.
#>      This dataset contains position information.
#>       -->  Filtering non-conforming markers.
#>       -->  Filtering markers with redundant information.
#>      ----------------------------------

#>   --> Chr_1
#>   --> Chr_2
#>   --> Chr_3
#>   --> Chr_4
#>   --> Chr_5
#>   --> Chr_6
#>   --> Chr_7
#>   --> Chr_8

#>   --> 1
#>   --> 2
#>   --> 3
#>   --> 4
#>   --> 5
#>   --> 6
#>   --> 7
#>   --> 8
#>   --> 1 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 207 phased markers out of 207: (100%)
#>   --> 2 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 223 phased markers out of 223: (100%)
#>   --> 3 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 228 phased markers out of 228: (100%)
#>   --> 4 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 211 phased markers out of 211: (100%)
#>   --> 5 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 214 phased markers out of 214: (100%)
#>   --> 6 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 214 phased markers out of 214: (100%)
#>   --> 7 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 224 phased markers out of 224: (100%)
#>   --> 8 
#> Phasing parent Plant_E 
#> Phasing parent Plant_B 
#> 230 phased markers out of 230: (100%)
#> Multi-locus map estimation

7 Visualization of maps and haplotypes

plot_map_list(MAPs[[1]], type = "genome", col = mappoly::mp_pallet3(n.chrom))

plot_haplotypes(MAPs[[1]], type = "genome", lg = 1)

plot_map(MAPs[[1]], type = "genome")

map_summary(MAPs[[1]], type = "genome")
#> ---------- Genome --- Plant_A x Plant_B -------------------------------------------------------------------------- 
#>      LG     Chrom  Map_length_(cM)  Markers/cM  Simplex_P1  Simplex_P2  Double-simplex  Multiplex  Total  Max_gap  
#> ------------------------------------------------------------------------------------------------------------------ 
#>      lg1    1      89               2.506       50          19          67              87         223    4        
#>      lg2    2      68.8             3.14        67          21          42              86         216    2.5      
#>      lg3    3      103.5            2.203       76          17          58              77         228    2.4      
#>      lg4    4      67.7             3.279       60          29          50              83         222    2        
#>      lg5    5      65.7             3.09        57          23          54              69         203    2.6      
#>      lg6    6      79.7             2.773       59          25          60              77         221    2.9      
#>      lg7    7      92.6             2.495       59          27          69              76         231    2.1      
#>      lg8    8      72.1             3.01        56          17          61              83         217    2.1      
#> ------------------------------------------------------------------------------------------------------------------ 
#>      Total         639.1            3           484         178         461             638        1761   4        
#> ------------------------------------------------------------------------------------------------------------------
plot_multi_map(MAPs)

8 Prepare and visualize integrated maps

First, we’ll clear the workspace and load the required packages.

x1 <- prepare_to_integrate(x = MAPs)
plot(x1)

plot_shared_markers(x1)

9 Build consensus map

First, we’ll clear the workspace and load the required packages.

x <- estimate_consensus_map(x1, ncpus = n.chrom)
x
#> Ploidy of founders:              4 2 4 2 4 4 
#> Total No. individuals:           1400 
#> Total No. markers                3387 
#> Haplotype probability computed:  No 
#> 
#> Number of individuals per crosse:
#>          Plant_B  Plant_C  Plant_D  Plant_E  Plant_F  
#> ----------------------------------------------------- 
#> Plant_A  200      200      .        .        .        
#> Plant_B  200      .        .        .        .        
#> Plant_C  .        .        200      .        .        
#> Plant_D  .        .        .        200      .        
#> Plant_E  200      .        .        .        200      
#> ----------------------------------------------------- 
#> 
#> Consensus Map:
#> ---------------
#>      LG   Map_length_.cM.  Markers.cM  Total.mrk  Max_gap  
#> ---------------------------------------------------------- 
#>      lg1  88.2             4.966       438        1.01     
#>      lg2  74.9             5.514       413        1.01     
#>      lg3  106.26           4.065       432        1.01     
#>      lg4  84.44            5.069       428        1.01     
#>      lg5  63.31            6.539       414        1.01     
#>      lg6  86.02            4.941       425        1.01     
#>      lg7  98.51            4.324       426        1.01     
#>      lg8  68.5             6           411        1.3      
#> ----------------------------------------------------------
plot(x)

plot(x, only.consensus = TRUE, col = mappoly::mp_pallet3(n.chrom))

10 Calculate and visualize consensus haplotypes

First, we’ll clear the workspace and load the required packages.

system.time(x <- calc_consensus_haplo(x, ncpus = n.chrom))
#>    user  system elapsed 
#>  48.310  22.818  25.627
plot_consensus_haplo(x, lg = 1, ind = "Ind_P1xP2_3")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P1xP3_1")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P2xP2_7")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P3xP4_3")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P4xP5_4")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P5xP6_6")

plot_consensus_haplo(x, lg = 1, ind = "Ind_P5xP2_1")

save.image("simulation_image.rda")