Constructing Multi-Family Genetic Maps with MAPpoly2: A Simulation Example
1 Introduction
This document presents a genetic mapping analysis using the mappoly2 package in R.
2 Setup
First, we’ll clear the workspace and load the required packages.
3 Data Preparation
We’ll start by setting up the necessary data for our analysis.
ploidy.vec <- c(4, 2, 4, 2, 4, 6) # Six parents
names(ploidy.vec) <- c("P1", "P2", "P3", "P4", "P5", "P6")
parents.mat <- matrix(c("P1", "P2",
"P1", "P3",
"P2", "P2",
"P3", "P4",
"P4", "P5",
"P5", "P6",
"P5", "P2"),
ncol = 2, byrow = TRUE)
n.mrk <- rep(150, length(ploidy.vec)) # Number of markers
alleles <- list(P1 = 0:1, P2 = 0:1, P3 = 0:1, P4 = 0:1, P5 = 0:1, P6 = 0:1)
n.ind <- rep(200, nrow(parents.mat)) # Number of individuals
n.chrom <- 8 # Number of chromosomes
map.length <- round(runif(n.chrom, 60, 100)) # Length of maps4 Simulation
Simulating multiple crosses for each chromosome.
source(file = "https://raw.githubusercontent.com/mmollina/MAPpoly_vignettes/master/misc_scripts/multiallelic_multiparental_simulation.R")
x <- vector("list", n.chrom)
for (i in 1:n.chrom) {
cat("chrom: ", i, "\n")
x[[i]] <- simulate_multiple_crosses(ploidy.vec, parents.mat, n.ind, n.mrk, alleles, map.length[i])
}
#> chrom: 1
#> chrom: 2
#> chrom: 3
#> chrom: 4
#> chrom: 5
#> chrom: 6
#> chrom: 7
#> chrom: 85 Data Conversion
Converting simulation output to mappoly2 format.
source(file = "https://raw.githubusercontent.com/mmollina/MAPpoly_vignettes/master/misc_scripts/mappoly_mp_simulation_to_mappoly2.R")
dir.create(path = "~/temp/")
#> Warning in dir.create(path = "~/temp/"): '/Users/mmollin/temp' already exists
system("rm ~/temp/*")
mappoly_mp_simulation_to_mappoly2(x, file_path = "~/temp")6 Full-sib map construction
# Rename parents for visualization
pm <- parents.mat
pm[parents.mat == "P1"] <- "Plant_A"
pm[parents.mat == "P2"] <- "Plant_B"
pm[parents.mat == "P3"] <- "Plant_C"
pm[parents.mat == "P4"] <- "Plant_D"
pm[parents.mat == "P5"] <- "Plant_E"
pm[parents.mat == "P6"] <- "Plant_F"
# Process and visualize the maps for each population
MAPs <- vector("list", length(list))
for (i in 1:nrow(parents.mat)) {
p1 <- pm[i,1]
p2 <- pm[i,2]
fl <- paste0("~/temp/",parents.mat[i,1],"x",parents.mat[i,2],".csv")
dat <- read_geno_csv(file.in = fl,
ploidy.p1 = ploidy.vec[parents.mat[i,1]],
ploidy.p2 = ploidy.vec[parents.mat[i,2]],
name.p1 = p1, name.p2 = p2)
dat <- filter_data(dat)
dat <- pairwise_rf(dat, mrk.scope = "per.chrom")
dat <- rf_filter(dat, probs = c(0, 1))
g <- group(x = dat, expected.groups = n.chrom, comp.mat = TRUE, inter = FALSE)
s <- make_sequence(g)
s <- order_sequence(s, type = "genome")
s <- pairwise_phasing(s, type = "genome", parent = "p1p2")
s <- mapping(s, type = "genome", parent = "p1p2", ncpus = n.chrom)
s <- calc_haplotypes(s, type = "genome", ncpus = n.chrom)
# Append the processed maps to the MAPs list
MAPs[[i]] <- s
}
#> Reading the following data:
#> Ploidy level of Plant_A: 4
#> Ploidy level of Plant_B: 2
#> No. individuals: 200
#> No. markers: 1890
#> No. informative markers: 1830 (96.8%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 223 phased markers out of 223: (100%)
#> --> 2
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 216 phased markers out of 216: (100%)
#> --> 3
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 228 phased markers out of 228: (100%)
#> --> 4
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 222 phased markers out of 223: (99.6%)
#> --> 5
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 203 phased markers out of 203: (100%)
#> --> 6
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 221 phased markers out of 221: (100%)
#> --> 7
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 231 phased markers out of 231: (100%)
#> --> 8
#> Phasing parent Plant_A
#> Phasing parent Plant_B
#> 217 phased markers out of 217: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_A: 4
#> Ploidy level of Plant_C: 4
#> No. individuals: 200
#> No. markers: 2083
#> No. informative markers: 2083 (100%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 266 phased markers out of 267: (99.6%)
#> --> 2
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 259 phased markers out of 259: (100%)
#> --> 3
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 253 phased markers out of 253: (100%)
#> --> 4
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 257 phased markers out of 257: (100%)
#> --> 5
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 260 phased markers out of 260: (100%)
#> --> 6
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 252 phased markers out of 252: (100%)
#> --> 7
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 255 phased markers out of 256: (99.6%)
#> --> 8
#> Phasing parent Plant_A
#> Phasing parent Plant_C
#> 254 phased markers out of 256: (99.2%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_B: 2
#> Ploidy level of Plant_B: 2
#> No. individuals: 200
#> No. markers: 1834
#> No. informative markers: 1834 (100%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 177 phased markers out of 177: (100%)
#> --> 2
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 163 phased markers out of 163: (100%)
#> --> 3
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 178 phased markers out of 178: (100%)
#> --> 4
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 163 phased markers out of 163: (100%)
#> --> 5
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 145 phased markers out of 145: (100%)
#> --> 6
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 158 phased markers out of 158: (100%)
#> --> 7
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 176 phased markers out of 176: (100%)
#> --> 8
#> Phasing parent Plant_B
#> Phasing parent Plant_B
#> 175 phased markers out of 175: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_C: 4
#> Ploidy level of Plant_D: 2
#> No. individuals: 200
#> No. markers: 1877
#> No. informative markers: 1825 (97.2%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 220 phased markers out of 220: (100%)
#> --> 2
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 228 phased markers out of 228: (100%)
#> --> 3
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 225 phased markers out of 225: (100%)
#> --> 4
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 226 phased markers out of 226: (100%)
#> --> 5
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 207 phased markers out of 208: (99.5%)
#> --> 6
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 213 phased markers out of 213: (100%)
#> --> 7
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 213 phased markers out of 213: (100%)
#> --> 8
#> Phasing parent Plant_C
#> Phasing parent Plant_D
#> 218 phased markers out of 218: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_D: 2
#> Ploidy level of Plant_E: 4
#> No. individuals: 200
#> No. markers: 1894
#> No. informative markers: 1838 (97%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 223 phased markers out of 223: (100%)
#> --> 2
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 222 phased markers out of 222: (100%)
#> --> 3
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 237 phased markers out of 237: (100%)
#> --> 4
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 222 phased markers out of 222: (100%)
#> --> 5
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 214 phased markers out of 214: (100%)
#> --> 6
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 221 phased markers out of 221: (100%)
#> --> 7
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 225 phased markers out of 225: (100%)
#> --> 8
#> Phasing parent Plant_D
#> Phasing parent Plant_E
#> 220 phased markers out of 220: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_E: 4
#> Ploidy level of Plant_F: 4
#> No. individuals: 200
#> No. markers: 2081
#> No. informative markers: 2081 (100%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 265 phased markers out of 265: (100%)
#> --> 2
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 254 phased markers out of 254: (100%)
#> --> 3
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 257 phased markers out of 260: (98.8%)
#> --> 4
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 259 phased markers out of 259: (100%)
#> --> 5
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 251 phased markers out of 254: (98.8%)
#> --> 6
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 260 phased markers out of 260: (100%)
#> --> 7
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 251 phased markers out of 252: (99.6%)
#> --> 8
#> Phasing parent Plant_E
#> Phasing parent Plant_F
#> 258 phased markers out of 258: (100%)
#> Multi-locus map estimation
#> Reading the following data:
#> Ploidy level of Plant_E: 4
#> Ploidy level of Plant_B: 2
#> No. individuals: 200
#> No. markers: 1886
#> No. informative markers: 1810 (96%)
#> ---
#> This dataset contains chromosome information.
#> This dataset contains position information.
#> --> Filtering non-conforming markers.
#> --> Filtering markers with redundant information.
#> ----------------------------------
#> --> Chr_1
#> --> Chr_2
#> --> Chr_3
#> --> Chr_4
#> --> Chr_5
#> --> Chr_6
#> --> Chr_7
#> --> Chr_8
#> --> 1
#> --> 2
#> --> 3
#> --> 4
#> --> 5
#> --> 6
#> --> 7
#> --> 8
#> --> 1
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 207 phased markers out of 207: (100%)
#> --> 2
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 223 phased markers out of 223: (100%)
#> --> 3
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 228 phased markers out of 228: (100%)
#> --> 4
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 211 phased markers out of 211: (100%)
#> --> 5
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 214 phased markers out of 214: (100%)
#> --> 6
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 214 phased markers out of 214: (100%)
#> --> 7
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 224 phased markers out of 224: (100%)
#> --> 8
#> Phasing parent Plant_E
#> Phasing parent Plant_B
#> 230 phased markers out of 230: (100%)
#> Multi-locus map estimation
7 Visualization of maps and haplotypes
map_summary(MAPs[[1]], type = "genome")
#> ---------- Genome --- Plant_A x Plant_B --------------------------------------------------------------------------
#> LG Chrom Map_length_(cM) Markers/cM Simplex_P1 Simplex_P2 Double-simplex Multiplex Total Max_gap
#> ------------------------------------------------------------------------------------------------------------------
#> lg1 1 89 2.506 50 19 67 87 223 4
#> lg2 2 68.8 3.14 67 21 42 86 216 2.5
#> lg3 3 103.5 2.203 76 17 58 77 228 2.4
#> lg4 4 67.7 3.279 60 29 50 83 222 2
#> lg5 5 65.7 3.09 57 23 54 69 203 2.6
#> lg6 6 79.7 2.773 59 25 60 77 221 2.9
#> lg7 7 92.6 2.495 59 27 69 76 231 2.1
#> lg8 8 72.1 3.01 56 17 61 83 217 2.1
#> ------------------------------------------------------------------------------------------------------------------
#> Total 639.1 3 484 178 461 638 1761 4
#> ------------------------------------------------------------------------------------------------------------------
plot_multi_map(MAPs)8 Prepare and visualize integrated maps
First, we’ll clear the workspace and load the required packages.
9 Build consensus map
First, we’ll clear the workspace and load the required packages.
x <- estimate_consensus_map(x1, ncpus = n.chrom)
x
#> Ploidy of founders: 4 2 4 2 4 4
#> Total No. individuals: 1400
#> Total No. markers 3387
#> Haplotype probability computed: No
#>
#> Number of individuals per crosse:
#> Plant_B Plant_C Plant_D Plant_E Plant_F
#> -----------------------------------------------------
#> Plant_A 200 200 . . .
#> Plant_B 200 . . . .
#> Plant_C . . 200 . .
#> Plant_D . . . 200 .
#> Plant_E 200 . . . 200
#> -----------------------------------------------------
#>
#> Consensus Map:
#> ---------------
#> LG Map_length_.cM. Markers.cM Total.mrk Max_gap
#> ----------------------------------------------------------
#> lg1 88.2 4.966 438 1.01
#> lg2 74.9 5.514 413 1.01
#> lg3 106.26 4.065 432 1.01
#> lg4 84.44 5.069 428 1.01
#> lg5 63.31 6.539 414 1.01
#> lg6 86.02 4.941 425 1.01
#> lg7 98.51 4.324 426 1.01
#> lg8 68.5 6 411 1.3
#> ----------------------------------------------------------
plot(x)