library(mappoly)
Load file
file.name <- system.file("extdata/potato_example.csv", package = "mappoly")
dat <- read_geno_csv(file.in = file.name, ploidy = 4)
## Reading the following data:
## Ploidy level: 4
## No. individuals: 153
## No. markers: 2225
## No. informative markers: 2225 (100%)
## ...
## Done with reading.
## Filtering non-conforming markers.
## ...
## Done with filtering.
print(dat, detailed = T)
## This is an object of class 'mappoly.data'
## Ploidy level: 4
## No. individuals: 153
## No. markers: 2225
## Missing data under prob. threshold: 1.07%
##
## ----------
## No. markers per chromosome:
## seq No.mrk
## 1 258
## 10 114
## 11 150
## 12 122
## 2 214
## 3 197
## 4 207
## 5 168
## 6 202
## 7 221
## 8 179
## 9 179
## ----------
## Markers with no chromosome information: 14
## ----------
## No. of markers per dosage combination in both parents:
## P1 P2 freq
## 0 1 60
## 0 2 24
## 1 0 213
## 1 1 195
## 1 2 140
## 1 3 52
## 2 0 51
## 2 1 153
## 2 2 212
## 2 3 203
## 2 4 90
## 3 1 62
## 3 2 152
## 3 3 252
## 3 4 249
## 4 2 27
## 4 3 90
plot(dat)

Data quality control
dat <- filter_individuals(dat, ind.to.remove = c("B2721.026", "B2721.078"))
## Initial data:
## Number of Individuals: 155
## Number of Markers: 2225
##
## Missing data check:
## Total SNPs: 2225
## 0 SNPs dropped due to missing data threshold of 1
## Total of: 2225 SNPs
## MAF check:
## No SNPs with MAF below 0
## Monomorphic check:
## No monomorphic SNPs
## Summary check:
## Initial: 2225 SNPs
## Final: 2225 SNPs ( 0 SNPs removed)
##
## Completed! Time = 0.688 seconds

dat <- filter_missing(dat, type = "marker", filter.thres = .05)
dat <- filter_missing(dat, type = "individual", filter.thres = .05)
seq.filt <- filter_segregation(dat, chisq.pval.thres = 0.05/dat$n.mrk)
seq.filt <- make_seq_mappoly(seq.filt)
seq.red <- elim_redundant(seq.filt)
seq.init <- make_seq_mappoly(seq.red)
plot(seq.init)

Genomic order
go <- get_genomic_order(input.seq = seq.init) ## get genomic order of the sequence
plot(go)

Two-points for the whole dataset
ncores <- parallel::detectCores() - 1
tpt <- est_pairwise_rf2(seq.init, ncpus = ncores)
## Going RcppParallel mode.
## ...
## Done with pairwise computation.
## ~~~~~~~
## Reorganizing results
## ...
m <- rf_list_to_matrix(tpt) ## converts rec. frac. list into a matrix
sgo <- make_seq_mappoly(go) ## creates a sequence of markers in the genome order
plot(m, ord = sgo, fact = 5) ## plots a rec. frac. matrix using the genome order, averaging neighbor cells in a 5 x 5 grid

Group
g <- group_mappoly(m, expected.groups = 12, comp.mat = TRUE)
plot(g)

g
## This is an object of class 'mappoly.group'
## ------------------------------------------
## Criteria used to assign markers to groups:
##
## - Number of markers: 1988
## - Number of linkage groups: 12
## - Number of markers per linkage groups:
## group n.mrk
## 1 118
## 2 159
## 3 191
## 4 166
## 5 197
## 6 180
## 7 133
## 8 197
## 9 222
## 10 105
## 11 168
## 12 152
## ------------------------------------------
## 10 9 4 5 2 3 11 7 1 12 6 8 NoChr
## 1 88 0 3 5 7 5 3 1 2 2 0 1 1
## 2 0 147 1 0 2 0 1 2 2 0 1 2 1
## 3 0 1 166 3 0 3 3 2 2 0 4 2 5
## 4 1 1 4 140 3 2 4 3 5 0 2 1 0
## 5 1 1 3 1 174 5 0 1 2 0 0 8 1
## 6 0 4 0 2 6 155 1 1 6 0 3 2 0
## 7 6 2 1 1 0 0 118 0 2 1 1 1 0
## 8 1 3 2 1 0 2 1 179 2 3 1 1 1
## 9 1 1 2 5 2 0 0 6 204 0 0 1 0
## 10 0 1 2 0 0 1 0 1 1 98 0 0 1
## 11 2 0 0 0 1 1 0 0 0 1 160 2 1
## 12 2 0 0 1 1 1 0 0 1 1 2 142 1
## ------------------------------------------
Ordering and Phasing
id <- as.numeric(colnames(g$seq.vs.grouped.snp)[-13])
MAPs <- vector("list", 12)
for(i in id){
s <- make_seq_mappoly(g, i, genomic.info = 1)
tpt <- est_pairwise_rf(s, ncpus = ncores)
gen.o <- get_genomic_order(s) ## Using genomic order
s.gen <- make_seq_mappoly(gen.o)
MAPs[[i]] <- est_rf_hmm_sequential(s.gen,twopt = tpt,
sub.map.size.diff.limit = 5,
extend.tail = 200)
MAPs[[i]]$info$chrom <- rep(paste0("ST4.03ch", stringr::str_pad(i, 2, pad = "0")),
MAPs[[i]]$info$n.mrk)
}
plot_map_list(MAPs, horiz = FALSE, col = "ggstyle")

Updating map distances
MAPs.up <- vector("list", 12)
for(i in 1:12)
MAPs.up[[i]] <- est_full_hmm_with_global_error(MAPs[[i]], error = 0.05)
plot_map_list(MAPs.up, horiz = FALSE, col = "ggstyle")

Plot map vs. genome
plot_genome_vs_map(MAPs.up, same.ch.lg = TRUE)

Map summary and export map to CSV
summary_maps(MAPs.up)
## LG Genomic sequence Map length (cM) Markers/cM Simplex Double-simplex
## 1 1 ST4.03ch01 67.55 1.24 15 14
## 2 2 ST4.03ch02 102.9 1.43 32 42
## 3 3 ST4.03ch03 98.72 1.67 46 32
## 4 4 ST4.03ch04 74.72 1.83 63 24
## 5 5 ST4.03ch05 79.07 2.2 54 52
## 6 6 ST4.03ch06 77.6 1.96 58 8
## 7 7 ST4.03ch07 73.6 1.6 30 21
## 8 8 ST4.03ch08 81.22 2.18 55 40
## 9 9 ST4.03ch09 123.15 1.62 43 47
## 10 10 ST4.03ch10 76.82 1.24 44 16
## 11 11 ST4.03ch11 82.68 1.91 30 29
## 12 12 ST4.03ch12 81.51 1.74 28 47
## 13 Total <NA> 1019.54 1.72 498 372
## Multiplex Total Max gap
## 1 55 84 7.43
## 2 73 147 10.78
## 3 87 165 6.17
## 4 50 137 5.42
## 5 68 174 5.36
## 6 86 152 6.86
## 7 67 118 12.19
## 8 82 177 4.17
## 9 109 199 7.35
## 10 35 95 5.32
## 11 99 158 5.4
## 12 67 142 9.83
## 13 878 1748 7.19
export_map_list(MAPs.up, file = "output_map.csv")
Calculate homolog probability profiles
geno.prob <- lapply(MAPs.up, calc_genoprob_error, error = 0.05)
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
## Ploidy level:4
## Number of individuals:144
## ..................................................
## ..................................................
## ............................................
h <- calc_homologprob(geno.prob)
##
## Linkage group 1 ...
## Linkage group 2 ...
## Linkage group 3 ...
## Linkage group 4 ...
## Linkage group 5 ...
## Linkage group 6 ...
## Linkage group 7 ...
## Linkage group 8 ...
## Linkage group 9 ...
## Linkage group 10 ...
## Linkage group 11 ...
## Linkage group 12 ...
plot(h, lg = 3, ind = 12)
plot(h, lg = c(1,3,5,7), ind = 12)
Preferential pairing profiles
pp <- calc_prefpair_profiles(geno.prob)
##
## Linkage group 1 ...
## Linkage group 2 ...
## Linkage group 3 ...
## Linkage group 4 ...
## Linkage group 5 ...
## Linkage group 6 ...
## Linkage group 7 ...
## Linkage group 8 ...
## Linkage group 9 ...
## Linkage group 10 ...
## Linkage group 11 ...
## Linkage group 12 ...
plot(pp)
