library(mappoly)

Load file

file.name <- system.file("extdata/potato_example.csv", package = "mappoly")
dat <- read_geno_csv(file.in = file.name, ploidy = 4)
## Reading the following data:
##     Ploidy level: 4
##     No. individuals:  153
##     No. markers:  2225
##     No. informative markers:  2225 (100%)
##     ...
##     Done with reading.
##     Filtering non-conforming markers.
##     ...
##     Done with filtering.
print(dat, detailed = T)
## This is an object of class 'mappoly.data'
##     Ploidy level:                            4 
##     No. individuals:                         153 
##     No. markers:                             2225 
##     Missing data under  prob. threshold: 1.07%
## 
##     ----------
##     No. markers per chromosome:
##        seq No.mrk
##          1    258
##         10    114
##         11    150
##         12    122
##          2    214
##          3    197
##          4    207
##          5    168
##          6    202
##          7    221
##          8    179
##          9    179
##     ----------
##     Markers with no chromosome information: 14
##     ----------
##     No. of markers per dosage combination in both parents:
##     P1 P2 freq
##      0  1   60
##      0  2   24
##      1  0  213
##      1  1  195
##      1  2  140
##      1  3   52
##      2  0   51
##      2  1  153
##      2  2  212
##      2  3  203
##      2  4   90
##      3  1   62
##      3  2  152
##      3  3  252
##      3  4  249
##      4  2   27
##      4  3   90
plot(dat)

Data quality control

dat <- filter_individuals(dat, ind.to.remove = c("B2721.026", "B2721.078"))
## Initial data: 
##  Number of Individuals: 155 
##  Number of Markers: 2225 
## 
## Missing data check: 
##  Total SNPs: 2225 
##   0 SNPs dropped due to missing data threshold of 1 
##  Total of: 2225  SNPs 
## MAF check: 
##  No SNPs with MAF below 0 
## Monomorphic check: 
##  No monomorphic SNPs 
## Summary check: 
##  Initial:  2225 SNPs 
##  Final:  2225  SNPs ( 0  SNPs removed) 
##  
## Completed! Time = 0.688  seconds

dat <- filter_missing(dat, type = "marker", filter.thres = .05)
dat <- filter_missing(dat, type = "individual", filter.thres = .05)
seq.filt <- filter_segregation(dat, chisq.pval.thres = 0.05/dat$n.mrk)
seq.filt <- make_seq_mappoly(seq.filt)
seq.red  <- elim_redundant(seq.filt)
seq.init <- make_seq_mappoly(seq.red)
plot(seq.init)

Genomic order

go <- get_genomic_order(input.seq = seq.init) ## get genomic order of the sequence
plot(go)

Two-points for the whole dataset

ncores <- parallel::detectCores() - 1
tpt <- est_pairwise_rf2(seq.init, ncpus = ncores)
## Going RcppParallel mode.
## ...
## Done with pairwise computation.
## ~~~~~~~
## Reorganizing results
## ...
m <- rf_list_to_matrix(tpt) ## converts rec. frac. list into a matrix 
sgo <- make_seq_mappoly(go) ## creates a sequence of markers in the genome order
plot(m, ord = sgo, fact = 5) ## plots a rec. frac. matrix using the genome order, averaging neighbor cells in a 5 x 5 grid 

Group

g <- group_mappoly(m, expected.groups = 12, comp.mat = TRUE)
plot(g)

g
##   This is an object of class 'mappoly.group'
##   ------------------------------------------
##   Criteria used to assign markers to groups:
## 
##     - Number of markers:          1988 
##     - Number of linkage groups:   12 
##     - Number of markers per linkage groups: 
##     group n.mrk
##         1   118
##         2   159
##         3   191
##         4   166
##         5   197
##         6   180
##         7   133
##         8   197
##         9   222
##        10   105
##        11   168
##        12   152
##   ------------------------------------------
##    10   9   4   5   2   3  11   7   1 12   6   8 NoChr
## 1  88   0   3   5   7   5   3   1   2  2   0   1     1
## 2   0 147   1   0   2   0   1   2   2  0   1   2     1
## 3   0   1 166   3   0   3   3   2   2  0   4   2     5
## 4   1   1   4 140   3   2   4   3   5  0   2   1     0
## 5   1   1   3   1 174   5   0   1   2  0   0   8     1
## 6   0   4   0   2   6 155   1   1   6  0   3   2     0
## 7   6   2   1   1   0   0 118   0   2  1   1   1     0
## 8   1   3   2   1   0   2   1 179   2  3   1   1     1
## 9   1   1   2   5   2   0   0   6 204  0   0   1     0
## 10  0   1   2   0   0   1   0   1   1 98   0   0     1
## 11  2   0   0   0   1   1   0   0   0  1 160   2     1
## 12  2   0   0   1   1   1   0   0   1  1   2 142     1
##   ------------------------------------------

Ordering and Phasing

id <- as.numeric(colnames(g$seq.vs.grouped.snp)[-13])
MAPs <- vector("list", 12)
for(i in id){
  s <- make_seq_mappoly(g, i, genomic.info = 1) 
  tpt <- est_pairwise_rf(s, ncpus = ncores)
  gen.o <- get_genomic_order(s) ## Using genomic order
  s.gen <- make_seq_mappoly(gen.o)
  MAPs[[i]] <- est_rf_hmm_sequential(s.gen,twopt = tpt, 
                                     sub.map.size.diff.limit = 5, 
                                     extend.tail = 200)
  MAPs[[i]]$info$chrom <- rep(paste0("ST4.03ch", stringr::str_pad(i, 2, pad = "0")),
                               MAPs[[i]]$info$n.mrk)
}
plot_map_list(MAPs, horiz = FALSE, col = "ggstyle")

Updating map distances

MAPs.up <- vector("list", 12)
for(i in 1:12)
  MAPs.up[[i]] <- est_full_hmm_with_global_error(MAPs[[i]], error = 0.05)
plot_map_list(MAPs.up, horiz = FALSE, col = "ggstyle")

Plot map vs. genome

plot_genome_vs_map(MAPs.up, same.ch.lg = TRUE)

Map summary and export map to CSV

summary_maps(MAPs.up)
##       LG Genomic sequence Map length (cM) Markers/cM Simplex Double-simplex
## 1      1       ST4.03ch01           67.55       1.24      15             14
## 2      2       ST4.03ch02           102.9       1.43      32             42
## 3      3       ST4.03ch03           98.72       1.67      46             32
## 4      4       ST4.03ch04           74.72       1.83      63             24
## 5      5       ST4.03ch05           79.07        2.2      54             52
## 6      6       ST4.03ch06            77.6       1.96      58              8
## 7      7       ST4.03ch07            73.6        1.6      30             21
## 8      8       ST4.03ch08           81.22       2.18      55             40
## 9      9       ST4.03ch09          123.15       1.62      43             47
## 10    10       ST4.03ch10           76.82       1.24      44             16
## 11    11       ST4.03ch11           82.68       1.91      30             29
## 12    12       ST4.03ch12           81.51       1.74      28             47
## 13 Total             <NA>         1019.54       1.72     498            372
##    Multiplex Total Max gap
## 1         55    84    7.43
## 2         73   147   10.78
## 3         87   165    6.17
## 4         50   137    5.42
## 5         68   174    5.36
## 6         86   152    6.86
## 7         67   118   12.19
## 8         82   177    4.17
## 9        109   199    7.35
## 10        35    95    5.32
## 11        99   158     5.4
## 12        67   142    9.83
## 13       878  1748    7.19
export_map_list(MAPs.up, file = "output_map.csv")

Calculate homolog probability profiles

geno.prob <- lapply(MAPs.up, calc_genoprob_error, error = 0.05)
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
## Ploidy level:4
## Number of individuals:144
##  ..................................................
##  ..................................................
##  ............................................
h <- calc_homologprob(geno.prob)
## 
## Linkage group  1 ...
## Linkage group  2 ...
## Linkage group  3 ...
## Linkage group  4 ...
## Linkage group  5 ...
## Linkage group  6 ...
## Linkage group  7 ...
## Linkage group  8 ...
## Linkage group  9 ...
## Linkage group  10 ...
## Linkage group  11 ...
## Linkage group  12 ...
plot(h, lg = 3, ind = 12)
plot(h, lg = c(1,3,5,7), ind = 12)

Preferential pairing profiles

pp <- calc_prefpair_profiles(geno.prob)
## 
## Linkage group  1 ...
## Linkage group  2 ...
## Linkage group  3 ...
## Linkage group  4 ...
## Linkage group  5 ...
## Linkage group  6 ...
## Linkage group  7 ...
## Linkage group  8 ...
## Linkage group  9 ...
## Linkage group  10 ...
## Linkage group  11 ...
## Linkage group  12 ...
plot(pp)