Add the necessary code to make this script function.

Preliminaries

Load packages

library(seqinr)
library(rentrez)
library(compbio4all)
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:seqinr':
## 
##     translate
## The following object is masked from 'package:base':
## 
##     strsplit

Get shroom data

Download sequence of hShroom 3 protein: NP_001166171.1

# Request the hShroom3 protein record (accession NP_001166171.1) from the
# NCBI "protein" database, asking for FASTA as the return type.
shroom3 <- rentrez::entrez_fetch(
  db      = "protein",
  id      = "NP_001166171.1",
  rettype = "fasta"
)

Clean and set up sequence as vector.

# Turn the downloaded FASTA record into a vector with one residue per element.
shroom3_vector <- compbio4all::fasta_cleaner(shroom3)

Confirm set up using str()

# Inspect the structure: a character vector of single-letter residues.
utils::str(shroom3_vector)
##  chr [1:852] "M" "E" "A" "L" "G" "P" "G" "G" "D" "R" "A" "S" "P" "A" "S" ...

Check length - Shroom is on the big end of proteins

# Count residues in the cleaned vector. The raw `shroom3` object is one FASTA
# string, so length(shroom3) reports 1 no matter how long the protein is.
length(shroom3_vector)
## [1] 852

I am therefore going to subset it initially so the example plots don’t take as long to make.

# Keep only the first 400 elements so the example dotplots render faster.
shroom3_vector <- shroom3_vector[seq_len(400)]

Make a 2 x 2 grid of dotplots to explore effect of changing window size and nmatch

# Draw four self-comparison dotplots in a 2 x 2 layout to compare settings.
# Thin margins leave more room for the panels themselves.
par(mfrow = c(2, 2),
    mar = c(0, 0, 2, 1))

# One entry per panel, in drawing order: window size, matches required
# within the window, and the panel title.
panel_specs <- list(
  list(wsize = 1,  nmatch = 1, main = "Shroom Defaults"),
  list(wsize = 10, nmatch = 1, main = "Shroom - size = 10, nmatch = 1"),
  list(wsize = 10, nmatch = 5, main = "Shroom - size = 10, nmatch = 5"),
  list(wsize = 20, nmatch = 5, main = "Shroom - size = 20, nmatch = 5")
)

for (spec in panel_specs) {
  dotPlot(shroom3_vector,
          shroom3_vector,
          wsize = spec$wsize,
          nmatch = spec$nmatch,
          main = spec$main)
}

# Go back to a single-panel layout with regular margins; otherwise every
# later plot stays small.
par(mfrow = c(1, 1),
    mar = c(4, 4, 4, 4))

Make ANOTHER 2 x 2 grid of dotplots to explore effect of changing window size and nmatch

# Second 2 x 2 layout exploring further window size / nmatch combinations.
# Thin margins leave more room for the panels themselves.
par(mfrow = c(2, 2),
    mar = c(0, 0, 2, 1))

# One entry per panel, in drawing order: window size, matches required
# within the window, and the panel title.
panel_specs <- list(
  list(wsize = 30, nmatch = 5,  main = "Shroom 30 / 5"),
  list(wsize = 30, nmatch = 10, main = "Shroom - size = 30, nmatch = 10"),
  list(wsize = 5,  nmatch = 2,  main = "Shroom - size = 5, nmatch = 2"),
  list(wsize = 12, nmatch = 4,  main = "Shroom - size = 12, nmatch = 4")
)

for (spec in panel_specs) {
  dotPlot(shroom3_vector,
          shroom3_vector,
          wsize = spec$wsize,
          nmatch = spec$nmatch,
          main = spec$main)
}

# Go back to a single-panel layout with regular margins; otherwise every
# later plot stays small.
par(mfrow = c(1, 1),
    mar = c(4, 4, 4, 4))

Best plot using normal dotplot

This is the most interesting shroom dotplot based on the changes investigated above

# Reset to a single panel with regular margins in case a grid layout is
# still active from an earlier chunk.
par(mfrow = c(1, 1),
    mar = c(4, 4, 4, 4))

# Self-comparison using the settings picked from the grids above. The title
# states the wsize and nmatch values actually passed to dotPlot(), replacing
# the unfilled "XXXXXX" template placeholders.
dotPlot(shroom3_vector,
        shroom3_vector,
        wsize = 10,
        nmatch = 5,
        main = "Shroom window = 10, match = 5")

Full-length plot

Use the new dotplot function defined above on the full-length protein, save output (doesn’t autoplot)

# Self-comparison with a window of 10, a window step of 5, and 5 matches
# required per window; whatever dotPlot() returns is kept in my_dot_out.
# NOTE(review): shroom3_vector was cut to its first 400 elements earlier in
# this script, so this is not the full-length protein - confirm intent.
# NOTE(review): the commented-out image() steps below expect my_dot_out to
# carry $x, $y and $z; verify that dotPlot() returns such a list before
# enabling them.
my_dot_out <- dotPlot(shroom3_vector,
                      shroom3_vector,
                      wsize = 10,
                      wstep = 5,
                      nmatch = 5)

Get rid of upper triangular portion

#my_dot_out$z[lower.tri(my_dot_out$z)] <- FALSE

Do some weird prep (don’t worry about it)

#my_dot_out$z <- my_dot_out$z[, nrow(my_dot_out$z):1]

Plot using image() command. Add a title to “main =”

# seriously - it will drive you crazy if you forget about this
#par(mfrow = c(1,1), 
#    mar = c(4,4,4,4))

# plot with image()
#image(x = my_dot_out$x, 
#      y = my_dot_out$y, 
#      z = my_dot_out$z, 
#      main = "Plot")

Focal subplot

Use new function on the full-length protein, save output (doesn’t autoplot). Subset from 200 to 400

# Self-comparison restricted to elements 200 through 400, with a window of
# 15, a window step of 1, and 5 matches required per window.
# The subset expressions are passed inline on purpose: dotPlot() builds its
# default axis labels from the argument expressions.
my_dot_out <- dotPlot(shroom3_vector[200:400],
                      shroom3_vector[200:400],
                      wsize = 15,
                      wstep = 1,
                      nmatch = 5)

Get rid of upper triangular portion

#my_dot_out$z[lower.tri(my_dot_out$z)] <- FALSE

Do some weird prep (don’t worry about it)

#my_dot_out$z <- my_dot_out$z[, nrow(my_dot_out$z):1]

Plot using image() command. Add a title.

# seriously - it will drive you crazy if you forget about this
#par(mfrow = c(1,1), 
#    mar = c(4,4,4,4))

# plot with image()
#image(x = my_dot_out$x, 
#      y = my_dot_out$y, 
#      z = my_dot_out$z, 
#      main = "Plot")