—- BCB504 HW2 Problem 3 TBMarsden —-

This is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages (click the Help toolbar button for more details on using R Markdown).

When you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Simulate differentiation of sub-populations
N = 30  # Size of each line
lineN = 100  #Number of lines
ACount = array(NA, dim = c(100, 2))
AFreq = array(NA, dim = c(100, 2))
GCount = array(NA, dim = c(100, 3))
GFreq = array(NA, dim = c(100, 3))
Genes0 = array(NA, dim = c(lineN, 2 * N))
# Matrix to store alleles and genotypes (two adjacent alleles form a
# genotype). Each row represent a line.

# Initialize Genes for population 0
for (i in 1:lineN) {
    Genes0[i, ] = sample(x = c(0:1), size = 10, replace = T, prob = c(0.3, 0.7))
    # Randomly form genotypes by sample function, probabilities 30:70!
}

generationN = 100
ParentGenes = Genes0
for (g in 1:generationN) {

    ChildGenes = array(NA, dim = c(lineN, 2 * N))
    for (i in 1:lineN) {
        ChildGenes[i, ] = sample(ParentGenes[i, ], replace = T)
        # Randam sampling with replacement <=> simple inbreeding
    }
    ParentGenes = ChildGenes
}
table(ChildGenes)/(lineN * 2 * N)  # Allele frequencies
## ChildGenes
##     0     1 
## 0.315 0.685


# Convert allele to genotypes: AA <=> 2; Aa / aA <=> 1; aa <=> 0.
Genotypes = array(NA, dim = c(lineN, N))
for (j in 1:N) {
    Genotypes[, j] = ChildGenes[, 2 * j - 1] + ChildGenes[, 2 * j]
}
aaa = table(Genotypes)/(lineN * N)  # Genotype frequencies.

## ----- f.R --------

ACount = array(NA, c(100, 4))
AFreq = array(NA, c(100, 4))
# bDataAA = array(NA,c(100,30))
AL1 = c("0,0", "0,1", "1,0", "1,1")

Cooo <- array(NA, c(100, 30))
for (i in 1:100) {
    ACount[i, 1] = sum(Cooo[i, ] == "AB")
    ACount[i, 2] = sum(Cooo[i, ] == "Ab")
    ACount[i, 3] = sum(Cooo[i, ] == "aB")
    ACount[i, 4] = sum(Cooo[i, ] == "ab")
    AFreq[i, 1] = ACount[i, 1]/30
    AFreq[i, 2] = ACount[i, 2]/30
    AFreq[i, 3] = ACount[i, 3]/30
    AFreq[i, 4] = ACount[i, 4]/30
}

matplot(AFreq, pch = 19, type = "b", lwd = 2, col = 1:3, ylim = c(0:1))

GCount = array(NA, c(100, 3))
GFreq = array(NA, c(100, 3))
FCount = array(NA, c(100, 1))
WCount = array(NA, c(100, 2))
WFreq = array(NA, c(100, 2))

for (i in 1:100) {
    Data = Genotypes[i, ]
    GCount[i, 1] = sum(Data == 0)
    GCount[i, 2] = sum(Data == 1)
    GCount[i, 3] = sum(Data == 2)
    GFreq[i, 1] = GCount[i, 1]/30
    GFreq[i, 2] = GCount[i, 2]/30
    GFreq[i, 3] = GCount[i, 3]/30
    WCount[i, 1] = GCount[i, 1] + GCount[i, 2]
    WCount[i, 2] = GCount[i, 3]
    WFreq[i, 1] = WCount[i, 1]/30
    WFreq[i, 2] = WCount[i, 2]/30
}

# Average10s
GAvg = array(NA, c(10, 3))
WAvg = array(NA, c(10, 2))

AvgG <- function(Data, col) {
    for (i in 1:10) {
        m = i * 10
        GAvg[i, col] <<- mean(Data[(m - 9), col]:Data[m, col])
    }
}

AvgW <- function(Data, col) {
    for (i in 1:10) {
        m = i * 10
        WAvg[i, col] <<- mean(Data[(m - 9), col]:Data[m, col])
    }
}

AvgG(GFreq, 1)
AvgG(GFreq, 2)
AvgG(GFreq, 3)
AvgW(WFreq, 1)
AvgW(WFreq, 2)

AFreqF <- data.frame(AFreq)
names(AFreqF) <- c("AB", "Ab", "aB", "ab")

GFreqF <- data.frame(GFreq)
GAvgF <- data.frame(GAvg)
WFreqF <- data.frame(WFreq)
WAvgF <- data.frame(WAvg)
names(GAvgF) <- c("AA", "Aa", "aa")
names(GFreqF) <- c("AA", "Aa", "aa")
names(WAvgF) <- c("Wildtype", "Recessive")
names(WFreqF) <- c("Wildtype", "Recessive")

library(ggplot2)

plot of chunk unnamed-chunk-1

library(reshape)
## Loading required package: plyr
## 
## Attaching package: 'reshape'
## 
## The following object(s) are masked from 'package:plyr':
## 
##     rename, round_any

PlotG1 <- function(Data, length, Title) {
    df <- data.frame(Generation = 1:length, AA = Data[, 1], Aa = Data[, 2], 
        aa = Data[, 3])
    df <- melt(df, id = "Generation", variable_name = "Allele")
    # Plot on same grid, each series colored differently --
    GAAA = ggplot(df, aes(Generation, value)) + geom_line(aes(colour = Allele), 
        size = 1) + ggtitle(Title)
    print(GAAA)
}

PlotW1 <- function(Data, length, Title) {
    df <- data.frame(Generation = 1:length, Wildtype = Data[, 1], Recessive = Data[, 
        2])
    df <- melt(df, id = "Generation", variable_name = "Phenotype")
    # Plot on same grid, each series colored differently --
    WAAA = ggplot(df, aes(Generation, value)) + geom_line(aes(colour = Phenotype), 
        size = 1) + ggtitle(Title)
    print(WAAA)
}

PlotA1 <- function(Data, length, Title) {
    df <- data.frame(Generation = 1:length, AB = Data[, 1], Ab = Data[, 2], 
        aB = Data[, 3], ab = Data[, 4])
    df <- melt(df, id = "Generation", variable_name = "Alleles")
    # Plot on same grid, each series colored differently --
    WAAA = ggplot(df, aes(Generation, value)) + geom_line(aes(colour = Alleles), 
        size = 1) + ggtitle(Title)
    print(WAAA)
}


PlotG1(GAvgF, 10, "Allele Frequency: Averaged by 10")

plot of chunk unnamed-chunk-1


PlotW1(WAvgF, 10, "Phenotype Frequency (Averaged by 10")

plot of chunk unnamed-chunk-1


PlotA1(AFreqF, 100, "Allele Combination Frequencies")
## Error: 'from' must be of length 1

You can also embed plots, for example: