BIO 509 Homework 1

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
## Global chunk defaults: no output prefix (comment = ""), warnings and
## messages hidden, code echoed, source not reformatted, 10 x 8 figures.
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, tidy = FALSE, 
    echo = TRUE, fig.width = 10, fig.height = 8)
## Wide console output; scipen = 10 biases printing toward fixed notation
options(width = 116, scipen = 10)

## NOTE(review): hard-coded working directory; the relative file paths used
## further down (read.csv / write.table) resolve against this location.
setwd("~/statistics/bio509/")

HW 1.1 Generation of pseudo-random numbers

## 1. generation by for loop
## Multiplicative congruential generator:
##   x[1] = (b * x0) %% m,  x[k + 1] = (b * x[k]) %% m,  u[k] = x[k] / m
##
## Args:
##   m:   modulus
##   b:   multiplier
##   x0:  seed (used to compute x[1]; not itself part of the output)
##   end: number of values to generate
## Returns:
##   data.frame with `end` rows and columns x (generator state) and u (x / m)
fun.1 <- function(m, b, x0, end) {
    ## Nothing requested: return an empty frame with the usual columns
    if (end < 1) {
        return(data.frame(x = numeric(0), u = numeric(0)))
    }

    x <- numeric(end)

    x[1] <- (b * x0) %% m

    ## seq_len() gives an empty sequence when end = 1, whereas
    ## seq(1, end - 1, 1) stops with a "wrong sign in 'by'" error
    for (i in seq_len(end - 1)) {
        x[i + 1] <- (b * x[i]) %% m
    }

    data.frame(x = x, u = x / m)
}

## 2. Small modulus (m = 7, b = 3, seed 2): the printed x values repeat
##    with period 6 (6, 4, 5, 1, 3, 2, 6, ...)
fun.1(m = 7, b = 3, x0 = 2, end = 20)

   x      u
1  6 0.8571
2  4 0.5714
3  5 0.7143
4  1 0.1429
5  3 0.4286
6  2 0.2857
7  6 0.8571
8  4 0.5714
9  5 0.7143
10 1 0.1429
11 3 0.4286
12 2 0.2857
13 6 0.8571
14 4 0.5714
15 5 0.7143
16 1 0.1429
17 3 0.4286
18 2 0.2857
19 6 0.8571
20 4 0.5714


## 3. Degenerate case: m = 29241 = 171^2 = b^2. x[1] = 171 * 3 = 513 and
##    x[2] = (171 * 513) %% 29241 = (3 * 171^2) %% 171^2 = 0, so every
##    later value stays at 0.
fun.1(m = 29241, b = 171, x0 = 3, end = 20)

     x       u
1  513 0.01754
2    0 0.00000
3    0 0.00000
4    0 0.00000
5    0 0.00000
6    0 0.00000
7    0 0.00000
8    0 0.00000
9    0 0.00000
10   0 0.00000
11   0 0.00000
12   0 0.00000
13   0 0.00000
14   0 0.00000
15   0 0.00000
16   0 0.00000
17   0 0.00000
18   0 0.00000
19   0 0.00000
20   0 0.00000


## 4. Same multiplier with m = 30269 and seed 27218; no cycle is visible
##    within the 50 values printed
fun.1(m = 30269, b = 171, x0 = 27218, end = 50)

       x       u
1  23121 0.76385
2  18721 0.61849
3  23046 0.76137
4   5896 0.19479
5   9339 0.30853
6  22981 0.75923
7  25050 0.82758
8  15621 0.51607
9   7519 0.24841
10 14451 0.47742
11 19332 0.63867
12  6451 0.21312
13 13437 0.44392
14 27552 0.91024
15 19697 0.65073
16  8328 0.27513
17  1445 0.04774
18  4943 0.16330
19 27990 0.92471
20  3788 0.12514
21 12099 0.39972
22 10637 0.35142
23  2787 0.09207
24 22542 0.74472
25 10519 0.34752
26 12878 0.42545
27 22770 0.75225
28 19238 0.63557
29 20646 0.68208
30 19262 0.63636
31 24750 0.81767
32 24859 0.82127
33 13229 0.43705
34 22253 0.73517
35 21638 0.71486
36  7280 0.24051
37  3851 0.12723
38 22872 0.75562
39  6411 0.21180
40  6597 0.21795
41  8134 0.26872
42 28809 0.95177
43 22761 0.75196
44 17699 0.58472
45 29898 0.98774
46 27366 0.90409
47 18160 0.59995
48 17922 0.59209
49  7493 0.24755
50 10005 0.33054


## 5. For U ~ Uniform(a, b) the variance is (b - a)^2 / 12;
##    with a = 0 and b = 1 that is 1/12 (about 0.0833)
U <- runif(n = 1000)
summary(U)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0006  0.2570  0.4910  0.4970  0.7440  1.0000

## Sample variance of the 1000 draws, to compare with the theoretical 1/12
var(U)

[1] 0.08098


## 6. Empirical proportion of draws below 0.6 (the TRUE share);
##    the theoretical value for Uniform(0, 1) is 0.6
prop.table(table(U < 0.6))


FALSE  TRUE 
0.392 0.608

HW 1.2 Data Manipulation

## 1. Read in data into R
## The file is read with read.csv (comma-separated, header row) despite its
## .txt extension; the path is relative to the working directory.
flow.occ.table <- read.csv("flow-occ-table.txt")
head(flow.occ.table)

    Occ1 Flow1   Occ2 Flow2   Occ3 Flow3
1 0.0100    14 0.0186    27 0.0137    17
2 0.0133    18 0.0250    39 0.0187    25
3 0.0088    12 0.0180    30 0.0095    11
4 0.0115    16 0.0203    33 0.0217    19
5 0.0069     8 0.0178    25 0.0123    13
6 0.0077    11 0.0151    24 0.0092    13


## 2. Column names change
## The character class [lowc] deletes every lowercase l, o, w and c, so
## "Occ1" becomes "O1" and "Flow1" becomes "F1" (capitals and digits are kept).
names(flow.occ.table) <- gsub("[lowc]" , "", names(flow.occ.table))
head(flow.occ.table)

      O1 F1     O2 F2     O3 F3
1 0.0100 14 0.0186 27 0.0137 17
2 0.0133 18 0.0250 39 0.0187 25
3 0.0088 12 0.0180 30 0.0095 11
4 0.0115 16 0.0203 33 0.0217 19
5 0.0069  8 0.0178 25 0.0123 13
6 0.0077 11 0.0151 24 0.0092 13



## 3. Create new data frame specified
## plan A: for each column pair i = 1, 2, 3, find the row position where F_i
## is largest, then report that maximum together with O_i from the same row
q3.list <- lapply(1:3,
                  function(k) {
                      flow.col <- paste("F", k, sep = "")
                      occ.col  <- paste("O", k, sep = "")

                      ## row index of the first maximum of the F column
                      peak.row <- which.max(flow.occ.table[, flow.col])

                      data.frame(max.F           = flow.occ.table[peak.row, flow.col],
                                 corresponding.O = flow.occ.table[peak.row, occ.col])
                  })

## Stack the three one-row frames into the 3-row result
q3.df <- do.call(rbind, q3.list)
q3.df

  max.F corresponding.O
1   203          0.1744
2   174          0.1231
3   142          0.2017


## plan B: Loop i = 1,2,3, melt each (O_i, F_i) pair to long format, stack the
## three pieces, then summarise per F column.
##
## The intermediate long-format objects get their own names so that q3.df only
## ever holds the 3-row summary: q3.df is what step 4 writes to disk, and
## reusing the name for the melted data would export the long table instead.
library(reshape2)
q3.long.list <- lapply(1:3,
                       function(i) {
                           Oi <- paste("O", i, sep = "")
                           Fi <- paste("F", i, sep = "")

                           ## Keep O_i as the id column; F_i becomes variable/value
                           df <- melt(data = flow.occ.table[,c(Oi, Fi)], id.vars = Oi)
                           ## Common name for the id column so the pieces can be rbind-ed
                           names(df)[1] <- "O"

                           df
                       })

q3.long.df <- do.call(rbind, q3.long.list)

library(plyr)
## Per F column: maximum flow and the O value on the row where it occurs;
## [,2:3] drops the grouping column so the result matches plan A
q3.df <- ddply(q3.long.df, "variable", summarise, max.F = max(value), corresponding.O = O[which.max(value)])[,2:3]
q3.df

  max.F corresponding.O
1   203          0.1744
2   174          0.1231
3   142          0.2017


## plan C: melt O columns and F columns separately, combine, manipulation
library(reshape2)
## Columns 1, 3, 5 are O1-O3 and columns 2, 4, 6 are F1-F3 (see the head()
## output after the rename); each melt stacks its three columns into one
## variable/value pair.
o.df <- melt(flow.occ.table[,c(1,3,5)], variable.name = "O.var", value.name = "O.value")
f.df <- melt(flow.occ.table[,c(2,4,6)], variable.name = "F.var", value.name = "F.value")
## Side-by-side bind: assumes both melts stack columns in the same order, so
## row k of f.df and row k of o.df come from the same original row and pair
combo.df <- cbind(f.df, o.df)

library(plyr)
## Per F column: maximum flow and the O value on the row where it occurs;
## [,2:3] drops the grouping column
ddply(combo.df, "F.var", summarise, max.F = max(F.value), corresponding.O = O.value[which.max(F.value)])[,2:3]

  max.F corresponding.O
1   203          0.1744
2   174          0.1231
3   142          0.2017



## 4. Writing to a tab-delimited file
## NOTE(review): q3.df holds whatever the last executed plan above assigned to
## it; verify it is the 3-row summary before relying on the written file.
write.table(q3.df, file = "flow-occ-table-clean.txt", sep = "\t")