BIO 509 Homework 1
## Settings for RMarkdown (knitr chunk options): http://yihui.name/knitr/options#chunk_options
## comment = "" drops the prefix knitr normally puts in front of printed output;
## warnings and messages are hidden; tidy = FALSE keeps the code as typed;
## echo = TRUE shows the code; figures default to 10 x 8.
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, tidy = FALSE,
echo = TRUE, fig.width = 10, fig.height = 8)
## Wider console output (116 columns); scipen = 10 biases printing toward fixed over scientific notation
options(width = 116, scipen = 10)
## NOTE(review): hard-coded, machine-specific working directory; the relative file
## paths used further down ("flow-occ-table.txt", "flow-occ-table-clean.txt") depend on it
setwd("~/statistics/bio509/")
HW 1.1 Generation of pseudo-random numbers
## 1. generation by for loop
## Multiplicative congruential generator: x[1] = (b * x0) %% m and
## x[i + 1] = (b * x[i]) %% m afterwards.
##   m   : modulus
##   b   : multiplier
##   x0  : seed (not included in the output)
##   end : number of values to generate (a single non-negative number)
## Returns a data frame with `end` rows and two columns:
##   x : the generated sequence
##   u : x / m, the sequence rescaled by the modulus
fun.1 <- function(m, b, x0, end) {
  stopifnot(is.numeric(end), length(end) == 1, !is.na(end), end >= 0)
  ## preallocate a numeric (not logical) vector of the requested length
  x <- numeric(end)
  if (end > 0) {
    x[1] <- (b * x0) %% m
    ## seq_len() yields an empty sequence when end == 1, so the loop is skipped
    ## instead of failing the way seq(1, 0, 1) does
    for (i in seq_len(end - 1)) {
      x[i + 1] <- (b * x[i]) %% m
    }
  }
  data.frame(x = x, u = x / m)
}
## 2. m = 7, b = 3, x0 = 2: the generator cycles through 6, 4, 5, 1, 3, 2 (period 6) and then repeats
fun.1(m = 7, b = 3, x0 = 2, end = 20)
x u
1 6 0.8571
2 4 0.5714
3 5 0.7143
4 1 0.1429
5 3 0.4286
6 2 0.2857
7 6 0.8571
8 4 0.5714
9 5 0.7143
10 1 0.1429
11 3 0.4286
12 2 0.2857
13 6 0.8571
14 4 0.5714
15 5 0.7143
16 1 0.1429
17 3 0.4286
18 2 0.2857
19 6 0.8571
20 4 0.5714
## 3. m = 29241 equals b^2 = 171^2, so x2 = (171 * 513) %% 29241 = 0 and every later value stays 0
fun.1(m = 29241, b = 171, x0 = 3, end = 20)
x u
1 513 0.01754
2 0 0.00000
3 0 0.00000
4 0 0.00000
5 0 0.00000
6 0 0.00000
7 0 0.00000
8 0 0.00000
9 0 0.00000
10 0 0.00000
11 0 0.00000
12 0 0.00000
13 0 0.00000
14 0 0.00000
15 0 0.00000
16 0 0.00000
17 0 0.00000
18 0 0.00000
19 0 0.00000
20 0 0.00000
## 4. m = 30269, b = 171, x0 = 27218: no value repeats within the first 50 draws
fun.1(m = 30269, b = 171, x0 = 27218, end = 50)
x u
1 23121 0.76385
2 18721 0.61849
3 23046 0.76137
4 5896 0.19479
5 9339 0.30853
6 22981 0.75923
7 25050 0.82758
8 15621 0.51607
9 7519 0.24841
10 14451 0.47742
11 19332 0.63867
12 6451 0.21312
13 13437 0.44392
14 27552 0.91024
15 19697 0.65073
16 8328 0.27513
17 1445 0.04774
18 4943 0.16330
19 27990 0.92471
20 3788 0.12514
21 12099 0.39972
22 10637 0.35142
23 2787 0.09207
24 22542 0.74472
25 10519 0.34752
26 12878 0.42545
27 22770 0.75225
28 19238 0.63557
29 20646 0.68208
30 19262 0.63636
31 24750 0.81767
32 24859 0.82127
33 13229 0.43705
34 22253 0.73517
35 21638 0.71486
36 7280 0.24051
37 3851 0.12723
38 22872 0.75562
39 6411 0.21180
40 6597 0.21795
41 8134 0.26872
42 28809 0.95177
43 22761 0.75196
44 17699 0.58472
45 29898 0.98774
46 27366 0.90409
47 18160 0.59995
48 17922 0.59209
49 7493 0.24755
50 10005 0.33054
## 5. For U ~ Uniform(a, b) the variance is (b - a)^2 / 12, i.e. 1/12 (about 0.0833)
## for runif's default range a = 0, b = 1.
## NOTE(review): no set.seed() call is visible, so the sample (and the numbers printed below) change on every run
U <- runif(n = 1000)
summary(U)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0006 0.2570 0.4910 0.4970 0.7440 1.0000
var(U)
[1] 0.08098
## 6. Empirical proportion of draws below 0.6 (the TRUE column); the theoretical value for Uniform(0, 1) is 0.6
prop.table(table(U < 0.6))
FALSE TRUE
0.392 0.608
HW 1.2 Data Manipulation
## 1. Read the data into R
## read.csv expects a comma-separated file with a header row, despite the .txt extension;
## the path is relative to the current working directory
flow.occ.table <- read.csv("flow-occ-table.txt")
head(flow.occ.table)
Occ1 Flow1 Occ2 Flow2 Occ3 Flow3
1 0.0100 14 0.0186 27 0.0137 17
2 0.0133 18 0.0250 39 0.0187 25
3 0.0088 12 0.0180 30 0.0095 11
4 0.0115 16 0.0203 33 0.0217 19
5 0.0069 8 0.0178 25 0.0123 13
6 0.0077 11 0.0151 24 0.0092 13
## 2. Shorten the column names: Occ1 -> O1, Flow1 -> F1, and so on
## The character class [lowc] deletes every lower-case l, o, w and c, which leaves
## only the capital initial and the trailing digit of each name
names(flow.occ.table) <- gsub("[lowc]" , "", names(flow.occ.table))
head(flow.occ.table)
O1 F1 O2 F2 O3 F3
1 0.0100 14 0.0186 27 0.0137 17
2 0.0133 18 0.0250 39 0.0187 25
3 0.0088 12 0.0180 30 0.0095 11
4 0.0115 16 0.0203 33 0.0217 19
5 0.0069 8 0.0178 25 0.0123 13
6 0.0077 11 0.0151 24 0.0092 13
## 3. Create new data frame specified
## plan A: for each column pair i = 1, 2, 3, find the row where F_i is largest,
## then keep that maximum together with the O_i value from the same row
q3.list <- lapply(seq_len(3), function(idx) {
  flow <- flow.occ.table[[paste0("F", idx)]]
  occ <- flow.occ.table[[paste0("O", idx)]]
  peak <- which.max(flow)
  data.frame(max.F = flow[peak], corresponding.O = occ[peak])
})
## stack the three one-row data frames into a single 3-row result
q3.df <- do.call(rbind, q3.list)
q3.df
max.F corresponding.O
1 203 0.1744
2 174 0.1231
3 142 0.2017
## plan B: Loop i = 1,2,3, melt each (O_i, F_i) pair to long format, stack, then summarise
library(reshape2)
library(plyr)
q3.list <- lapply(1:3,
                  function(i) {
                    Oi <- paste0("O", i)
                    Fi <- paste0("F", i)
                    ## keep O_i as the id column; F_i becomes (variable, value)
                    df <- melt(data = flow.occ.table[, c(Oi, Fi)], id.vars = Oi)
                    ## give the id column a common name so the pieces can be stacked
                    names(df)[1] <- "O"
                    df
                  })
## The stacked long table is only an intermediate. It gets its own name so that
## q3.df keeps holding the question-3 answer, which is what the file export
## at the end of the script writes out.
q3.long <- do.call(rbind, q3.list)
## per F column: the maximum value and the O recorded on the same row
q3.df <- ddply(q3.long, "variable", summarise,
               max.F = max(value),
               corresponding.O = O[which.max(value)])[, 2:3]
q3.df
max.F corresponding.O
1 203 0.1744
2 174 0.1231
3 142 0.2017
## plan C: melt the O columns and the F columns separately, bind them side by side, summarise per F column
library(reshape2)
library(plyr)
## columns 1, 3, 5 are O1..O3 and columns 2, 4, 6 are F1..F3
o.df <- melt(flow.occ.table[, c(1, 3, 5)],
             variable.name = "O.var", value.name = "O.value")
f.df <- melt(flow.occ.table[, c(2, 4, 6)],
             variable.name = "F.var", value.name = "F.value")
## cbind pairs rows purely by position: row k of f.df is assumed to belong with row k of o.df
combo.df <- cbind(f.df, o.df)
ddply(
  combo.df, "F.var", summarise,
  max.F = max(F.value),
  corresponding.O = O.value[which.max(F.value)]
)[, 2:3]
max.F corresponding.O
1 203 0.1744
2 174 0.1231
3 142 0.2017
## 4. Writing to a tab-delimited file
## NOTE(review): this writes whatever q3.df holds at this point; confirm it is the 3-row
## summary (max.F, corresponding.O) and not a melted intermediate table.
## NOTE(review): with these arguments write.table also emits row names and quotes the
## header strings; confirm that is the wanted file layout.
write.table(q3.df, file = "flow-occ-table-clean.txt", sep = "\t")