Seminar2.R

Abrar — Apr 2, 2014, 1:30 PM

# rnorm(n=10)
set.seed(540)
# rnorm(n=10, mean=100, sd=3)

n <- 10
B <- 4
x <- matrix(rnorm(n*B), nrow=n)
# str(x)
# head(x)
rownames(x) <- sprintf("obs%02d", 1:n)
colnames(x) <- sprintf("samp%02d", 1:B)
x
       samp01    samp02  samp03  samp04
obs01 -1.6145 -0.008744 -0.8413 -1.2752
obs02  0.7340  0.108303 -1.0581  0.3885
obs03  2.0947  0.806000 -0.2552 -0.1950
obs04  0.5009 -1.504498  0.3213  1.4259
obs05  1.7355 -0.417628  0.7739 -0.6323
obs06 -2.0584  0.480787 -0.1932  0.6684
obs07 -1.0339 -0.168417 -1.7138  0.5225
obs08  1.4914  0.760428  1.2975 -0.2879
obs09 -0.5068  0.624589  1.0531 -0.3869
obs10 -0.7001  0.580386  0.3417  0.3060
# dim(x)
# mean(x[ , 2])
colMeans(x)
  samp01   samp02   samp03   samp04 
 0.06427  0.12612 -0.02743  0.05340 
# apply(x,1,head)

mean(x)
[1] 0.05409



B <- 1000
x10 <- matrix(rnorm(10 * B), nrow = 10)
x100 <- matrix(rnorm(100 * B), nrow = 100)
x1000 <- matrix(rnorm(1000 * B), nrow = 1000)
x10000 <- matrix(rnorm(10000 * B), nrow = 10000)
xBar10 <- colMeans(x10)
xBar100 <- colMeans(x100)
xBar1000 <- colMeans(x1000)
xBar10000 <- colMeans(x10000)
xBarSd10 <- sd(colMeans(x10))
xBarSd100 <- sd(colMeans(x100))
xBarSd1000 <- sd(colMeans(x1000))
xBarSd10000 <- sd(colMeans(x10000))

cbind(sampSize = c(10, 100, 1000, 10000),
      trueSEM = 1 / sqrt(c(10, 100, 1000, 10000)),
      obsSEM = c(xBarSd10, xBarSd100, xBarSd1000, xBarSd10000))
     sampSize trueSEM  obsSEM
[1,]       10 0.31623 0.31136
[2,]      100 0.10000 0.10099
[3,]     1000 0.03162 0.03285
[4,]    10000 0.01000 0.01014
IQR(colMeans(x10000))
[1] 0.01333
mad(colMeans(x10000))
[1] 0.009798

B <- 1000
n <- (10^(1:4))
names(n) <- paste0("sample",n)

n <- 100
x <- matrix(rnorm(n * 4), nrow = n)
mean(x>0&x<=0.1)
[1] 0.0325
pnorm(c(0,0.1))
[1] 0.5000 0.5398
x<=0.1
        [,1]  [,2]  [,3]  [,4]
  [1,] FALSE FALSE  TRUE FALSE
  [2,]  TRUE  TRUE  TRUE  TRUE
  [3,] FALSE  TRUE FALSE  TRUE
  [4,] FALSE  TRUE  TRUE FALSE
  [5,]  TRUE FALSE  TRUE  TRUE
  [6,] FALSE FALSE FALSE FALSE
  [7,]  TRUE FALSE  TRUE FALSE
  [8,] FALSE FALSE FALSE FALSE
  [9,] FALSE  TRUE  TRUE FALSE
 [10,]  TRUE  TRUE FALSE FALSE
 [11,] FALSE FALSE  TRUE FALSE
 [12,]  TRUE FALSE  TRUE FALSE
 [13,]  TRUE  TRUE  TRUE FALSE
 [14,]  TRUE  TRUE  TRUE  TRUE
 [15,]  TRUE  TRUE FALSE  TRUE
 [16,] FALSE FALSE FALSE  TRUE
 [17,] FALSE FALSE  TRUE  TRUE
 [18,]  TRUE FALSE FALSE FALSE
 [19,]  TRUE FALSE FALSE FALSE
 [20,]  TRUE  TRUE FALSE  TRUE
 [21,] FALSE FALSE  TRUE FALSE
 [22,] FALSE  TRUE FALSE FALSE
 [23,]  TRUE  TRUE  TRUE  TRUE
 [24,]  TRUE  TRUE FALSE FALSE
 [25,] FALSE FALSE FALSE  TRUE
 [26,] FALSE FALSE FALSE  TRUE
 [27,] FALSE  TRUE FALSE FALSE
 [28,]  TRUE  TRUE FALSE FALSE
 [29,] FALSE FALSE  TRUE FALSE
 [30,]  TRUE  TRUE FALSE  TRUE
 [31,]  TRUE FALSE  TRUE  TRUE
 [32,] FALSE FALSE FALSE FALSE
 [33,] FALSE FALSE FALSE FALSE
 [34,]  TRUE FALSE FALSE FALSE
 [35,]  TRUE  TRUE  TRUE FALSE
 [36,] FALSE  TRUE  TRUE FALSE
 [37,]  TRUE FALSE  TRUE FALSE
 [38,] FALSE  TRUE  TRUE  TRUE
 [39,]  TRUE  TRUE FALSE  TRUE
 [40,]  TRUE  TRUE  TRUE FALSE
 [41,]  TRUE  TRUE  TRUE  TRUE
 [42,]  TRUE  TRUE  TRUE FALSE
 [43,] FALSE  TRUE  TRUE FALSE
 [44,] FALSE FALSE FALSE  TRUE
 [45,] FALSE FALSE  TRUE  TRUE
 [46,] FALSE FALSE  TRUE  TRUE
 [47,] FALSE  TRUE FALSE  TRUE
 [48,] FALSE FALSE  TRUE FALSE
 [49,] FALSE FALSE FALSE  TRUE
 [50,] FALSE FALSE  TRUE FALSE
 [51,] FALSE FALSE FALSE FALSE
 [52,]  TRUE FALSE  TRUE  TRUE
 [53,]  TRUE FALSE  TRUE  TRUE
 [54,]  TRUE FALSE  TRUE  TRUE
 [55,] FALSE  TRUE  TRUE FALSE
 [56,]  TRUE  TRUE FALSE  TRUE
 [57,]  TRUE  TRUE FALSE FALSE
 [58,] FALSE FALSE  TRUE FALSE
 [59,]  TRUE  TRUE FALSE  TRUE
 [60,] FALSE  TRUE FALSE  TRUE
 [61,]  TRUE  TRUE FALSE FALSE
 [62,]  TRUE FALSE  TRUE  TRUE
 [63,] FALSE FALSE  TRUE FALSE
 [64,] FALSE FALSE  TRUE FALSE
 [65,] FALSE  TRUE  TRUE  TRUE
 [66,] FALSE FALSE FALSE  TRUE
 [67,]  TRUE FALSE FALSE  TRUE
 [68,] FALSE FALSE FALSE  TRUE
 [69,] FALSE FALSE FALSE  TRUE
 [70,]  TRUE  TRUE  TRUE FALSE
 [71,]  TRUE  TRUE FALSE FALSE
 [72,] FALSE  TRUE  TRUE  TRUE
 [73,]  TRUE  TRUE  TRUE  TRUE
 [74,]  TRUE  TRUE FALSE FALSE
 [75,] FALSE FALSE  TRUE FALSE
 [76,] FALSE  TRUE FALSE  TRUE
 [77,]  TRUE FALSE FALSE  TRUE
 [78,]  TRUE  TRUE FALSE  TRUE
 [79,]  TRUE FALSE  TRUE  TRUE
 [80,] FALSE  TRUE FALSE FALSE
 [81,] FALSE  TRUE  TRUE FALSE
 [82,]  TRUE FALSE  TRUE FALSE
 [83,]  TRUE FALSE  TRUE  TRUE
 [84,]  TRUE  TRUE FALSE FALSE
 [85,] FALSE FALSE  TRUE FALSE
 [86,] FALSE  TRUE FALSE  TRUE
 [87,]  TRUE FALSE FALSE FALSE
 [88,] FALSE FALSE  TRUE FALSE
 [89,] FALSE  TRUE FALSE FALSE
 [90,] FALSE  TRUE  TRUE  TRUE
 [91,] FALSE  TRUE  TRUE  TRUE
 [92,]  TRUE  TRUE FALSE FALSE
 [93,] FALSE FALSE  TRUE FALSE
 [94,]  TRUE  TRUE FALSE  TRUE
 [95,]  TRUE  TRUE FALSE  TRUE
 [96,]  TRUE  TRUE  TRUE FALSE
 [97,] FALSE FALSE  TRUE FALSE
 [98,]  TRUE FALSE FALSE  TRUE
 [99,]  TRUE FALSE  TRUE FALSE
[100,]  TRUE FALSE  TRUE  TRUE
pt(0.1,1)
[1] 0.5317


min.x <- -5

max.x <- 5

num.samples <- 1000

x <- seq(from = min.x, to = max.x, length = num.samples)
# Open new blank plot with x limits from -5 to 5, and y limits from 0 to 1
plot(c(-5, 5), c(0, 1), xlab = "x", ylab = "f(x)", main = "Normal probability density function", 
     type = "n")

# Add each density plot one at a time
lines(x, dnorm(x, mean = 0, sd = 0.5), lwd = 2, col = "red")

lines(x, dnorm(x, mean = 0, sd = 1), lwd = 2, col = "green")

lines(x, dnorm(x, mean = 0, sd = 2), lwd = 2, col = "blue")

lines(x, dnorm(x, mean = -2, sd = 1), lwd = 2, col = "magenta")

# We can also add a legend to the plot
legend("topright", c("mean=0, sd=0.5", "mean=0, sd=1", "mean=0, sd=2", "mean=-2, sd=1"), 
       col = c("red", "green", "blue", "magenta"), lty = 1, lwd = 2)

plot of chunk unnamed-chunk-1


normal.mean <- c(0, 0, 0, -2)

normal.sd <- c(0.5, 1, 2, 1)

colors <- c("red", "green", "blue", "magenta")

# Open new plot with x limits from -5 to 5, and y limits from 0 to 1
plot(c(-5, 5), c(0, 1), xlab = "x", ylab = "f(x)", main = "Normal probability density function", 
     type = "n")

# Add density plots with a for loop
for (i in 1:length(normal.mean)) {
  lines(x, dnorm(x, mean = normal.mean[i], sd = normal.sd[i]), lwd = 2, col = colors[i])
}

# Add a legend to the plot
legend("topright", paste0("mean=", normal.mean, ", sd=", normal.sd), col = colors, 
       lty = 1, lwd = 2)

plot of chunk unnamed-chunk-1


# Open new plot with x limits from -5 to 5, and y limits from 0 to 1
plot(c(-5, 5), c(0, 1), xlab = "x", ylab = "f(x)", main = "Normal probability density function", 
     type = "n")

# Create our own user-defined function for plotting Normal probability
# density function
f <- function(col, ...) {
  lines(x, dnorm(x, ...), col = col, lwd = 2)
}

# apply this function with different parameters
plot.status <- mapply(f, mean = normal.mean, sd = normal.sd, col = colors)

# Add a legend to the plot
legend("topright", paste0("mean=", normal.mean, ", sd=", normal.sd), col = colors, 
       lty = 1, lwd = 2)

plot of chunk unnamed-chunk-1


set.seed(1)

normal.mean <- 1
normal.sd <- 1
true.mean <- normal.mean
print(true.mean)
[1] 1

n <- 100

y <- rnorm(n, mean = normal.mean, sd = normal.sd)

# This is the sample mean
(y.mean <- mean(y))
[1] 1.109
# Compare the sample mean to the true mean
y.mean - true.mean
[1] 0.1089

set.seed(1)

# Number of samples
num.samp <- 100

# Size of each sample
samp.size <- 10

# Generate the samples in a matrix with num.samp rows and samp.size columns
y <- matrix(rnorm(n = num.samp * samp.size, mean = normal.mean, sd = normal.sd), 
            nrow = num.samp, ncol = samp.size)

y.mean <- apply(y, 1, mean)
length(y.mean)
[1] 100
head(y.mean)
[1] 0.7531 1.4308 1.1723 1.0057 0.8410 1.2483
y.mean <- rowMeans(y)



mean.diff <- y.mean - true.mean
boxplot(mean.diff)

plot of chunk unnamed-chunk-1


normalSampleMean <- function(normal.mean, normal.sd, num.samp, samp.size) {
  y <- matrix(rnorm(n = num.samp * samp.size, mean = normal.mean, sd = normal.sd), 
              nrow = num.samp, ncol = samp.size)

  y.mean <- rowMeans(y)

  return(y.mean)
}

samp.sizes <- c(10, 100, 1000, 10000)

names(samp.sizes) <- paste0("n=", samp.sizes)

samp.sizes
   n=10   n=100  n=1000 n=10000 
     10     100    1000   10000 

set.seed(1)

num.samp <- 100

y.mean <- sapply(samp.sizes, normalSampleMean, num.samp = num.samp, normal.mean = normal.mean, 
                 normal.sd = normal.sd)

set.seed(1)

num.samp <- 100

# Create an empty matrix to store y.means later
y.mean <- matrix(nrow = num.samp, ncol = length(samp.sizes), dimnames = list(c(1:num.samp), 
                                                                             names(samp.sizes)))

# Compute y.means
for (i in 1:length(samp.sizes)) {
  y.mean[, i] <- normalSampleMean(samp.size = samp.sizes[i], num.samp = num.samp, 
                                  normal.mean = normal.mean, normal.sd = normal.sd)
}

boxplot(y.mean - true.mean, xlab = "Sample size (n)", ylab = expression(bar(Y)[n] - 
                                                                          mu))

plot of chunk unnamed-chunk-1