---
title: "WPA#2"
author: "Rebekka Herz"
date: "23. April 2015"
output: html_document
---
Coding Basics
# Draw 100 values from a standard normal distribution.
# (A standard deviation cannot be negative, and an object name cannot
# contain a space.)
mydata <- rnorm(n = 100, mean = 0, sd = 1)
# Store the sample mean; again, no space is allowed in the object name.
data.mean <- mean(x = mydata)
# Refer to the object by its bare name, without quotation marks.
print(data.mean)
## [1] -0.0486427
# Corrected code, repeated in full:
mydata <- rnorm(n = 100, mean = 0, sd = 1)
data.mean <- mean(x = mydata)
print(data.mean)
## [1] -0.01412308
# Combine character and numeric values in a single vector.
mixed <- c("batman", 20, "superman", 10)
print(mixed)
## [1] "batman" "20" "superman" "10"
# The output now shows quotation marks around "20" and "10": R converted
# all the numeric scalars to character strings.
# [1] "batman" "20" "superman" "10"
# Temperatures in degrees Celsius.
Celsius <- c(17, 21, 18, 21, 14)
# Convert to degrees Fahrenheit: F = C * 9 / 5 + 32.
Celsius * 9 / 5 + 32
## [1] 62.6 69.8 64.4 69.8 57.2
# Store the converted values by computing them from Celsius rather than
# re-typing the printed numbers, so the two vectors cannot get out of sync.
Fahrenheit <- Celsius * 9 / 5 + 32
Fahrenheit # [1] 62.6 69.8 64.4 69.8 57.2
## [1] 62.6 69.8 64.4 69.8 57.2
# Three numeric vectors built from sequences, then joined end to end.
# a: 10 to 50 in steps of 10
a <- seq(10, 50, by = 10)
print(a)
## [1] 10 20 30 40 50
# b: the integers 12 through 14
b <- 12:14
print(b)
## [1] 12 13 14
# c: 97 to 117 in steps of 10. This vector shares its name with the base
# function c(); the call c(a, b, c) below still works because R looks for
# a function when a name is used in a call.
c <- seq(97, 117, by = 10)
print(c)
## [1] 97 107 117
# Concatenate the three vectors in order.
print(c(a, b, c))
## [1] 10 20 30 40 50 12 13 14 97 107 117
# [1] 10 20 30 40 50 12 13 14 97 107 117
# Repeat each of 1..5 twice, then repeat that whole pattern twice.
rep(1:5, each = 2, times = 2)
## [1] 1 1 2 2 3 3 4 4 5 5 1 1 2 2 3 3 4 4 5 5
#[1] 1 1 2 2 3 3 4 4 5 5 1 1 2 2 3 3 4 4 5 5
# The integers 1 to 10. The assignment target is written as a plain name;
# R also accepts a quoted string there, but the quotes are not needed.
numbers <- 1:10
numbers
## [1] 1 2 3 4 5 6 7 8 9 10
# [1] 1 2 3 4 5 6 7 8 9 10
# Square every element of numbers.
squares <- numbers^2
squares
## [1] 1 4 9 16 25 36 49 64 81 100
# [1] 1 4 9 16 25 36 49 64 81 100
mean(squares) # [1] 38.5
## [1] 38.5
median(squares) # [1] 30.5
## [1] 30.5
# Alternative solution: build the squares directly from the sequence.
squares <- (1:10)^2
mean(squares)
## [1] 38.5
#[1] 38.5
Sampling Data and Descriptive Statistics
# Draw 50 values from a normal distribution with mean 30 and sd 5.
normal.samp <- rnorm(50, mean = 30, sd = 5)
normal.samp
## [1] 33.37964 27.31284 26.24168 27.93522 29.85343 24.44303 33.18505
## [8] 28.61569 31.53742 30.77116 18.72977 32.45014 32.89467 26.30186
## [15] 36.63767 30.36546 27.94786 37.33674 28.76847 33.85961 23.71574
## [22] 22.85559 21.17301 37.99507 32.13158 32.60755 22.80834 28.62620
## [29] 30.60299 25.67926 36.77393 34.60707 39.17245 31.00509 30.88363
## [36] 24.65534 27.74486 45.20054 31.67259 31.78011 38.10723 26.42067
## [43] 27.37119 37.04119 20.25604 38.86526 35.85145 32.85908 30.48115
## [50] 31.13338
# [1] 31.60301 23.15112 31.45615 36.92773 30.84523 35.34461 22.07055 28.08075 34.32417
# [10] 32.80625 22.80081 24.96917 25.29822 30.26726 27.13027 31.10274 25.82742 29.34823
# [19] 32.83665 36.04233 30.63610 25.24295 26.68743 31.72297 24.60073 31.04826 23.48540#
# [28] 28.15094 26.78531 35.45582 31.68297 28.42584 29.10464 30.26134 33.05441 28.53102
# [37] 16.60142 23.81078 32.88284 35.67945 33.47071 31.43286 20.00966 35.74889 37.17930
# [46] 27.60280 41.44553 14.81109 24.78237 31.23251
# Smallest and largest values of the sample.
min(normal.samp)
## [1] 18.72977
#[1] 19.60059
max(normal.samp)
## [1] 45.20054
# 41.83516
# Draw 25 uniform values spanning the range of the current sample. The
# bounds are computed from normal.samp instead of being pasted in from an
# earlier run, so they stay correct every time the sample is redrawn.
unif.samp <- runif(n = 25, min = min(normal.samp), max = max(normal.samp))
unif.samp
## [1] 26.19445 22.08059 29.23311 40.34769 38.59218 29.98552 23.98050
## [8] 25.34036 24.19495 23.94771 26.66968 29.60815 27.05277 21.90252
## [15] 26.84882 39.12309 39.99840 29.62902 33.73654 25.73451 25.82408
## [22] 29.58814 28.50130 23.35433 19.69603
# [1] 36.48034 20.04680 28.06982 20.73189 37.34162 40.13864 31.66503 38.55255 32.57586
# [10] 34.46048 30.96945 36.56002 39.68646 37.84349 21.18885 19.68722 20.76120 38.86818
# [19] 32.41315 26.57874 40.93390 32.74553 31.41625 28.13726 26.70572
# Alternative solution: draw a fresh normal sample, then take 25 uniform
# draws bounded below by the sample minimum and above by the sample median.
# NOTE(review): the upper bound here is the median, not the maximum --
# confirm which bound the exercise intends.
normal.samp <- rnorm(n = 50, mean = 30, sd = 5)
runif(25, min = min(normal.samp), max = median(normal.samp))
## [1] 18.30733 23.66720 20.47834 18.93364 28.57387 27.58160 29.68644
## [8] 23.92159 30.67031 31.65012 24.58033 25.83212 29.27750 30.31890
## [15] 22.52411 18.34373 23.14209 28.40405 21.16719 18.32079 25.17423
## [22] 25.78646 23.56804 28.08079 29.41126
# [1] 31.12966 19.21864 24.51438 19.56310 21.39904 23.57787 27.27583 29.42631 15.22104
# [10] 22.06297 29.24186 29.03857 17.40550 30.76306 17.89672 19.66210 19.94212 24.77739
#[19] 21.13504 26.23833 24.28667 26.43533 23.56719 17.68813 26.03520
# Expected answer: 5. The mean of the sum of two random variables is the
# sum of their means; in this case that is 0 + 5.
sample.1 <- rnorm(n = 1000, mean = 0, sd = 1)
sample.2 <- runif(n = 1000, min = 0, max = 10)
# Element-wise sum of the two samples.
sum.sample <- sample.1 + sample.2
print(mean(sum.sample))
## [1] 5.017553
# [1] 5.129353
# Simulate 100 fair coin flips coded as 0 and 1.
# TRUE is spelled out: T is only a variable bound to TRUE and can be
# reassigned, which would silently change the sampling behaviour.
sample(
  x = c(0, 1),
  size = 100,
  replace = TRUE,
  prob = c(.5, .5)
)
## [1] 0 1 1 0 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0 0 0 1 0 1 1 1 1 1 1 0
## [36] 1 1 0 0 0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0
## [71] 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0
# [1] 1 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 0 1 1 1 1 0
# [45] 1 1 0 1 0 1 1 1 0 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0 1 0 1 1 0
# [89] 1 0 1 0 1 0 0 1 1 1 1 1
# Simulate 100 fair coin flips labelled "Heads" and "Tails".
# TRUE is spelled out: T is only a variable bound to TRUE and can be
# reassigned, which would silently change the sampling behaviour.
sample(
  x = c("Heads", "Tails"),
  size = 100,
  replace = TRUE,
  prob = c(.5, .5)
)
## [1] "Tails" "Heads" "Tails" "Tails" "Heads" "Tails" "Tails" "Heads"
## [9] "Heads" "Heads" "Tails" "Tails" "Tails" "Tails" "Tails" "Tails"
## [17] "Heads" "Tails" "Tails" "Heads" "Heads" "Heads" "Tails" "Tails"
## [25] "Tails" "Tails" "Heads" "Tails" "Heads" "Tails" "Heads" "Heads"
## [33] "Tails" "Tails" "Tails" "Tails" "Tails" "Heads" "Tails" "Heads"
## [41] "Heads" "Heads" "Tails" "Heads" "Tails" "Heads" "Tails" "Heads"
## [49] "Heads" "Tails" "Tails" "Tails" "Tails" "Tails" "Tails" "Heads"
## [57] "Tails" "Heads" "Tails" "Heads" "Heads" "Heads" "Tails" "Tails"
## [65] "Heads" "Tails" "Tails" "Heads" "Tails" "Heads" "Heads" "Heads"
## [73] "Tails" "Heads" "Tails" "Heads" "Heads" "Tails" "Heads" "Tails"
## [81] "Heads" "Heads" "Tails" "Heads" "Tails" "Tails" "Tails" "Tails"
## [89] "Tails" "Tails" "Heads" "Tails" "Heads" "Tails" "Tails" "Tails"
## [97] "Tails" "Tails" "Heads" "Tails"
# [1] "Tails" "Heads" "Tails" "Heads" "Tails" "Heads" "Heads" "Tails" "Tails" "Heads" #"Tails"
# [12] "Heads" "Heads" "Heads" "Tails" "Heads" "Heads" "Heads" "Tails" "Tails" "Tails" #"Tails"
# [23] "Tails" "Heads" "Heads" "Heads" "Tails" "Tails" "Heads" "Heads" "Heads" "Tails" #"Heads"
# [34] "Tails" "Heads" "Tails" "Heads" "Tails" "Tails" "Tails" "Heads" "Heads" "Tails" #"Heads"
# [45] "Heads" "Heads" "Heads" "Heads" "Tails" "Tails" "Heads" "Tails" "Tails" "Tails" #"Tails"
# [56] "Heads" "Tails" "Tails" "Tails" "Tails" "Heads" "Heads" "Tails" "Heads" "Tails" #"Tails"
# [67] "Heads" "Tails" "Heads" "Tails" "Tails" "Heads" "Tails" "Tails" "Tails" "Heads" #"Tails"
# [78] "Tails" "Tails" "Tails" "Heads" "Heads" "Heads" "Tails" "Tails" "Heads" "Tails" #"Heads"
# [89] "Heads" "Heads" "Tails" "Tails" "Tails" "Tails" "Heads" "Tails" "Heads" "Heads" #"Tails"
# [100] "Heads"
# Vector representing the bag of balls: 30 red, 50 green and 20 yellow.
bag <- c(rep("red", 30), rep("green", 50), rep("yellow", 20))
# Probability of selecting each ball. Every ball is equally likely, so the
# vector is derived from length(bag) and stays valid if the bag changes.
balls.prob <- rep(1 / length(bag), times = length(bag))
# Now, sample from the bag!
# With replacement: the same ball may be drawn more than once.
balls.replacement <- sample(
  x = bag,
  size = 20,
  replace = TRUE,
  prob = balls.prob
)
balls.replacement
## [1] "green" "green" "green" "green" "green" "green" "green"
## [8] "yellow" "green" "green" "yellow" "red" "green" "yellow"
## [15] "green" "yellow" "yellow" "red" "yellow" "green"
# [1] "red" "yellow" "yellow" "green" "green" "red" "green" "green" "green"
#[10] "green" "green" "green" "green" "red" "green" "red" "red" "green"
# [19] "green" "green"
# Without replacement: each ball can be drawn at most once.
balls.noreplacement <- sample(
  x = bag,
  size = 20,
  replace = FALSE,
  prob = balls.prob
)
balls.noreplacement
## [1] "yellow" "green" "yellow" "red" "green" "green" "green"
## [8] "green" "red" "green" "green" "red" "green" "red"
## [15] "green" "red" "yellow" "red" "yellow" "yellow"
# [1] "green" "green" "red" "red" "red" "green" "red" "red" "red"
#[10] "red" "yellow" "red" "green" "green" "yellow" "red" "green" "green"
#[19] "green" "red"
Calculate the expected value of this gamble using the formula for expected values. Hint: Use vector multiplication and the function sum(). Conduct a simulation using the functions sample() and mean() to test your calculation.
# Expected value of the gamble: outcomes 2000 and -100 with probabilities
# .1 and .9.
2000 * .1 + -100 * .9
## [1] 110
#[1] 110
# Vector notation: sum of outcome * probability.
outcomes <- c(2000, -100)
probabilities <- c(.1, .9)
sum(outcomes * probabilities)
## [1] 110
#[1] 110
# Simulation: draw one million outcomes and average them. The draw reuses
# outcomes and probabilities so it cannot drift from the calculation above.
simulation <- sample(
  x = outcomes,
  size = 1e6,
  replace = TRUE,
  prob = probabilities
)
mean(simulation)
## [1] 110.5124
#[1] 110.2478