####### QUESTION 1a #######
# In the 200m dash finals in the Olympics, 8 runners compete for 3 medals
# (order matters). In the 2012 Olympics, 3 of the 8 runners were from Jamaica
# and the other 5 were from different countries. The three medals were all
# won by Jamaica (Usain Bolt, Yohan Blake, and Warren Weir).
install.packages(“grid.arrange”) install.packages(“combinat”) library(“combinat”) install.packages(“gtools”) install.packages(“tidyverse”) library(gtools) library(tidyverse)
####### QUESTION 1b #######
#How many different ways can the 3 medals be distributed across 8 runners
#str(permutations(8,3))
P8 = factorial(8)/factorial(8-3)
P8
## [1] 336
####### QUESTION 1b #######
# How many different ways can the three medals be distributed among the 3 runners from Jamaica?
#str(permutations(3,3))
jamaica <- choose(3,3)*factorial(3)
jamaica
## [1] 6
####### QUESTION 1c #######
# What is the probability that all three medals are won by Jamaica?
# combinations(8,3)
jamaica/P8
## [1] 0.01785714
#### Question 2 ####
#Imagine deck of poker cards –13 different cards, 4 colors, hence 52 different cards. Obtain results analytically and via simulations of at least 1,000 runs for each of the following questions
#####Question 2a ####
#Imagine deck of poker cards – 13 different cards, 4 colors, hence 52 different cards. Obtain results analytically and via simulations of at least 1,000 runs for each of the following questions.
P5 <- 1*(12/51)*(11/50)*(10/49)*(9/48)
P5S <- c(13,5)/c(52,5)
#Simulated
set.seed(2)
red <- rep("red", 13)
green <- rep("green", 13)
black <- rep("black", 13)
yellow <- rep("yellow", 13)
cards <- c(red,green,black,yellow)
FiveCol <- c()
for(i in 1:1000) {
draw <- sample(cards,5,replace=F)
lucky <- max(table(draw))
if(lucky == 5){
FiveCol <- c(FiveCol, i)
}
}
length(FiveCol)/1000
## [1] 0.002
######Question 2b ######
#You sample 2 cards at random without replacement, what is a probability of having two Kings?
PK <- (4/52)*(3/51)
PKS <- c(4,2)/c(52,2)
#Simulated
set.seed(2)
king <- rep("king", 4)
nking <- rep("nking",48)
cardsk <- c(king,nking)
TwoK <- c()
for(i in 1:1000) {
drawk <- sample(cardsk,2,replace=F)
if(drawk[1] == "king" & drawk[2]=="king"){
TwoK <- c(TwoK, i)
}
}
length(TwoK)/1000
## [1] 0.005
##### Question 2c ####
#What is a probability of having the second King in the second draw?
#without replacement
P2K = 3/51
P2K
## [1] 0.05882353
#with replacement
P2K1 = 4/52
P2K1
## [1] 0.07692308
#sequential order, if the first king is drawn
P2K2 = 1 * 3/51
P2K1
## [1] 0.07692308
#d. You sample 5 cards at random without replacement, what is a probability of having exactly
#three different colors of one kind of a card.
#All possible combination: 52!/5!(52−5)!
All_combinations <- factorial(52)/(factorial(5)*factorial(52-5))
(13*4*48*47/2)/All_combinations
## [1] 0.02256903
#### Question 3 ####
#Assuming having a standardized admissions test has a -0.25 point penalty for every incorrect answer and awards 1 point for a correct answer. The quantitative test consists of 8 multiple-choice questions each with 5 answer choices. Suppose a student chooses answers by guessing for all questions on the test.
####Question 3a
#Write down a probability function for total number of points obtained.
no_of_questions <- 44
award <- 1
penalty <- -0.25
p <- 1/5 #probability of guessing a question correctly
#total number of points obtained.
T = no_of_questions * p
T
## [1] 8.8
#What is an expected value and what is a standard error for total number of points?
expected_value <- (award*p) + (penalty * (1-p))
expected_value
## [1] 0
#standard deviation of total points#
T = no_of_questions * p
se <- sqrt(no_of_questions) * abs(penalty-award) * sqrt(p*(1-p))
se
## [1] 3.316625
#Simulate 1,000 students taking this test, guessing at random. What is their mean number of points and sample standard deviation?
B <- 10000
S <- replicate(B, {
Results <- sample(c(award, penalty),no_of_questions, replace = TRUE, prob = c(p, 1-p))
sum (Results)
})
m = mean(S)
m
## [1] 0.029375
#Standard deviation of an urn with two values
# abs(b - a) * sqrt(p(1 - p))
#4. In a previous example, assume a student knows answers to 80 % of questions.
8*80/100 #Hence student knows answer to 6 full questions. Each Q has a probability of 6.4/8 of being known by the student.
## [1] 6.4
#Now, each Q has 80% of being known, hence being answered correctly b the student. Which is the same if a test had 4 correct answers out of 5.
#a. What is his/her expected number of points and standard deviation assuming he/she skips
#answering those questions he/she does not know an answer to (and takes automatically 0
#points for these)?
#Incorrect in this case means 0, rather then -0.25
nq <- 8
cor <- +1
incor80 <- 0
pcor80 <- 4/5
pincor80 <- 1/5
points80 <- c(cor, cor, cor, cor, incor80)
set.seed(2)
j<-1
s80<-c()
repeat{
s80[j]<-sum(sample(x=points80, size=nq, replace=T))
j <- j+1
if(j>(5^8))
break
}
table(s80)
## s80
## 1 2 3 4 5 6 7 8
## 44 467 3677 17903 57373 114843 130905 65413
#EV80 <- EVf(x=c(1,2,3,4,5,6,7,8),y=table(s80)/390625)
#EV2.80 <- EV2f(a=c(1,2,3,4,5,6,7,8),b=table(s80)/390625)
S=sqrt(var(s80))
S
## [1] 1.132909
#b. What is his/her expected number of points and standard deviation assuming he/she answers
#randomly those questions he/she does not know an answer to?
#In this case a wrong answer means -0.25.
nq <- 8
cor <- +1
incor <- -0.25
pcor80 <- 4/5
pincor80 <- 1/5
points80 <- c(cor, cor, cor, cor, incor)
set.seed(2)
o<-1
s80.2<-c()
repeat{
s80.2[o]<-sum(sample(x=points80, size=nq, replace=T))
o <- o+1
if(o>(5^8))
break
}
table(s80.2)
## s80.2
## -0.75 0.5 1.75 3 4.25 5.5 6.75 8
## 44 467 3677 17903 57373 114843 130905 65413
#EV80.2 <- EVf(x=c(-0.75,0.5,1.75,3,4.25,5.5,6.75,8),y=table(s80.2)/390625)
#EV2.80.2 <- EV2f(a=c(-0.75,0.5,1.75,3,4.25,5.5,6.75,8),b=table(s80.2)/390625)
sqrt(var(s80.2))
## [1] 1.416136
#c. Simulate 1,000 pairs of students. The first student from the pair knows answers to 80 % of
#question and the second knows answers to 90 % of questions. What is the probability, that
#the first student has higher number of total points than the second assuming they skip
#answering questions they do not know the answer to?
#In this case wrong answer means 0, and not -0.25
nq <- 8
cor <- +1
incor.pair <- 0
pcor80 <- 4/5
pincor80 <- 1/5
pcor90 <- 9/10
pincor90 <- 1/10
points.pair90 <- c(cor, cor, cor, cor,cor, cor, cor, cor,cor, incor)
points.pair80 <- c(cor,cor,cor,cor,incor.pair)
set.seed(2)
f<-1
pair1000.90<-c()
pair1000.80<-c()
repeat{
pair1000.90[f]<-sum(sample(x=points.pair90, size=nq, replace=T))
pair1000.80[f]<-sum(sample(x=points80, size=nq, replace=T))
f <- f+1
if(f>1000)
break
}
table(pair1000.90)/1000
## pair1000.90
## 1.75 3 4.25 5.5 6.75 8
## 0.001 0.004 0.022 0.155 0.370 0.448
table(pair1000.80)/1000
## pair1000.80
## 1.75 3 4.25 5.5 6.75 8
## 0.010 0.047 0.146 0.272 0.356 0.169
table(pair1000.80>pair1000.90)
##
## FALSE TRUE
## 836 164
164/1000 #Probability of 1st student (80%) having more than 2nd student (90%)
## [1] 0.164
##### Question 5 ####
#For a random variable with following probability density function calculate cumulative distribution function and quantile function. Calculate expected value, standard deviation, median and inter-quartile range. Plot probability density function and highlight expected value, median, range of E(X) ± S.D.(X) and inter-quartile range. Plot cumulative distribution function and quantile function and highlight quartiles (median included) in both.
#fx <- function(x) {1/26*(3*x^2)}
#Cumulative distribution function: X^3/26
Fx <- function(x) {(x^3-1)/26}
#Quantile function: XP=(26*P)^(1/3)
Qp <- function(P) {(26*P+1)^(1/3)}
#expected value:
EXcalc <- function(x) {x*1/26*(3*x^2)}
(EX <- integrate(EXcalc, 1, 3)$value)
## [1] 2.307692
#Expectation of X^2:
EX2calc <- function(x) {x^2*1/26*(3*x^2)}
(EX2 <- integrate(EX2calc, 1, 3)$value)
## [1] 5.584615
#Standard deviation:
Std = sqrt(EX2-EX)
Std
## [1] 1.810227
#Median:
Qp = function(P){(26*P+1)^(1/3)}
Qp(0.5)
## [1] 2.410142
#Interquartile range:
Qp(0.75)-Qp(0.25)
## [1] 0.779418
#install.packages("gridExtra")
library(ggplot2)
library(gridExtra)
x = seq(1,3,0.02)
p = seq(0,1,0.01)
Fx = (x^3-1)/26
Qp = (26*p+1)^(1/3)
data = data.frame(x, p, Fx, Qp)
plot_Fx = ggplot(data) + geom_line(mapping = aes(x = x, y = Fx)) +
geom_line(data = data.frame(x = c(-1, 1), y = c(0,0)), mapping = aes(x = x, y = y))+
geom_line(data = data.frame(x = c(3, 4), y = c(1,1)), mapping = aes(x = x, y = y))+
geom_hline(yintercept = c(0.25, 0.5, 0.75), col = "red")+
scale_y_continuous("F(x)")
plot_Px = ggplot(data) + geom_line(mapping = aes(x = p, y = Qp)) +
geom_vline(xintercept = c(0.25, 0.5, 0.75), col = "red") +
scale_y_continuous("Q(P)")
grid.arrange(plot_Fx, plot_Px)