####### QUESTION 1a #######
# In the 200m dash finals in the Olympics, 8 runners compete for 3 medals
#  (order matters). In the 2012 Olympics, 3 of the 8 runners were from Jamaica
#  and the other 5 were from different countries. The three medals were all
#  won by Jamaica (Usain Bolt, Yohan Blake, and Warren Weir).

install.packages(“grid.arrange”) install.packages(“combinat”) library(“combinat”) install.packages(“gtools”) install.packages(“tidyverse”) library(gtools) library(tidyverse)

####### QUESTION 1b #######
 #How many different ways can the 3 medals be distributed across 8 runners
#str(permutations(8,3))
P8 = factorial(8)/factorial(8-3)
P8
## [1] 336
####### QUESTION 1b #######
# How many different ways can the three medals be distributed among the 3 runners from Jamaica?
#str(permutations(3,3))
jamaica <- choose(3,3)*factorial(3)
jamaica
## [1] 6
####### QUESTION 1c #######
# What is the probability that all three medals are won by Jamaica?
# combinations(8,3)
jamaica/P8
## [1] 0.01785714
#### Question 2 ####
#Imagine deck of poker cards –13 different cards, 4 colors, hence 52 different cards. Obtain results analytically and via simulations of at least 1,000 runs for each of the following questions
#####Question 2a ####
#Imagine deck of poker cards – 13 different cards, 4 colors, hence 52 different cards. Obtain results analytically and via simulations of at least 1,000 runs for each of the following questions.


P5 <- 1*(12/51)*(11/50)*(10/49)*(9/48)
P5S <- c(13,5)/c(52,5)

#Simulated

set.seed(2)

red <- rep("red", 13)
green <- rep("green", 13)
black <- rep("black", 13)
yellow <- rep("yellow", 13)

cards <- c(red,green,black,yellow)

FiveCol <- c()

for(i in 1:1000) {
  draw <- sample(cards,5,replace=F)
  lucky <- max(table(draw))
  if(lucky == 5){
    FiveCol <- c(FiveCol, i)
  }
}

length(FiveCol)/1000
## [1] 0.002
######Question 2b ######
#You sample 2 cards at random without replacement, what is a probability of having two Kings?

PK <- (4/52)*(3/51)
PKS <- c(4,2)/c(52,2)

#Simulated

set.seed(2)

king <- rep("king", 4)
nking <- rep("nking",48)

cardsk <- c(king,nking)

TwoK <- c()

for(i in 1:1000) {
  drawk <- sample(cardsk,2,replace=F)
  
  if(drawk[1] == "king" & drawk[2]=="king"){
    TwoK <- c(TwoK, i)
  }
}

length(TwoK)/1000
## [1] 0.005
##### Question 2c ####

#What is a probability of having the second King in the second draw?
#without replacement

P2K = 3/51
P2K
## [1] 0.05882353
#with replacement

P2K1 = 4/52
P2K1
## [1] 0.07692308
#sequential order, if the first king is drawn

P2K2 = 1 * 3/51
P2K1
## [1] 0.07692308
#d. You sample 5 cards at random without replacement, what is a probability of having exactly
#three different colors of one kind of a card.

#All possible combination: 52!/5!(52−5)!
All_combinations <- factorial(52)/(factorial(5)*factorial(52-5))

(13*4*48*47/2)/All_combinations
## [1] 0.02256903
#### Question 3 ####
#Assuming having a standardized admissions test has a -0.25 point penalty for every incorrect answer and awards 1 point for a correct answer. The quantitative test consists of 8 multiple-choice questions each with 5 answer choices. Suppose a student chooses answers by guessing for all questions on the test.
####Question 3a
#Write down a probability function for total number of points obtained.
no_of_questions <- 44
award <- 1
penalty <- -0.25
p <- 1/5 #probability of guessing a question correctly

#total number of points obtained.
T = no_of_questions * p
T
## [1] 8.8
#What is an expected value and what is a standard error for total number of points?

expected_value <- (award*p) + (penalty * (1-p))
expected_value
## [1] 0
#standard deviation of total points#
T = no_of_questions * p

se <- sqrt(no_of_questions) * abs(penalty-award) * sqrt(p*(1-p))
se
## [1] 3.316625
#Simulate 1,000 students taking this test, guessing at random. What is their mean number of points and sample standard deviation?

B <- 10000
S <- replicate(B, {
  Results <- sample(c(award, penalty),no_of_questions, replace = TRUE, prob = c(p, 1-p))
  sum (Results)
})

m = mean(S)
m
## [1] 0.029375
#Standard deviation of an urn with two values
# abs(b - a) * sqrt(p(1 - p))
#4. In a previous example, assume a student knows answers to 80 % of questions.

8*80/100 #Hence student knows answer to 6 full questions. Each Q has a probability of 6.4/8 of being known by the student.
## [1] 6.4
#Now, each Q has 80% of being known, hence being answered correctly b the student. Which is the same if a test had 4 correct answers out of 5.
#a. What is his/her expected number of points and standard deviation assuming he/she skips
#answering those questions he/she does not know an answer to (and takes automatically 0
#points for these)?

#Incorrect in this case means 0, rather then -0.25

nq <- 8
cor <- +1
incor80 <- 0
pcor80 <- 4/5
pincor80 <- 1/5
points80 <- c(cor, cor, cor, cor, incor80)

set.seed(2)

j<-1
s80<-c()
repeat{
  s80[j]<-sum(sample(x=points80, size=nq, replace=T))
  j <- j+1
  if(j>(5^8))
    break
}

table(s80)
## s80
##      1      2      3      4      5      6      7      8 
##     44    467   3677  17903  57373 114843 130905  65413
#EV80 <- EVf(x=c(1,2,3,4,5,6,7,8),y=table(s80)/390625)

#EV2.80 <- EV2f(a=c(1,2,3,4,5,6,7,8),b=table(s80)/390625)

S=sqrt(var(s80))
S
## [1] 1.132909
#b. What is his/her expected number of points and standard deviation assuming he/she answers
#randomly those questions he/she does not know an answer to?

#In this case a wrong answer means -0.25.

nq <- 8
cor <- +1
incor <- -0.25
pcor80 <- 4/5
pincor80 <- 1/5
points80 <- c(cor, cor, cor, cor, incor)

set.seed(2)

o<-1
s80.2<-c()
repeat{
  s80.2[o]<-sum(sample(x=points80, size=nq, replace=T))
  o <- o+1
  if(o>(5^8))
    break
}

table(s80.2)
## s80.2
##  -0.75    0.5   1.75      3   4.25    5.5   6.75      8 
##     44    467   3677  17903  57373 114843 130905  65413
#EV80.2 <- EVf(x=c(-0.75,0.5,1.75,3,4.25,5.5,6.75,8),y=table(s80.2)/390625)

#EV2.80.2 <- EV2f(a=c(-0.75,0.5,1.75,3,4.25,5.5,6.75,8),b=table(s80.2)/390625)

sqrt(var(s80.2))
## [1] 1.416136
#c. Simulate 1,000 pairs of students. The first student from the pair knows answers to 80 % of
#question and the second knows answers to 90 % of questions. What is the probability, that
#the first student has higher number of total points than the second assuming they skip
#answering questions they do not know the answer to?

#In this case wrong answer means 0, and not -0.25

nq <- 8
cor <- +1
incor.pair <- 0
pcor80 <- 4/5
pincor80 <- 1/5
pcor90 <- 9/10
pincor90 <- 1/10
points.pair90 <- c(cor, cor, cor, cor,cor, cor, cor, cor,cor, incor)
points.pair80 <- c(cor,cor,cor,cor,incor.pair)

set.seed(2)

f<-1
pair1000.90<-c()
pair1000.80<-c()
repeat{
  pair1000.90[f]<-sum(sample(x=points.pair90, size=nq, replace=T))
  pair1000.80[f]<-sum(sample(x=points80, size=nq, replace=T))
  f <- f+1
  if(f>1000)
    break
}
table(pair1000.90)/1000
## pair1000.90
##  1.75     3  4.25   5.5  6.75     8 
## 0.001 0.004 0.022 0.155 0.370 0.448
table(pair1000.80)/1000
## pair1000.80
##  1.75     3  4.25   5.5  6.75     8 
## 0.010 0.047 0.146 0.272 0.356 0.169
table(pair1000.80>pair1000.90)
## 
## FALSE  TRUE 
##   836   164
164/1000 #Probability of 1st student (80%) having more than 2nd student (90%)
## [1] 0.164
##### Question 5 ####
#For a random variable with following probability density function calculate cumulative distribution function and quantile function. Calculate expected value, standard deviation, median and inter-quartile range. Plot probability density function and highlight expected value, median, range of E(X) ± S.D.(X) and inter-quartile range. Plot cumulative distribution function and quantile function and highlight quartiles (median included) in both.
#fx <- function(x) {1/26*(3*x^2)}
#Cumulative distribution function: X^3/26
Fx <- function(x) {(x^3-1)/26}
#Quantile function: XP=(26*P)^(1/3)
Qp <- function(P) {(26*P+1)^(1/3)}
#expected value:
EXcalc <- function(x) {x*1/26*(3*x^2)}
(EX <- integrate(EXcalc, 1, 3)$value)
## [1] 2.307692
#Expectation of X^2:
EX2calc <- function(x) {x^2*1/26*(3*x^2)}
(EX2 <- integrate(EX2calc, 1, 3)$value)
## [1] 5.584615
#Standard deviation:
Std = sqrt(EX2-EX)
Std
## [1] 1.810227
#Median:
Qp = function(P){(26*P+1)^(1/3)}
Qp(0.5) 
## [1] 2.410142
#Interquartile range:
Qp(0.75)-Qp(0.25) 
## [1] 0.779418
#install.packages("gridExtra")
library(ggplot2)
library(gridExtra)
x = seq(1,3,0.02)
p = seq(0,1,0.01)
Fx = (x^3-1)/26
Qp = (26*p+1)^(1/3)
data = data.frame(x, p, Fx, Qp)
plot_Fx = ggplot(data) + geom_line(mapping = aes(x = x, y  = Fx)) + 
  geom_line(data = data.frame(x = c(-1, 1), y = c(0,0)), mapping = aes(x = x, y = y))+
  geom_line(data = data.frame(x = c(3, 4), y = c(1,1)), mapping = aes(x = x, y = y))+
  geom_hline(yintercept = c(0.25, 0.5, 0.75), col = "red")+
  scale_y_continuous("F(x)")
plot_Px = ggplot(data) + geom_line(mapping = aes(x = p, y  = Qp)) + 
  geom_vline(xintercept = c(0.25, 0.5, 0.75), col = "red") + 
  scale_y_continuous("Q(P)")
grid.arrange(plot_Fx, plot_Px)