Completed 17/17

1

# Identification rates by print-pair condition (rows) and participant group
# (columns); the values are entered row by row.
tab <- as.table(matrix(
  c(0.9212, 0.7455, 0.9932, 0.4482, 1.0, 0.7703),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(
    c("Match", "Similar Distracter", "Non-similar Distracter"),
    c("Fingerprint experts", "Novices")
  )
))
tab
##                        Fingerprint experts Novices
## Match                               0.9212  0.7455
## Similar Distracter                  0.9932  0.4482
## Non-similar Distracter              1.0000  0.7703

a

# P(expert fails to identify the match | the prints are a matched pair).
# The "Match" row is treated as the rate of correct decisions for each group,
# so the miss rate is its complement. Dividing by the row sum (as was done
# before) mixes the expert and novice rates and is not a probability of any
# event in this problem.
prob <- 1 - tab["Match", "Fingerprint experts"]
prob
## [1] 0.0788

b

# P(novice fails to identify the match | the prints are a matched pair).
# The "Match" row is treated as the rate of correct decisions for each group,
# so the miss rate is its complement. Dividing by the row sum (as was done
# before) mixes the expert and novice rates and is not a probability of any
# event in this problem.
prob <- 1 - tab["Match", "Novices"]
prob
## [1] 0.2545

c

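The participant is more likely to be a novice. Assuming a participant is equally likely to be an expert or a novice, Bayes' rule makes the posterior probability of each group proportional to that group's probability of missing a matched pair, and this miss probability is higher for a novice than for an expert.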

2

# Drug-test counts: test result (rows) by user status (columns).
# Margins are added for display only; `tab` itself keeps the raw counts.
tab <- as.table(matrix(
  c(50, 9, 50, 891),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(c("Positive", "Negative"), c("Users", "Non-users"))
))
addmargins(tab)
##          Users Non-users  Sum
## Positive    50         9   59
## Negative    50       891  941
## Sum        100       900 1000

a

# P(positive | user): positives among users divided by the number of users.

prob1 <- tab["Positive", "Users"] / sum(tab[, 1])
prob1
## [1] 0.5

b

# P(negative | non-user): negatives among non-users divided by the number of
# non-users.

prob <- tab["Negative", "Non-users"] / sum(tab[, 2])
prob
## [1] 0.99

c

# P(user | positive) using Bayes' rule:
#   P(U | +) = P(U) P(+ | U) / [P(U) P(+ | U) + P(not U) P(+ | not U)]

# Prior probability of being a user, out of the 1000 athletes tabulated.
prob_u <- sum(tab[, 1]) / 1000

# P(positive | non-user), i.e. the false-positive rate.
prob2 <- tab["Positive", "Non-users"] / sum(tab[, 2])

# Prior probability of being a non-user.
prob_not_u <- sum(tab[, 2]) / 1000

# `prob1` is P(positive | user), computed in part (a).
prob_u_given_pos <- (prob_u * prob1) / (prob_u * prob1 + prob_not_u * prob2)

prob_u_given_pos
## [1] 0.8474576

3

Base Case:

For \(k=1\) there is a single set with \(n_{1}\) elements. Taking one element from this set yields \(n_{1}\) different samples, so the rule holds for \(k=1\).

Induction Hypothesis:

Assume that for \(k\) sets containing \(n_{1}, n_{2},...,n_{k}\) elements, respectively, the number of samples formed by taking one element from each set is \(n_{1}*n_{2}*...*n_{k}\).

Inductive step: Consider \(k+1\) sets. Each sample from the induction hypothesis can be paired with any of the \(n_{k+1}\) elements of the \((k+1)\)-th set, so the number of samples is \((n_{1}*n_{2}*...*n_{k})*n_{k+1}=n_{1}*n_{2}*...*n_{k}*n_{k+1}\). Therefore, by induction, the multiplicative rule holds for all \(k \in \mathbb{N}\).

4-6

7

# Probability distribution p(y) for y = 0, ..., 4, stored as a 1 x 5 table.
tab <- as.table(matrix(
  c(0.09, 0.3, 0.37, 0.2, 0.04),
  nrow = 1,
  dimnames = list("p(y)", c("0", "1", "2", "3", "4"))
))
tab
##         0    1    2    3    4
## p(y) 0.09 0.30 0.37 0.20 0.04

a

# The probabilities in the single p(y) row must add up to 1.
sum(tab[1, ])
## [1] 1

b

# P(y = 3 or y = 4) = p(3) + p(4), because the two outcomes cannot both occur.

prob3 <- tab["p(y)", "3"]
prob4 <- tab["p(y)", "4"]
prob3_or_4 <- prob3 + prob4
prob3_or_4
## [1] 0.24

c

# P(y < 2) = P(y <= 1) = p(0) + p(1).

prob <- tab["p(y)", "0"] + tab["p(y)", "1"]
prob
## [1] 0.39

8

# Probability distribution of y, the number of apps used: column 1 holds the
# values y = 0, ..., 20 and column 2 the matching probabilities p(y).
tab <- cbind(
  0:20,
  c(0.17, 0.10, 0.11, 0.11, 0.10, 0.10, 0.07, 0.05, 0.03, 0.02, 0.02,
    0.02, 0.02, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.005, 0.005)
)
colnames(tab) <- c("Number of “apps” used, y ", "p(y)")
tab <- as.table(tab)
tab
##   Number of “apps” used, y    p(y)
## A                     0.000  0.170
## B                     1.000  0.100
## C                     2.000  0.110
## D                     3.000  0.110
## E                     4.000  0.100
## F                     5.000  0.100
## G                     6.000  0.070
## H                     7.000  0.050
## I                     8.000  0.030
## J                     9.000  0.020
## K                    10.000  0.020
## L                    11.000  0.020
## M                    12.000  0.020
## N                    13.000  0.020
## O                    14.000  0.010
## P                    15.000  0.010
## Q                    16.000  0.010
## R                    17.000  0.010
## S                    18.000  0.010
## T                    19.000  0.005
## U                    20.000  0.005

a

# 1. All probabilities are between 0 and 1.
# A probability is invalid when it is above 1 OR below 0, and the check has to
# be elementwise (`|`). The scalar `&&` is an error for operands longer than
# one element since R 4.3, and "greater than 1 and less than 0" can never be
# true, so the earlier test could not detect anything.

p_y <- unname(tab[, "p(y)"])
outlier <- p_y[p_y > 1 | p_y < 0]
outlier
## numeric(0)
# 2. The sum of the probabilities is 1.

sum(tab[, "p(y)"])
## [1] 1

b

# P(y >= 10): add p(y) over every row whose y value (column 1) is at least 10.
# Selecting rows by value replaces the hand-typed list of row positions
# 11 through 21, which is easy to get wrong.

prob <- sum(tab[tab[, 1] >= 10, "p(y)"])
prob
## [1] 0.14

c

# Mean and variance of the discrete random variable y:
#   mu      = sum(y * p(y))
#   sigma^2 = sum((y - mu)^2 * p(y))
# mean() and sd() applied to the p(y) column only summarise the 21 probability
# values themselves (their average is always 1/21); they say nothing about y.
y <- tab[, 1]
data <- tab[, "p(y)"]
mu <- sum(y * data)
mu
## [1] 4.655
sigma2 <- sum((y - mu)^2 * data)
round(sigma2, 3)
## [1] 19.856

d

# By Chebyshev's rule at least 1 - 1/2^2 = 75% of the probability lies within
# two standard deviations of the mean, so mu +/- 2 * sigma is an interval that
# is guaranteed to contain y with probability at least 0.75. (The full range
# 0 to 20 also qualifies, but only trivially: it holds all of the probability.)

y <- tab[, 1]
p_y <- tab[, "p(y)"]
mu <- sum(y * p_y)
sigma <- sqrt(sum((y - mu)^2 * p_y))
interval <- c(mu - 2 * sigma, mu + 2 * sigma)
round(interval, 3)
## [1] -4.257 13.567
# probability actually captured by the interval, i.e. P(0 <= y <= 13)
sum(p_y[y >= interval[1] & y <= interval[2]])
## [1] 0.94

9

\(Y = \text{number of foreign students}\)

\(n=25\)

\(p=0.7\)

a

# P(Y = 10) for Y ~ Binomial(n = 25, p = 0.7)
dbinom(x = 10, size = 25, prob = 0.7)
## [1] 0.001324897

b

# P(Y <= 5) for Y ~ Binomial(n = 25, p = 0.7)
pbinom(q = 5, size = 25, prob = 0.7)
## [1] 3.457444e-07

c

\[\mu = n*p=25*0.7=17.5\] \[\sigma=\sqrt{n*p*q}=\sqrt{25*0.7*0.3}=2.29\]

d

On average, approximately 18 (17.5) of the 25 PhDs go to foreign students, with a standard deviation of 2.29.

10

Number of tracks is 10 and the number of trains is 50.

a

Each of the 50 trains can be assigned to any one of the 10 tracks, so there are \(10^{50}\) equally likely ways to assign the trains to the tracks.

# Probability that exactly 5 trains are placed on each of the 10 tracks.
# NOTE(review): assumes every train is assigned independently and uniformly at
# random to one of the 10 tracks -- the same model that gives the per-train
# probability 0.1 used for track 1 in part (b).
#
# Under that model the track counts are multinomial, so
#   P = [50! / (5!)^10] / 10^50.
# The earlier ratio (50!/45!) / (50!/40!) = 40!/45! compares ordered draws of
# 5 and of 10 trains and is not the probability of this event.

# number of ways to split the 50 trains into 10 labelled groups of 5
N <- factorial(50) / factorial(5)^10

# total number of ways to assign 50 trains to 10 tracks
N1 <- 10^50

P <- N / N1
P
## [1] 4.912046e-07

b

# P(fewer than 2 trains on track 1) = P(X = 0) + P(X = 1),
# with X ~ Binomial(n = 50, p = 0.1).

prob <- sum(dbinom(0:1, size = 50, prob = 0.1))
prob
## [1] 0.03378586

11

Y represents the number of consumers who must be interviewed until one indicates something other than information given directly on the product’s label or packaging as the reason a product is green.

# Display table of the reasons consumers give for calling a product green.
# Everything is stored as text (header row included), one column per field.
tab <- cbind(
  c(
    "Reason for saying a product is green",
    "Certification mark on label",
    "Packaging",
    "Reading information about the product",
    "Advertisement",
    "Brand website",
    "Other",
    "TOTAL"
  ),
  c("Percentage of consumers", "45", "15", "12", "6", "4", "18", "100")
)
# No row or column labels are set; as.table() supplies the default letters.
dimnames(tab) <- NULL
tab <- as.table(tab)
tab
##   A                                     B                      
## A Reason for saying a product is green  Percentage of consumers
## B Certification mark on label           45                     
## C Packaging                             15                     
## D Reading information about the product 12                     
## E Advertisement                         6                      
## F Brand website                         4                      
## G Other                                 18                     
## H TOTAL                                 100

a

# Success probability: share of consumers citing a reason other than the
# label or packaging (12 + 6 + 4 + 18 out of 100 percent).
p <- sum(12, 6, 4, 18) / 100
p
## [1] 0.4

The probability distribution is \[P(Y=y)=pq^{y-1}, \quad y=1,2,3,\ldots\] where \(p=0.4\) and \(q=1-p=0.6\).

b

# Mean of a geometric random variable that counts trials up to and including
# the first success: E(Y) = 1 / p.

E <- 1 / p
E
## [1] 2.5

c

# NOTE(review): assumes this part asks for P(Y = 1) -- confirm against the
# exercise. Y counts interviews up to and including the first success, whereas
# dgeom() counts the failures before it, so P(Y = y) = dgeom(y - 1, p).
# dgeom(1, 0.4) is P(Y = 2) = 0.6 * 0.4 = 0.24, not P(Y = 1).
dgeom(1 - 1, 0.4)
## [1] 0.4

d

# NOTE(review): assumes this part asks for P(Y > 2) -- confirm against the
# exercise. pgeom(k, p) is P(failures <= k) = P(Y <= k + 1), so
# P(Y > 2) = 1 - P(Y <= 2) = 1 - pgeom(2 - 1, p) = q^2.
# 1 - pgeom(2, 0.4) is P(Y > 3) = 0.6^3 = 0.216 instead.
1 - pgeom(2 - 1, 0.4)
## [1] 0.36

12

The probability that a facility treats hazardous waste on site is \(P=\frac{8}{209}=0.0383\).

a

# Expected number of facilities treating on site in a sample of 10:
# E = n * p, using the rounded proportion p = 0.0383.

n <- 10
p <- 0.0383

E <- p * n
E
## [1] 0.383

Thus, on average fewer than one facility (about 0.38) in a sample of 10 is expected to treat hazardous waste on site.

b

# Hypergeometric probability of exactly 4 on-site treaters in a sample of 10
# drawn from 8 on-site and 201 other facilities.
dhyper(x = 4, m = 8, n = 201, k = 10)
## [1] 0.0001688459

13

\(E(y)=0.03\)

a

\(V(y)=E(y)=0.03\)

b

These are the assumptions the researchers have to make.

  1. The experiment consists of counting the number of times Y a particular (rare) event occurs during a given unit of time or in a given area or volume (or weight, distance, or any other unit of measurement).

  2. The probability that an event occurs in a given unit of time, area, or volume is the same for all the units. Also, units are mutually exclusive.

  3. The number of events that occur in one unit of time, area, or volume is independent of the number that occur in other units.

c

# P(y = 0) for a Poisson random variable with mean 0.03
dpois(x = 0, lambda = 0.03)
## [1] 0.9704455

14

c

# F(y) = (2/3) * (2y - y^2 / 2), evaluated elementwise for numeric y.
F <- function(y) {
  bracket <- 2 * y - (y^2) / 2
  (2 / 3) * bracket
}
F(0.4)
## [1] 0.48

d

# P(0.1 < y <= 0.6) = F(0.6) - F(0.1)
diff(F(c(0.1, 0.6)))
## [1] 0.55

15

16

Normal with,

\(\mu=50\)

\(\sigma= 3.2\)

a

# P(Y > 45) for Y ~ Normal(mean = 50, sd = 3.2)
1 - pnorm(q = 45, mean = 50, sd = 3.2)
## [1] 0.9409149

b

# P(Y < 55) for Y ~ Normal(mean = 50, sd = 3.2)
pnorm(q = 55, mean = 50, sd = 3.2)
## [1] 0.9409149

c

# P(51 < Y < 52) for Y ~ Normal(mean = 50, sd = 3.2)
diff(pnorm(c(51, 52), mean = 50, sd = 3.2))
## [1] 0.1113448

17

# Read the crash data file and pull out its DRIVHEAD column.
crash <- read.csv(file = "CRASH.csv")
sev <- crash$DRIVHEAD
# Normal parameters for this problem; the pnorm()/qnorm() calls in the parts
# that follow pass the same two numbers as literals.
# NOTE(review): the variable `mean` has the same name as base::mean().
standdev <- 185
mean <- 605

a

# P(500 < Y < 700) for Y ~ Normal(mean = 605, sd = 185)
diff(pnorm(c(500, 700), mean = 605, sd = 185))
## [1] 0.4110396

b

# P(400 < Y < 500) for Y ~ Normal(mean = 605, sd = 185)
diff(pnorm(c(400, 500), mean = 605, sd = 185))
## [1] 0.1512568

c

# P(Y < 850) for Y ~ Normal(mean = 605, sd = 185)
pnorm(q = 850, mean = 605, sd = 185)
## [1] 0.9073023

d

# P(Y > 1000) for Y ~ Normal(mean = 605, sd = 185)
1 - pnorm(q = 1000, mean = 605, sd = 185)
## [1] 0.01637499

e

# 90th percentile of Normal(mean = 605, sd = 185): the value that only 10% of
# observations exceed.
qnorm(p = 0.9, mean = 605, sd = 185)
## [1] 842.087