tab <- matrix(c(0.9212, 0.7455, 0.9932, 0.4482, 1.0, 0.7703), ncol=2, byrow=TRUE)
colnames(tab) <- c('Fingerprint experts','Novices')
rownames(tab) <- c('Match','Similar Distracter','Non-similar Distracter')
tab <- as.table(tab)
tab
## Fingerprint experts Novices
## Match 0.9212 0.7455
## Similar Distracter 0.9932 0.4482
## Non-similar Distracter 1.0000 0.7703
#probability that the expert is wrong given a matched set of prints
prob <- 1 - tab["Match", "Fingerprint experts"]
prob
## [1] 0.0788
#probability that the novice is wrong given a matched set of prints
prob <- 1 - tab["Match", "Novices"]
prob
## [1] 0.2545
The participant is more likely to be a novice, because the probability of getting a matched pair wrong is higher for a novice than for an expert.
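If we further assume the participant was equally likely, a priori, to be an expert or a novice (an assumption not given above), Bayes' rule turns this comparison into a posterior probability; a minimal sketch:

#posterior probability the participant is a novice, given an error on a matched pair,
#assuming equal priors for expert and novice (illustrative assumption)
p_err_expert <- 1 - tab["Match", "Fingerprint experts"]
p_err_novice <- 1 - tab["Match", "Novices"]
p_err_novice/(p_err_expert + p_err_novice)
## [1] 0.7635764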
tab <- matrix(c(50, 9, 50, 891), ncol=2, byrow=TRUE)
colnames(tab) <- c('Users','Non-users')
rownames(tab) <- c('Positive','Negative')
tab <- as.table(tab)
addmargins(tab)
## Users Non-users Sum
## Positive 50 9 59
## Negative 50 891 941
## Sum 100 900 1000
#probability of a positive result given the athlete is a user
prob1 <- (tab[1,1])/(sum(tab[,"Users"]))
prob1
## [1] 0.5
#probability of a negative result given the athlete is a non-user
prob <- (tab[2,2])/(sum(tab[,"Non-users"]))
prob
## [1] 0.99
# probability that the athlete is a user given a positive result, using Bayes' rule
prob_u <- (sum(tab[,"Users"]))/(1000)
prob_not_u <- (sum(tab[,"Non-users"]))/(1000)
prob2 <- (tab[1,2])/(sum(tab[,"Non-users"]))
prob_u_given_pos <- (prob_u*prob1)/(prob_u*prob1 +prob_not_u*prob2)
prob_u_given_pos
## [1] 0.8474576
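Written out, Bayes' rule with the values above gives \[P(\text{user}\mid +)=\frac{P(\text{user})P(+\mid \text{user})}{P(\text{user})P(+\mid \text{user})+P(\text{non-user})P(+\mid \text{non-user})}=\frac{(0.1)(0.5)}{(0.1)(0.5)+(0.9)(0.01)}=\frac{0.05}{0.059}\approx 0.8475\]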
Base Case:
For a single set with \(n_{1}\) elements, choosing one element from it can be done in \(n_{1}\) ways, so the rule holds for \(k=1\).
Induction Hypothesis:
Assume that for sets of sizes \(n_{1}, n_{2},...,n_{k}\) the number of samples is \(n_{1}*n_{2}*...*n_{k}\).
Inductive Step:
Consider \(k+1\) sets. Each sample from the hypothesis can be paired with any of the \(n_{k+1}\) elements of the \((k+1)\)-st set, giving \((n_{1}*n_{2}*...*n_{k})*n_{k+1}=n_{1}*n_{2}*...*n_{k}*n_{k+1}\) samples. Therefore the multiplicative rule holds for all \(k \in \mathbb{N}\).
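As a quick sanity check in R (the set sizes 3, 4, and 2 are arbitrary, chosen only for illustration), enumerating every way to pick one element from each set gives the same count as the product of the sizes:

sizes <- c(3, 4, 2)
#enumerate one element from each set and count the combinations
nrow(expand.grid(lapply(sizes, seq_len)))
## [1] 24
prod(sizes)
## [1] 24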
tab <- matrix(c(0.09, 0.3, 0.37, 0.2, 0.04), ncol=5, byrow=TRUE)
colnames(tab) <- c('0','1','2','3','4')
rownames(tab) <- c('p(y)')
tab <- as.table(tab)
tab
## 0 1 2 3 4
## p(y) 0.09 0.30 0.37 0.20 0.04
sum(tab["p(y)",])
## [1] 1
#probability of y=3 or y=4
prob3 <- tab[1,4]
prob4 <- tab[1,5]
prob3_or_4 = prob3 + prob4
prob3_or_4
## [1] 0.24
#probability that y < 2, i.e., P(y <= 1)
prob <- tab[1,1]+tab[1,2]
prob
## [1] 0.39
tab <- matrix(c(0, .17,1, .10,2, .11,3, .11,4, .10,5, .10,6, .07,7, .05,8, .03,9, .02,10, .02,11, .02,12, .02,13, .02,14, .01,15, .01,16, .01,17, .01,18, .01,19, .005,20, .005), ncol=2, byrow=TRUE)
colnames(tab) <- c('Number of “apps” used, y ','p(y)')
tab <- as.table(tab)
tab
## Number of “apps” used, y p(y)
## A 0.000 0.170
## B 1.000 0.100
## C 2.000 0.110
## D 3.000 0.110
## E 4.000 0.100
## F 5.000 0.100
## G 6.000 0.070
## H 7.000 0.050
## I 8.000 0.030
## J 9.000 0.020
## K 10.000 0.020
## L 11.000 0.020
## M 12.000 0.020
## N 13.000 0.020
## O 14.000 0.010
## P 15.000 0.010
## Q 16.000 0.010
## R 17.000 0.010
## S 18.000 0.010
## T 19.000 0.005
## U 20.000 0.005
#1. all probabilities are between 0 and 1 (check that none fall outside [0, 1])
any(tab[,"p(y)"] < 0 | tab[,"p(y)"] > 1)
## [1] FALSE
#2. the sum of the probabilities is 1
sum(tab[,"p(y)"])
## [1] 1
#probability that y>=10
prob <- sum(tab[11:21, "p(y)"])
prob
## [1] 0.14
#expected value and variance of Y (weighted by p(y), not the mean and variance of the p(y) column)
y <- tab[,1]
p <- tab[,"p(y)"]
mu <- sum(y*p)
mu
## [1] 4.655
sigma2 <- sum(((y - mu)^2)*p)
round(sigma2, 3)
## [1] 19.856
#by Chebyshev's rule, the interval mu +/- 2 standard deviations contains at least 75% of the probability
round(c(mu - 2*sqrt(sigma2), mu + 2*sqrt(sigma2)), 2)
## [1] -4.26 13.57
sum(p[y >= mu - 2*sqrt(sigma2) & y <= mu + 2*sqrt(sigma2)])
## [1] 0.94
\(Y\) = number of foreign students
\(n=25\)
\(p=0.7\)
dbinom(10,25,0.7)
## [1] 0.001324897
pbinom(5,25,0.7)
## [1] 3.457444e-07
\[\mu = n*p=25*0.7=17.5\] \[\sigma=\sqrt{n*p*q}=\sqrt{25*0.7*0.3}=2.29\]
On average, 17.5 (approximately 18) of the 25 PhDs are awarded to foreign students, with a standard deviation of 2.29.
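As a quick check of these formulas in R:

n <- 25
p <- 0.7
n*p
## [1] 17.5
sqrt(n*p*(1 - p))
## [1] 2.291288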
Number of tracks is 10 and the number of trains is 50.
There are \(\frac{50!}{40!}\) ways to arrange the trains on the tracks.
# probability of 5 trains being placed on each track
#number of ways to group the trains
N <-(factorial(50))/(factorial(45))
#total number of ways to group the trains
N1 <- (factorial(50))/(factorial(40))
P <- N/N1
P
## [1] 6.820767e-09
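Written out, the ratio computed above simplifies to \[P=\frac{50!/45!}{50!/40!}=\frac{40!}{45!}=\frac{1}{41\cdot 42\cdot 43\cdot 44\cdot 45}\approx 6.82\times 10^{-9}\]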
#probability that less than 2 trains are sent to track 1
prob <- dbinom(0,50,0.1)+dbinom(1,50,0.1)
prob
## [1] 0.03378586
Y represents the number of consumers who must be interviewed until one indicates something other than information given directly on the product’s label or packaging as the reason a product is green.
tab <- matrix(c("Reason for saying a product is green","Percentage of consumers","Certification mark on label", 45,"Packaging", 15,"Reading information about the product", 12,"Advertisement", 6,"Brand website", 4,"Other", 18,"TOTAL", 100), ncol=2, byrow=TRUE)
colnames(tab) <- c()
rownames(tab) <- c()
tab <- as.table(tab)
tab
## A B
## A Reason for saying a product is green Percentage of consumers
## B Certification mark on label 45
## C Packaging 15
## D Reading information about the product 12
## E Advertisement 6
## F Brand website 4
## G Other 18
## H TOTAL 100
#probability a consumer cites something other than the label or packaging
p <- (12+6+4+18)/(100)
p
## [1] 0.4
The probability distribution is \[P(Y=y)=pq^{y-1}\]
#expected value of a geometric distribution
E=1/p
E
## [1] 2.5
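#note: R's geometric functions count the number of failures before the first success,
#so dgeom(1, 0.4) below corresponds to P(Y = 2) and 1 - pgeom(2, 0.4) to P(Y > 3)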
dgeom(1,0.4)
## [1] 0.24
1-pgeom(2,0.4)
## [1] 0.216
The probability that a facility treats hazardous waste on site is \(p=\frac{8}{209}=0.0383\).
#expected value of treating on site in a sample of 10
p=0.0383
n=10
E=n*p
E
## [1] 0.383
Thus, on average fewer than one facility (about 0.38) in the sample of 10 is expected to treat hazardous waste on site.
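#exact hypergeometric probability that 4 of the 10 sampled facilities treat on site
#dhyper(x, m, n, k): x = 4 on-site in the sample, m = 8 on-site, n = 201 off-site, k = 10 sampled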
dhyper(4,8,201,10)
## [1] 0.0001688459
\(E(y)=0.03\)
\(V(y)=E(y)=0.03\)
These are the assumptions the researchers have to make:
The experiment consists of counting the number of times Y a particular (rare) event occurs during a given unit of time or in a given area or volume (or weight, distance, or any other unit of measurement).
The probability that an event occurs in a given unit of time, area, or volume is the same for all units, and the units do not overlap.
The number of events that occur in one unit of time, area, or volume is independent of the number that occur in other units.
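With \(\lambda = E(Y) = 0.03\), the probability of zero events in a unit is \[P(Y=0)=\frac{\lambda^{0}e^{-\lambda}}{0!}=e^{-0.03}\approx 0.9704,\] which matches the calculation below.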
dpois(0,0.03)
## [1] 0.9704455
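#F is the CDF used below; its derivative gives the density f(y) = (2/3)*(2 - y) for 0 <= y <= 1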
F <- function(y){(2/3)*(2*y-((y^2)/2))}
F(0.4)
## [1] 0.48
F(0.6)-F(0.1)
## [1] 0.55
Normal distribution with \(\mu=50\) and \(\sigma=3.2\).
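Standardizing, the first probability below is \[P(Y>45)=P\left(Z>\frac{45-50}{3.2}\right)=P(Z>-1.5625)\approx 0.9409\]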
1-pnorm(45,50,3.2)
## [1] 0.9409149
pnorm(55,50,3.2)
## [1] 0.9409149
pnorm(52,50,3.2)-pnorm(51,50,3.2)
## [1] 0.1113448
crash <- read.csv("CRASH.csv")
sev <- crash$DRIVHEAD  #driver head-injury severity scores
#normal model with mean 605 and standard deviation 185, used directly in the pnorm/qnorm calls below
mean <- 605
standdev <- 185
pnorm(700,605,185)-pnorm(500,605,185)
## [1] 0.4110396
pnorm(500,605,185)-pnorm(400,605,185)
## [1] 0.1512568
pnorm(850,605,185)
## [1] 0.9073023
1-pnorm(1000,605,185)
## [1] 0.01637499
qnorm(0.9,605,185)
## [1] 842.087
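Equivalently, the 90th percentile is \(\mu + z_{0.90}\sigma\): \[y_{0.90}=605+1.2816(185)\approx 842.1\]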