CUNY 606

3.2

### Write function for shading
# Shade a region under the standard normal density curve.
#
# Args:
#   shade_lower, shade_upper: left and right edges of the interval to shade
#     (single numbers, shade_lower <= shade_upper).
#   xlim_lower, xlim_upper: x-axis limits used when a new curve is drawn.
#   add: if FALSE (default) a fresh standard normal curve is drawn first;
#     if TRUE only the shaded polygon is added to the current plot.
#
# Returns: whatever polygon() returns; the function is called for its
#   plotting side effect.
shade_area_normal <- function(shade_lower = -3, shade_upper = -1,
                              xlim_lower = -3, xlim_upper = 3, add = FALSE) {
  stopifnot(
    is.numeric(shade_lower), length(shade_lower) == 1L,
    is.numeric(shade_upper), length(shade_upper) == 1L,
    shade_lower <= shade_upper
  )

  # Build the x grid once. seq() stops at the last multiple of 0.01 that does
  # not exceed shade_upper, so append the exact upper edge when it is missed;
  # otherwise the polygon's right side is slanted instead of vertical.
  grid_x <- seq(shade_lower, shade_upper, by = 0.01)
  if (grid_x[length(grid_x)] < shade_upper) {
    grid_x <- c(grid_x, shade_upper)
  }

  # Polygon vertices: start and end on the x-axis, follow the density between.
  cord.x <- c(shade_lower, grid_x, shade_upper)
  cord.y <- c(0, dnorm(grid_x), 0)

  if (!add) {
    # Make a curve
    curve(dnorm(x, 0, 1), xlim = c(xlim_lower, xlim_upper),
          main = "Standard Normal")
  }

  # Add the shaded area.
  polygon(cord.x, cord.y, col = "skyblue")
}

#(a) P(Z < -1.35)
pnorm(-1.35)

## [1] 0.08850799

shade_area_normal(-3, -1.35)

#(b) P(Z > 1.48)
pnorm(1.48, lower.tail = FALSE)

## [1] 0.06943662

shade_area_normal(1.48, 3)

#(c) P(-0.4 < Z < 1.5)
pnorm(1.5) - pnorm(-0.4)

## [1] 0.5886145

shade_area_normal(-0.4, 1.5)

#(d) P(|Z| > 2): lower tail plus upper tail
pnorm(-2) + pnorm(2, lower.tail = FALSE)

## [1] 0.04550026

# Shade both tails on one plot: draw the left tail, then add the right tail.
shade_area_normal(-3, -2)
shade_area_normal(2, 3, add = TRUE)

3.4

#(a)
# Normal(4313, 583) and Normal(5261, 807)

#(b) z-score = (observed time - group mean) / group standard deviation
leo_z <- (4948 - 4313) / 583
mary_z <- (5513 - 5261) / 807
leo_z

## [1] 1.089194

mary_z

## [1] 0.3122677

# The z-scores tell us how many standard deviations their finishing times were higher or lower than average. In this case both of them were higher (i.e., worse/slower) than average.

#(c)
# Mary's z-score is better than Leo's because it is lower, i.e., faster relative to average.

#(d) Upper-tail probability for Leo's z-score
pnorm(leo_z, lower.tail = FALSE)

## [1] 0.1380342

#(e) Upper-tail probability for Mary's z-score
pnorm(mary_z, lower.tail = FALSE)

## [1] 0.3774186

#(f)
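# Partly. The z-scores in (b) and the comparison in (c) would not change, because a z-score can be computed for any distribution. The percentages in (d) and (e) would change, because pnorm() is only valid if the finishing times are nearly normal.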

3.18

#(a) Yes, it does:
# Standardize the heights, then report the share of observations lying
# within 1, 2 and 3 standard deviations of the mean.
heights <- c(
  54, 55, 56, 56, 57, 58, 58, 59, 60, 60, 60, 61, 61,
  62, 62, 63, 63, 63, 64, 65, 65, 67, 67, 69, 73
)
heights_center <- mean(heights)
heights_spread <- sd(heights)
heights_z <- (heights - heights_center) / heights_spread
n_heights <- length(heights_z)

sum(abs(heights_z) < 1) / n_heights

## [1] 0.68

sum(abs(heights_z) < 2) / n_heights

## [1] 0.96

sum(abs(heights_z) < 3) / n_heights

## [1] 1

#(b) Yes, it looks reasonable. The density curve fits the histogram decently, and the qqnorm plot looks fairly linear, particularly for the sample size.

3.22

#(a) P(first defect occurs on the n-th trial) = (1 - p)^(n - 1) * p
p <- 0.02
q <- 1 - p
n <- 10
q^(n - 1) * p

## [1] 0.01667496

#(b) P(no defect in 100 trials)
q^100

## [1] 0.1326196

#(c) Geometric distribution: mean = 1/p, sd = sqrt((1 - p)/p^2)
1 / p   #mean

## [1] 50

sqrt(q / p^2)   #sd

## [1] 49.49747

#(d)
p <- 0.05
# q depends on p, so it has to be recomputed after p changes; leaving the old
# value (0.98) in place gives sd = 19.79899 instead of the correct 19.49359.
q <- 1 - p
1 / p   #mean

## [1] 20

sqrt(q / p^2)   #sd

## [1] 19.49359

#(e)
# Increasing the probability has the effect of lowering the mean and standard deviation of the geometric distribution.

3.38

#(a) Binomial probability of exactly 2 boys among 3 children
p_b <- 0.51
p_g <- 1 - p_b
dbinom(x = 2, size = 3, prob = p_b)

## [1] 0.382347

#(b) Same probability, adding up the three possible birth orders by hand
order_bbg <- p_b * p_b * p_g
order_bgb <- p_b * p_g * p_b
order_gbb <- p_g * p_b * p_b
order_bbg + order_bgb + order_gbb

## [1] 0.382347

#(c)
# Because:
choose(n = 8, k = 3)

## [1] 56

# is a lot of combinations.

3.42

#(a) Negative binomial: probability that the k-th success falls on trial n
p <- 0.15
n <- 10
k <- 3
# Number of ways to place the first k - 1 successes among the first n - 1 trials
arrangements <- choose(n - 1, k - 1)
arrangements * p^k * (1 - p)^(n - k)

## [1] 0.03895012

#(b) The answer is the single-trial success probability itself
p

## [1] 0.15

#(c)
# The probability in (a) is lower than in (b) because getting to the 10th trial with two successes is itself improbable.

CUNY 606

Chapter 3 HW Assignment

mehtablocker

February 26, 2019

3.2

3.4

3.18

3.22

3.38

3.42