Problem 1

\[X \sim N(\mu, \sigma) \\ \mu = 100 \\ \sigma = 15\]

a)

\[\bar{x} \sim N(\mu, \sigma_{\bar{x}}) \\ \sigma_{\bar{x}} = \frac{\sigma}{\sqrt{n}} \\ n = 25 \\ \bar{x} \sim N(100,3) \]

b)

\[\text{let}\ Z = \frac{X-\mu}{\sigma},\ z = \frac{141-100}{15} = 2.7\overline{3}\\ \mathbb{P}[X < 141] = \mathbb{P}[Z<2.7\overline{3}] \approx 0.9969\]

c)

# 0.99 quantile of a normal distribution with mean 100 and sd 15
qnorm(p = 0.99, mean = 100, sd = 15)
## [1] 134.8952

d)

\[\text{let:}\ \bar{x} = 118,\ s=20,\ n =100,\ s_{\bar{x}} = 2,\ \alpha = 0.05,\ t_{\alpha/2,99} \approx 1.98 \\ \mathbb{P}[\bar{x}-t_{\alpha/2,99}s_{\bar{x}} < \mu_{UT} < \bar{x}+t_{\alpha/2,99}s_{\bar{x}}] = 1-\alpha \\ \approx \mathbb{P}[114.0316 < \mu_{UT} < 121.9684]\]

\[ \text{interpretation: If we repeat the survey 100 times with the same conditions as the first survey } \]

\[ \text{then we expect about 95 of the resulting confidence intervals (here } \bar{x} \pm 3.97 \text{) to capture the true mean.} \]

e)

\[ H_{0}: \mu_{UT} = 100 \\ H_{A}: \mu_{UT} > 100 \\ t = \frac{\bar{x}-\mu_{UT}}{s_{\bar{x}}} = 9 \]

# p-value for the one-sided test H_A: mu > 100 with t = 9 and 99 degrees of
# freedom. pt() returns the lower tail P(T <= t) by default, so the upper tail
# P(T > t) must be requested explicitly to get the p-value.
pt(9, 99, lower.tail = FALSE)
## the upper-tail p-value is far below alpha = 0.05

\[\text{We can reject the null hypothesis.}\]

Problem 2

\[ f(x,y) = \left\{\begin{aligned} &2(x+y) && x \in (0,1), \ y \in (0,1), \ y< x \\ &0 && \text{else} \end{aligned} \right.\]

a)

\[ f_{X}(x) = \int_{y=0}^x f(x,y)dy = 3x^2 \\ \]

b)

\[ \text{Note:}\\ f_{Y}(y) = \int_{x=y}^1 f(x,y)dx = 1+2y-3y^2 \\ E[Y] = \int_{y=0}^1yf_{Y}(y)dy = \frac{5}{12} \\ E[XY] = \int_{x=0}^1\int_{y=0}^xxyf(x,y)dydx = \int_{x=0}^1\frac{5}{3}x^4dx = \frac{1}{3} \]

\[ E[X] = \int_{x=0}^1xf_{X}(x)dx = \frac{3}{4} \\ E[X^2] = \int_{x=0}^1x^2f_{X}(x)dx = \frac{3}{5} \\ Var[X] = E[X^2] - E[X]^2 = \frac{3}{80} \\ Cov[X,Y] = E[XY] - E[X]E[Y] = \frac{1}{3} - \frac{3}{4}\cdot\frac{5}{12} = \frac{1}{48} \]

c)

\[ \text{Using the marginal of Y:} \ f_{Y}(y) = \left\{\begin{aligned} &1+2y-3y^2 && y \in (0,1)\\ &0 && \text{else} \end{aligned} \right.\\ f_{X|Y}(x|y) = \frac{f(x,y)}{f_{Y}(y)} = \frac{2(x+y)}{1+2y-3y^2}, \quad 0 < y < x < 1 \]

Problem 3

\[ \text{Let}\ [x_1, ..., x_n] = X \sim b(\theta) \\ L(\theta|x_i) = \theta^{x_i}(1-\theta)^{1-x_i} \\ \]

a)

\[ L(\theta | X) = \prod_{i=1}^{n}L(\theta|x_i) \\ L(\theta | X) = \theta^{\sum x_i}(1-\theta)^{\sum(1-x_i)} \\ L(\theta | X) = \theta^{\sum x_i}(1-\theta)^{n-\sum x_i} \]

b)

\[ \text{Take natural log} \\ \ln(L(\theta | X)) = (\sum x_i) (\ln(\theta))+(n-\sum x_i)\ln(1-\theta) \\ \]

\[ \text{Partial derive with respect to}\ \theta \\ \frac{\partial \ln(L(\theta|X))}{\partial \theta} = \frac{\sum x_i}{\theta}-\frac{n-\sum x_i}{1- \theta} \]

\[ \text{Set equal to zero and multiply by} \ \theta(1-\theta) \\ 0=\sum x_i(1-\theta)-(n-\sum x_i)\theta \]

\[ \text{Simplify} \\ \sum x_i(1-\theta)-(n-\sum x_i)\theta = \sum x_i - n\theta \\ \sum x_i - n\theta = 0 \implies n \theta = \sum x_i \\ \hat{\theta} = \frac{1}{n}\sum x_i \\ \hat{\theta} = \bar{x} \]

\[ \text{interpretation: The maximum likelihood estimate of} \ \theta \ \text{is the sample mean of X} \]

c)

\[ \text{Let}\ [x_1, ..., x_n]\ = X \sim N(\mu, 1) \\ L(\mu|\sigma = 1, x_i) = (2 \pi)^{-\frac{1}{2}}e^{-\frac{(x_i-\mu)^2}{2}} \]

\[ L(\mu|\sigma = 1,X)=\prod_{i=1}^{n}L(\mu|\sigma = 1,x_i) \\ L(\mu|\sigma = 1,X)= (2 \pi)^{-\frac{n}{2}}e^{-\frac{\sum(x_i-\mu)^2}{2}} \]

\[ \text{Take natural log} \\ \ln(L(\mu|\sigma = 1,X)) = \ln\Bigg((2 \pi)^{-\frac{n}{2}}e^{-\frac{\sum(x_i-\mu)^2}{2}}\Bigg) \\ \]

\[ \text{Simplify} \\ \ln(L(\mu|\sigma = 1,X)) = -\frac{n}{2}\ln(2\pi)-\frac{1}{2}\sum(x_i-\mu)^2 \\ \]

\[ \text{Partial derive with respect to}\ \mu \ \text{and set equal to 0} \\ \frac{\partial \ln(L(\mu|\sigma = 1,X))}{\partial \mu} = 0-\frac{-2}{2}\sum(x_i-\mu) = \sum(x_i-\mu) \\ 0= \sum(x_i)-n\mu \]

\[ \text{Solve for} \ \mu \\ n\mu=\sum(x_i) \\ \hat{\mu} = \frac{1}{n}\sum_{i=1}^n x_i \\ \hat{\mu} = \bar{x} \]

Problem 4

# Problem 4: 95% confidence intervals for the mean arrival delay on each day
# of the week, plotted as three points per day (mean, lower bound, upper bound).
ABIA <- read.csv("C:/Users/Will/OneDrive/Documents/School/SDS 323/HW 0/ABIA.csv", header = TRUE)

# Keep only the rows with a recorded arrival delay.
delayKnown <- subset(ABIA, !is.na(ArrDelay))

# Split the delays by day once instead of re-subsetting for every statistic.
# Fixing the levels to 1:7 keeps one slot per day even if a day has no rows.
days <- 1:7
delayByDay <- split(
  delayKnown$ArrDelay,
  factor(delayKnown$DayOfWeek, levels = days)
)

dayMean <- vapply(delayByDay, mean, numeric(1))
daySd <- vapply(delayByDay, sd, numeric(1))
dayN <- vapply(delayByDay, length, integer(1))

# Normal-approximation margin of error: z_{0.975} * s / sqrt(n).
margin <- qnorm(0.975) * daySd / sqrt(dayN)

# Same layout as before: rows 1-7 are the means, rows 8-14 the lower bounds,
# rows 15-21 the upper bounds, each group ordered by day 1..7.
ciPoints <- data.frame(
  "DayOfWeek" = rep(as.numeric(days), times = 3),
  "ArrDelay" = unname(c(dayMean, dayMean - margin, dayMean + margin))
)

plot(ciPoints$DayOfWeek, ciPoints$ArrDelay, xlab="Day of Week", ylab="95% CI for Delay (min)")

\[ \text{1 indicates Monday and 7 indicates Sunday.} \] \[ \text{Interpretation: The best days to travel to minimize flight delays within the provided data are Wednesday and Saturday.} \]