library(tidyverse)
\(\underline{\textbf{Oxygen Purity Data:}}\)
\(\bullet\ \ Y =\) Purity of Oxygen
\(\bullet\ \ X =\) Percentage of Hydrocarbons
# Read the oxygen purity data set from the CSV file into `df1`.
df1 <- read_csv(file = "problem1-oxygenpurity.csv")
# Put the columns of `df1` on the search path so the code below can refer to
# `purity` and `hydro` as bare names; the matching detach(df1) is at the end.
attach(df1)
\(\large \hat{Y} = \widehat{\beta}_0 + \widehat{\beta}_1 X\)
# Centre both variables on their sample means; `purity` is the response (Y)
# and `hydro` is the predictor (X).
x_diffs <- hydro - mean(hydro)
y_diffs <- purity - mean(purity)
# Raw sums of squares and cross-products (S_xx and S_xy). They are not divided
# by (n - 1), but that divisor would cancel in the slope ratio below anyway.
Var_x <- sum(x_diffs^2)
Cov <- sum(x_diffs * y_diffs)
# Beta 1 (slope): least-squares estimate S_xy / S_xx
hat_beta_1 <- Cov / Var_x
\(\widehat{\beta}_1 = \dfrac{ \sum (y_i - \overline{y})(x_i - \overline{x}) }{ \sum (x_i - \overline{x})^2 } = \dfrac{Cov(Y, X)}{Var(X)} =\) 11.801
# Beta 0 (intercept): chosen so the fitted line passes through the point of
# sample means, i.e. mean(y) = beta_0 + beta_1 * mean(x).
hat_beta_0 <- mean(purity) - hat_beta_1 * mean(hydro)
\(\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1 \overline{x} =\) 77.8633
\(\underline{\textbf{SLR Fitted Model:}} \hspace{0.25cm} \hat Y =\) 77.8633 \(+\) 11.801 \(X\)
\(\large H_0: \ \beta_1 = 0 \hspace{0.75cm} \text{and} \hspace{0.75cm} H_1: \ \beta_1 \neq 0\)
The estimated \(\hat\beta_1\) is statistically significant if the p-value \((p)\) of the test statistic \((t^*)\) is less than the desired significance level \((\alpha = 0.05)\).
# Fitted values from the hand-computed least-squares line.
fitted <- hat_beta_0 + hat_beta_1 * hydro
# Residual (error) sum of squares.
SSE <- sum((purity - fitted)^2)
# Estimate of the error variance, SSE / (n - 2). The sample size is read from
# the data instead of being hard-coded, so it stays correct if the CSV changes.
hat_variance <- SSE / (length(purity) - 2)
# Standard error of the slope and the t statistic for H0: beta_1 = 0.
seBeta_1 <- sqrt(hat_variance / Var_x)
tBeta_1 <- hat_beta_1 / seBeta_1
\(\text{s.e.} \left ( \hat\beta_1 \right ) =\) 3.4851 , \(\hspace{1cm} t^* = \dfrac{\hat\beta_1}{\text{s.e.} \left ( \hat\beta_1 \right )} =\) 3.3861 , \(\hspace{1cm} p =\) .00329 \(\hspace{0.15cm} \therefore \hspace{0.15cm} p < \alpha\)
\(\underline{\textbf{Conclusion:}} \hspace{0.25cm}\) Reject \(H_0\) and conclude that \(\hat\beta_1\) is statistically significant.
\(\hspace{0.7cm} \text{SST} \hspace{0.6cm} = \hspace{0.8cm} \text{SSR} \hspace{0.7cm} + \hspace{0.7cm} \text{SSE}\)
\(\sum (y_i - \overline{y})^2 \ = \ \sum (\hat{y}_i - \overline{y})^2 \ + \ \sum (y_i - \hat{y}_i)^2\)
# Total sum of squares of the response about its mean.
SST <- sum(y_diffs * y_diffs)
# Coefficient of determination: the share of SST not left in the residuals.
rSquared <- 1 - SSE / SST
\(\large r^2 = 1 - \frac {\text{SSE}} {\text{SST}} =\) 0.3891
# Fit the same simple linear regression of purity on hydro with lm().
Fit1 <- lm(purity ~ hydro, data = df1)
# Keep the model summary object for later reference.
sumFit1 <- summary(Fit1)
# Confidence intervals (default 95% level) for the intercept and slope. Call
# the confint() generic and let S3 dispatch select the lm method, rather than
# naming the method function directly.
confint(Fit1)
## 2.5 % 97.5 %
## (Intercept) 69.042 86.68
## hydro 4.479 19.12
# Confidence interval for the mean response at hydro = 1.05; the level is
# spelled out but equals predict()'s default of 0.95.
predict(
  Fit1,
  newdata = tibble(hydro = 1.05),
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 90.25 88.31 92.2
# Sample correlation recovered from r^2. sqrt() alone is never negative, so
# the sign is taken from the fitted slope, which always shares the sign of r.
Rxy <- sign(hat_beta_1) * sqrt(rSquared)
\(\large r_{xy} = \text{sign}(\hat\beta_1) \sqrt{r^2} =\) 0.6238
\(\large H_0: \ \rho = 0 \hspace{0.75cm} \text{and} \hspace{0.75cm} H_A: \ \rho \neq 0\)
# t statistic for H0: rho = 0, t = r * sqrt(n - 2) / sqrt(1 - r^2). The sample
# size is read from the data instead of the hard-coded 18 (= 20 - 2).
tCor <- (Rxy * sqrt(length(purity) - 2)) / sqrt(1 - Rxy^2)
# Built-in Pearson correlation test, for comparison with the hand calculation.
cor.test(purity, hydro)
##
## Pearson's product-moment correlation
##
## data: purity and hydro
## t = 3.4, df = 18, p-value = 0.003
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2504 0.8356
## sample estimates:
## cor
## 0.6238
\(\underline{\textbf{Conclusion:}} \hspace{0.25cm}\) Reject \(H_0\). There is significant evidence supporting the alternative hypothesis that \(\rho \neq 0\).
# Remove df1 from the search path again, undoing the earlier attach(df1).
detach(df1)