Interval Estimation

Use the confint function to estimate the confidence interval

## Estimate the confidence interval for the mean of Ozone in the built-in "airquality" data.
## An intercept-only model (Ozone ~ 1) has a single coefficient, the intercept,
## which is the sample mean of the non-missing Ozone values.
fit <- lm(Ozone ~ 1, data = airquality)
summary(fit)  # rows with a missing Ozone value are dropped before fitting
## 
## Call:
## lm(formula = Ozone ~ 1, data = airquality)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41.13 -24.13 -10.63  21.12 125.87 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   42.129      3.063   13.76   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.99 on 115 degrees of freedom
##   (37 observations deleted due to missingness)
## 95% confidence interval for the model coefficient (alpha = 0.05)
confint(object = fit, level = 0.95)
##               2.5 %   97.5 %
## (Intercept) 36.0624 48.19622
## 99% confidence interval for the model coefficient (alpha = 0.01)
confint(object = fit, level = 0.99)
##                0.5 %  99.5 %
## (Intercept) 34.10692 50.1517

We can achieve the same results by individually calculating the lower and upper bounds

## 95% confidence interval for the mean of Ozone, computed by hand

# Sample size: count only the observations that are not missing.
n_Ozone <- sum(!is.na(airquality$Ozone))

# Sample mean and standard deviation, ignoring missing values.
mean_Ozone <- mean(airquality$Ozone, na.rm = TRUE)
sd_Ozone <- sd(airquality$Ozone, na.rm = TRUE)

# Standard error of the mean.
se_Ozone <- sd_Ozone / sqrt(n_Ozone)

# Mean -/+ (t quantile at 1 - alpha/2 with n - 1 degrees of freedom) * standard error.
ci_Ozone <- mean_Ozone + c(-1, 1) * qt(0.975, df = n_Ozone - 1) * se_Ozone
ci_Ozone
## [1] 36.06240 48.19622

Visualize the results

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Plot the observed Ozone values together with the 95% confidence interval
## for the mean. The interval is one pair of numbers, so it is passed to the
## error-bar layer as a one-row data frame; mapping the constants through the
## plot-level data would redraw the same bar once for every row of airquality.
ggplot(airquality, aes(x = factor(1), y = Ozone)) +
  geom_point() +
  geom_errorbar(
    data = data.frame(x = factor(1), ymin = ci_Ozone[1], ymax = ci_Ozone[2]),
    aes(x = x, ymin = ymin, ymax = ymax),
    inherit.aes = FALSE,  # this layer has no y column, so do not inherit y = Ozone
    width = 0.1
  ) +
  theme_minimal() +
  labs(title = "95% Confidence Interval for Mean Ozone",
       x = "",
       y = "Ozone")
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Hypothesis Testing

Create a model that includes both the independent variable(s) (IVs) and the dependent variable (DV), then interpret the results

## Assume that the level of Ozone (DV) can be predicted by Solar.R and Temp (IVs).
## Note: this assignment overwrites the intercept-only `fit` created earlier.
fit <- lm(Ozone ~ Solar.R + Temp, data = airquality)
summary(fit)  # prints the coefficient table, R-squared and overall F-test
## 
## Call:
## lm(formula = Ozone ~ Solar.R + Temp, data = airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.610 -15.976  -2.928  12.371 115.555 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -145.70316   18.44672  -7.899 2.53e-12 ***
## Solar.R        0.05711    0.02572   2.221   0.0285 *  
## Temp           2.27847    0.24600   9.262 2.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.5 on 108 degrees of freedom
##   (42 observations deleted due to missingness)
## Multiple R-squared:  0.5103, Adjusted R-squared:  0.5012 
## F-statistic: 56.28 on 2 and 108 DF,  p-value: < 2.2e-16

T-test

## Split mtcars by engine shape: vs == 0 (V-shaped) and vs == 1 (straight).
vs0 <- mtcars %>%
  filter(vs == 0)
vs1 <- mtcars %>%
  filter(vs == 1)

## t.test() expects numeric vectors. Given whole data frames, it pools every
## column (mpg, cyl, disp, hp, ...) into one sample per group, so the test
## compares a meaningless mix of units. Compare a single variable instead;
## here: fuel economy (mpg).
## NOTE(review): mpg is assumed to be the variable of interest - confirm.
t.test(vs0$mpg, vs1$mpg)
## 
##  Welch Two Sample t-test
## 
## data:  vs0$mpg and vs1$mpg
## t = -4.6671, df = 22.716, p-value = 0.0001098
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.462508  -4.418445
## sample estimates:
## mean of x mean of y 
##  16.61667  24.55714