*Note: This first part consists of notes for myself; it is not required for this week’s discussion.

Part I Degrees of freedom

Part II Normal Distribution Plots

# Evaluation grid for the density curves: -4 to 4 in steps of 0.01
x_values <- seq(from = -4, to = 4, by = 0.01)

# Pair the shared x grid with a vector of density values, giving a two-column
# (x, y) data frame
density_frame <- function(density) {
  data.frame(x = x_values, y = density)
}

# Standard normal density, then Student's t densities for increasing degrees
# of freedom
normal_data <- density_frame(dnorm(x_values))
t_dist_df2 <- density_frame(dt(x_values, df = 2))
t_dist_df5 <- density_frame(dt(x_values, df = 5))
t_dist_df15 <- density_frame(dt(x_values, df = 15))
t_dist_df30 <- density_frame(dt(x_values, df = 30))
t_dist_df120 <- density_frame(dt(x_values, df = 120))

# Stack the curves into one long data frame; the argument names become the
# values of the `distribution` label column (via `.id`).
# All t labels share a single "t (df = N)" format.
combined_data <- bind_rows(
  "Normal" = normal_data,
  "t (df = 2)" = t_dist_df2,
  "t (df = 5)" = t_dist_df5,
  "t (df = 15)" = t_dist_df15,
  "t (df = 30)" = t_dist_df30,
  "t (df = 120)" = t_dist_df120,
  .id = "distribution"
)

# Store the labels as a factor whose levels follow the stacking order, so
# anything that orders by this column (such as a plot legend) lists Normal
# first and the t-distributions by increasing degrees of freedom instead of
# alphabetically.
combined_data$distribution <- factor(
  combined_data$distribution,
  levels = unique(combined_data$distribution)
)

# Plot the densities. The standard normal is drawn as a thick solid line and
# every t-distribution as a thinner dashed line, so the reference curve stands
# out.
# Line type and width are mapped from the data inside aes() rather than passed
# as a row-length vector, so the styling does not depend on the row order of
# combined_data. Their legends are suppressed; color alone identifies curves.
# `linewidth` is used because ggplot2 3.4.0 deprecated `size` for lines.
ggplot(
  data = combined_data,
  mapping = aes(x = x, y = y, color = distribution)
) +
  geom_line(
    mapping = aes(
      linetype = distribution == "Normal",
      linewidth = distribution == "Normal"
    )
  ) +
  scale_linetype_manual(
    values = c("TRUE" = "solid", "FALSE" = "dashed"),
    guide = "none"
  ) +
  scale_linewidth_manual(
    values = c("TRUE" = 1.5, "FALSE" = 0.8),
    guide = "none"
  ) +
  labs(
    title = "Normal and Student's t-Distributions",
    x = "Value",
    y = "Density"
  ) +
  theme_minimal()

Discovery based on the graph:

Part II (continued)

# Mean and standard deviation of the normal distribution to sample from
mu <- 108
sigma <- 7.2

# Fix the RNG seed so the same 1000 draws are produced on every run
set.seed(123)
data_values <- rnorm(n = 1000, mean = mu, sd = sigma)

# Wrap the draws in a one-column data frame (column name: data_values)
data <- data.frame(data_values = data_values)
# Histogram of the simulated normal sample on its original scale
# (bins are 5 units wide)
ggplot(data = data, mapping = aes(x = data_values)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black") +
  labs(
    title = "Original Normal Distribution Histogram Plot",
    x = "Value",
    y = "Frequency"
  ) +
  theme_classic()

# Z-scale the data: scale() centers the column on its mean and divides by its
# standard deviation, returning a one-column matrix
z_data <- scale(data)

# Put the z-scores in a data frame with an explicitly named column.
# Note that data.frame(x = z_data) would keep the matrix's own column name
# ("data_values") and ignore `x =`, which makes the plot mapping easy to
# misread, so the matrix is flattened to a plain vector and named here.
z_scores <- data.frame(z_score = as.numeric(z_data))

# Plot the Z-scaled distribution
ggplot(data = z_scores, mapping = aes(x = z_score)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "lightgreen",
    color = "black",
    alpha = 0.9
  ) +
  labs(
    title = "Z-scaled Distribution",
    x = "Z-score",
    y = "Frequency"
  ) +
  theme_classic()

Part III P-value