Part 1. The Student's t-distribution converges to the normal distribution as the degrees of freedom increase (the two are nearly indistinguishable beyond about 120). Please plot a normal distribution and several t-distributions on the same chart, using 2, 5, 15, 30, and 120 degrees of freedom.

library(ggplot2)

# Evaluation grid shared by every density curve.
x <- seq(-5, 5, by = 0.1)

# Standard normal reference curve.
normal_data <- data.frame(x = x, y = dnorm(x, mean = 0, sd = 1))

# Degrees of freedom for the t curves, in the order they should be listed.
t_dfs <- c(2, 5, 15, 30, 120)

# One data frame of t densities per degrees-of-freedom value, each tagged
# with its label (e.g. "t(15)").
t_data <- lapply(t_dfs, function(nu) {
  data.frame(Distribution = paste0("t(", nu, ")"), x = x, y = dt(x, df = nu))
})

# Long-format table: one row per (distribution, x) pair, with columns
# Distribution, x, y.
plot_data <- do.call(
  rbind,
  c(list(cbind(Distribution = "Normal", normal_data)), t_data)
)

# Store the labels as a factor with explicit levels. A plain character
# column is ordered alphabetically in the legend ("t(120)" before "t(2)"),
# which hides the progression towards the normal curve.
plot_data$Distribution <- factor(
  plot_data$Distribution,
  levels = c("Normal", paste0("t(", t_dfs, ")"))
)

# Draw every density curve on shared axes, one colour per distribution.
ggplot(data = plot_data, mapping = aes(x = x, y = y, colour = Distribution)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Normal and Student t-distributions",
    x = "Z-Score",
    y = "Density"
  )

Part 2. Let's work with the normal data below (1000 observations with a mean of 108 and a standard deviation of 7.2). Plot two charts: the normally distributed data and the Z-score distribution of the same data. Do they have the same distributional shape? Why or why not?

# Fix the RNG state so the simulated sample is reproducible.
set.seed(123)

# Parameters of the normal distribution the sample is drawn from.
mu <- 108
sigma <- 7.2

# 1000 draws from a normal with mean `mu` and standard deviation `sigma`,
# wrapped in a one-column data frame for plotting.
data_values <- rnorm(1000, mean = mu, sd = sigma)
df <- data.frame(data_values = data_values)

# Standardise each draw using the generating parameters (not the sample
# mean and standard deviation).
z_scores <- (data_values - mu) / sigma
df_z <- data.frame(z_scores = z_scores)

# Histogram of the raw simulated values.
# geom_histogram() plots counts by default, so y is mapped to the computed
# density to make the axis agree with its "Density" label. `bins` is given
# explicitly (the value ggplot2 would otherwise choose) so the
# "Pick better value with `binwidth`" message is not emitted.
ggplot(df, aes(x = data_values)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30) +
  labs(title = "Histogram With Normal Distribution Data", x = "", y = "Density")

# Histogram of the standardised values, drawn the same way so the two
# shapes can be compared directly.
ggplot(df_z, aes(x = z_scores)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30) +
  labs(title = "Histogram With Z-Scores", x = "", y = "Density")

Part 3. In your own words, please explain what a p-value is.