# Reset the interactive session before the analysis starts.
rm(list = ls()) # Remove every object that ls() lists in the current environment (objects, not files)
gc() # Run garbage collection and print the memory-usage summary
## Captured gc() output from one run (the figures vary by machine and session):
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526428 28.2 1169365 62.5 NA 669428 35.8
## Vcells 970675 7.5 8388608 64.0 16384 1851821 14.2
cat("\f") # Emit a form feed; intended to clear the console (presumably RStudio -- confirm for other front ends)
graphics.off() # Close all open graphics devices
Student's t distribution converges to the normal distribution as the degrees of freedom increase (the two are practically indistinguishable beyond about 120). Please plot a normal distribution and several t distributions with 2, 5, 15, 30, and 120 degrees of freedom on the same chart.
library(ggplot2)
# Compare the standard normal density with Student t densities ----
# Seed retained from the original script; nothing in this section draws
# random numbers, so the curves are the same with or without it.
set.seed(123)

# Evaluation grid: 200 evenly spaced points from -4 to 4
n_data <- seq(-4, 4, length.out = 200)

# Data frame holding the standard normal density on the grid
normal_data <- data.frame(
  x = n_data,
  y = dnorm(n_data),
  distribution = "Normal"
)

# Build the density data frame for a t distribution with `df` degrees of
# freedom, labelled "t (df=<df>)" for the legend.
t_density_frame <- function(x, df) {
  data.frame(
    x = x,
    y = dt(x, df = df),
    distribution = paste0("t (df=", df, ")")
  )
}

# One data frame per requested number of degrees of freedom
t_data_2 <- t_density_frame(n_data, 2)
t_data_5 <- t_density_frame(n_data, 5)
t_data_15 <- t_density_frame(n_data, 15)
t_data_30 <- t_density_frame(n_data, 30)
t_data_120 <- t_density_frame(n_data, 120)

# Stack everything into one long data frame for ggplot
t_data_all <- rbind(
  normal_data,
  t_data_2,
  t_data_5,
  t_data_15,
  t_data_30,
  t_data_120
)

# A character column would be ordered alphabetically in the legend
# (df=120 before df=15 before df=2). Fixing the factor levels in order of
# appearance keeps the legend and colours sorted by degrees of freedom.
t_data_all$distribution <- factor(
  t_data_all$distribution,
  levels = unique(t_data_all$distribution)
)

# Draw all density curves on a single chart, one colour per distribution
ggplot(t_data_all, aes(x = x, y = y, color = distribution)) +
  geom_line() +
  theme_minimal() +
  labs(
    title = "Normal and t Distributions",
    x = "Value",
    y = "Density"
  )
# Simulate normal data and compare it with its standardized version ----
# Fix the RNG state so the simulated sample is reproducible
set.seed(123)

# Parameters of the simulated normal population
mu <- 108
sigma <- 7.2

# Draw 1000 observations from N(mu, sigma^2)
data_values <- rnorm(n = 1000, mean = mu, sd = sigma)

# Standardize: subtract the mean, then divide by the standard deviation
z_scores <- (data_values - mu) / sigma

# Draw a frequency histogram whose bars and borders share one colour.
draw_filled_hist <- function(values, title, axis_label, fill) {
  hist(
    values,
    main = title,
    xlab = axis_label,
    ylab = "Frequency",
    col = fill,
    border = fill
  )
}

# Histogram of the raw simulated values
draw_filled_hist(data_values, "Original Normal Data", "Value", "orange")

# Histogram of the standardized values
draw_filled_hist(z_scores, "Z-Score Distribution", "Z-Score", "lightgreen")
The overall shape of the distribution is the same in both graphs. Standardizing only shifts and rescales the data, so the z-score distribution is still normal, but it is now centered at 0 with a standard deviation of 1.
In your own words, please explain what a p-value is.
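The p-value quantifies the strength of evidence against the null hypothesis: it is the probability of observing results at least as extreme as those actually obtained, assuming the null hypothesis is true.
A low p-value indicates that the observed results would be unlikely to occur by random chance alone if the null hypothesis were true, suggesting stronger evidence against the null hypothesis.
A higher p-value indicates weaker evidence against the null hypothesis; it means we fail to reject the null, not that the null hypothesis has been shown to be true.
For example, a p-value below the conventional 0.05 significance level would indicate evidence against the null hypothesis, and the observed results would be called statistically significant.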