# Session reset for an interactive run of this script.
# NOTE(review): rm(list = ls()) only removes the objects in the current
# environment. No files are touched, and attached packages and options()
# stay as they were, so this is not equivalent to a fresh R session.
rm(list = ls())

# Trigger garbage collection. The memory-usage table gc() returns is
# machine-specific, so it is not printed.
invisible(gc())

# A form feed clears the console in RStudio. Outside an interactive session
# (Rscript, knitting) it would only be written to the output as a raw
# control character, so it is emitted interactively only.
if (interactive()) {
  cat("\f")
}

# Close every open graphics device.
graphics.off()
The (Student's) t distribution converges to the normal distribution as the degrees of freedom increase (beyond about 120 degrees of freedom the two are practically indistinguishable). Please plot a normal distribution and several t distributions with 2, 5, 15, 30, and 120 degrees of freedom on the same chart.
library(ggplot2)

# Compare the standard normal density with Student's t densities for
# 2, 5, 15, 30 and 120 degrees of freedom on a single chart.
# No RNG seed is set here: dnorm() and dt() are deterministic.

# Evaluation grid: 200 evenly spaced points on [-4, 4]
t_data <- seq(-4, 4, length.out = 200)

# Standard normal density on the grid
norm_data <- data.frame(
  x = t_data,
  y = dnorm(t_data),
  distribution = "Normal"
)

# Build the density curve of a t distribution with `df` degrees of freedom
# on the shared grid, labelled "t (df=<df>)" for the legend.
make_t_curve <- function(df) {
  data.frame(
    x = t_data,
    y = dt(t_data, df = df),
    distribution = paste0("t (df=", df, ")")
  )
}

t_data_2 <- make_t_curve(2)
t_data_5 <- make_t_curve(5)
t_data_15 <- make_t_curve(15)
t_data_30 <- make_t_curve(30)
t_data_120 <- make_t_curve(120)

# Stack all curves in long format, one row per (x, distribution) pair
t_datas <- rbind(
  norm_data,
  t_data_2, t_data_5, t_data_15, t_data_30, t_data_120
)

# Keep the legend in the order the curves were stacked (Normal, then
# increasing df). Left as character, the labels would be sorted
# alphabetically, which puts "t (df=120)" before "t (df=2)".
t_datas$distribution <- factor(
  t_datas$distribution,
  levels = unique(t_datas$distribution)
)

# One line per distribution, coloured by distribution
ggplot(t_datas, aes(x = x, y = y, color = distribution)) +
  geom_line() +
  theme_minimal() +
  labs(
    title = "Normal and t Distributions",
    x = "Value",
    y = "Density"
  )
# Simulate 1000 draws from a normal distribution with mean 108 and
# standard deviation 7.2, then plot a histogram of the raw values.
set.seed(123) # fixed seed so the simulated sample is reproducible
mu <- 108 # mean of the simulated distribution
sigma <- 7.2 # standard deviation of the simulated distribution
data_val <- rnorm(n = 1000, mean = mu, sd = sigma)

# ggplot2 needs a data frame; its single column is named data_val
dataf <- data.frame(data_val)

# Histogram of the raw sample, bins 5 units wide
ggplot(data = dataf, aes(x = data_val)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black") +
  labs(
    title = "Original Normal Distribution Histogram Plot",
    x = "Value",
    y = "Frequency"
  ) +
  theme_classic()
# Standardise the simulated sample: z = (value - mean) / sd.
# data_val, mu and sigma come from the simulation step above.
z_data <- (data_val - mu) / sigma

# Histogram of the z-scores, bins 0.2 wide.
# The aesthetic must map the data frame's own `x` column. Mapping
# `data_val` is not an error, because ggplot2 falls back to the global
# vector of that name, but it silently plots the unscaled values.
ggplot(data.frame(x = z_data), aes(x = x)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "lightgreen",
    color = "black",
    alpha = 0.9
  ) +
  labs(
    title = "Z-scaled Distribution",
    x = "Z-score",
    y = "Frequency"
  ) +
  theme_classic()
What is a p-value?
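The p-value is the probability, assuming the null hypothesis is true, of observing data at least as extreme as the data actually observed. A high p-value means the observed data are consistent with the null hypothesis. A low p-value means the observed data would be unlikely if the null hypothesis were true, and so provides evidence against the null hypothesis.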