Week_10

Part 1

The (Student's) t distribution converges to the normal distribution as the degrees of freedom increase (beyond about 120 degrees of freedom the two are practically indistinguishable). Please plot a normal distribution and a few t distributions with 2, 5, 15, 30, and 120 degrees of freedom on the same chart.

library(ggplot2)

# Part 1: overlay the standard normal density with Student's t densities
# for several degrees of freedom to show the t curves approaching the normal.

# Seed kept from the original script; nothing in this part draws random
# numbers (dnorm() and dt() are deterministic density functions).
set.seed(123)

# Evaluation grid: 200 evenly spaced points from -4 to 4
t_data <- seq(-4, 4, length.out = 200)

# Degrees of freedom to plot, in increasing order, and their legend labels
t_dfs <- c(2, 5, 15, 30, 120)
t_labels <- paste0("t (df=", t_dfs, ")")

# Normal distribution
norm_data <- data.frame(
  x = t_data,
  y = dnorm(t_data),
  distribution = "Normal"
)

# One data frame per t distribution, generated from the df vector instead of
# one hand-written data.frame() call per value
t_frames <- Map(
  function(df, label) {
    data.frame(x = t_data, y = dt(t_data, df = df), distribution = label)
  },
  t_dfs,
  t_labels
)

# Combine data (single rbind over the whole list)
t_datas <- do.call(rbind, c(list(norm_data), t_frames))

# Fix the level order explicitly. As a plain character column the legend is
# sorted alphabetically ("t (df=120)" < "t (df=15)" < "t (df=2)"), which
# scrambles the df progression the plot is supposed to illustrate.
t_datas$distribution <- factor(
  t_datas$distribution,
  levels = c("Normal", t_labels)
)

# Plot graph: one coloured line per distribution
ggplot(
  t_datas,
  aes(x = x, y = y, color = distribution)
) +
  geom_line() +
  theme_minimal() +
  labs(
    title = "Normal and t Distributions",
    x = "Value",
    y = "Density"
  )

Part 2

# Part 2: draw a reproducible sample from N(mu, sigma) and plot its histogram.

# Fixed seed so the simulated sample is the same on every run
set.seed(123)

# Parameters of the normal distribution being sampled
mu <- 108
sigma <- 7.2

# 1000 random draws from N(mu, sigma)
data_val <- rnorm(n = 1000, mean = mu, sd = sigma)

# ggplot2 needs a data frame; the single column is named `data_val`
dataf <- data.frame(data_val)

# Histogram of the raw (unscaled) sample, 5-unit-wide bins.
# Title and y-axis label spelling corrected ("Original Normal", "Frequency").
ggplot(data = dataf, aes(x = data_val)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black") +
  labs(
    title = "Original Normal Distribution Histogram Plot",
    x = "Value",
    y = "Frequency"
  ) +
  theme_classic()

# Standardise the sample: z = (value - mu) / sigma
z_data <- (data_val - mu) / sigma

# Histogram of the z-scores.
# The aesthetic must reference column `x` of the data frame built here.
# Mapping `data_val` (not a column of this data frame) makes ggplot pick up
# the global raw sample instead, so the unscaled values were being plotted
# under a "Z-score" axis.
ggplot(data.frame(x = z_data), aes(x = x)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "lightgreen",
    color = "black",
    alpha = 0.9
  ) +
  labs(
    title = "Z-scaled Distribution",
    x = "Z-score",
    y = "Frequency"
  ) +
  theme_classic()

What is p value?

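The p-value is the probability, assuming the null hypothesis is true, of observing data at least as extreme as the data actually observed. A high p-value means the observed data are consistent with the null hypothesis (it does not prove the null hypothesis is true). A low p-value means the observed data would be unlikely if the null hypothesis were true, and is therefore evidence against the null hypothesis.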

Week_10

Ganesh Kumar

2023-11-10

Part 1

Part 2