############################################################
# Clean R Environment
############################################################
# Remove all objects from the workspace.
# NOTE(review): when this script is source()d into an existing session this
# wipes the caller's objects too; running it in a fresh R session is safer.
rm(list = ls())
############################################################
### Set the working directory where your files are located
# The folder below is machine-specific. Guard the call so the script keeps
# running (with a warning) on machines where the folder does not exist,
# instead of aborting with "cannot change working directory".
data_dir <- "D:/D Drive/Ph.D. Course Work/PhD_2025/DataFile"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  warning(
    "Working directory not changed; folder not found: ", data_dir,
    call. = FALSE
  )
}
### Verify the current working directory
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/PhD_2025/DataFile"
### R provides built-in functions for many probability distributions. Most distributions have four main functions, distinguished by a one-letter prefix:
## d - density (probability mass or density function)
## p - cumulative probability (CDF)
## q - quantile function (inverse CDF)
## r - random number generation
### Main Content: Normal, Binomial, t, and F distributions.
### Normal Distribution
### The Normal distribution is continuous with parameters mean (\(\mu\)) and standard deviation (\(\sigma\)).
### In the simulation below, the plotted curve is a density estimated from simulated data. As the sample size increases, this estimated curve closely follows the theoretical normal density.
### dnorm(x, mean, sd) : Probability density at x
### pnorm(q, mean, sd) : Cumulative probability \(P(X \le q)\)
### qnorm(p, mean, sd) : Quantile for probability p
### rnorm(n, mean, sd) : Generate n random numbers
###Example
# Height of the standard normal density curve at x = 1
dnorm(x = 1, mean = 0, sd = 1)
## [1] 0.2419707
# Area under the standard normal curve to the left of 1, i.e. P(X <= 1)
pnorm(q = 1, mean = 0, sd = 1)
## [1] 0.8413447
# Value below which 95% of the standard normal probability lies
qnorm(p = 0.95, mean = 0, sd = 1)
## [1] 1.644854
# Ten random draws from the standard normal (values differ on every run)
rnorm(n = 10, mean = 0, sd = 1)
## [1] 1.1780225751 -0.1125760413 0.6042174831 0.3747222012 0.0580397269
## [6] 1.4891216226 0.0008279701 0.7257822225 0.4392525916 0.2342541718
### Explanation:
### dnorm -> gives the height of the normal curve at x=1.
### pnorm -> gives area under the curve left of x=1.
### qnorm -> finds the x value corresponding to cumulative probability 0.95.
### rnorm -> simulates random observations.
### Simulation of Normal Distribution
# Draw 1000 standard-normal values and wrap them in a one-column data frame
x <- rnorm(n = 1000, mean = 0, sd = 1)
df <- data.frame(x = x)
library(ggplot2)
# Density curve estimated from the simulated values
ggplot(data = df, mapping = aes(x = x)) +
  geom_density(colour = "blue", linewidth = 1) +
  labs(
    title = "Normal Distribution (Mean = 0, SD = 1)",
    x = "Values",
    y = "Density"
  )

### Explanation:
### The density curve represents the probability density of simulated normal data. As the sample size increases, the density curve closely matches the theoretical normal distribution.
### Binomial Distribution
### The Binomial distribution models number of successes in \(n\) trials with success probability \(p\).
### Functions
### dbinom(x, size, prob) : Probability of exactly x successes
### pbinom(q, size, prob) : Cumulative probability \(P(X \le q)\)
### qbinom(p, size, prob) : Quantile for probability p
### rbinom(n, size, prob) : Generate n random numbers
### Example
# P(X = 3): exactly 3 heads in 5 tosses of a fair coin
dbinom(x = 3, size = 5, prob = 0.5)
## [1] 0.3125
# P(X <= 3): at most 3 heads in 5 tosses
pbinom(q = 3, size = 5, prob = 0.5)
## [1] 0.8125
# Smallest number of heads whose cumulative probability reaches 0.8
qbinom(p = 0.8, size = 5, prob = 0.5)
## [1] 3
# Ten simulated head counts, each from 5 tosses (values differ on every run)
rbinom(n = 10, size = 5, prob = 0.5)
## [1] 1 3 2 1 2 5 1 4 1 0
### t-Distribution
### Used for small samples when population standard deviation is unknown. Parameter: degrees of freedom (\(df\)).
### Functions
### dt(x, df) : Density
### pt(q, df) : Cumulative probability
### qt(p, df) : Quantile
### rt(n, df) : Random numbers
### Example
# Height of the t density curve at x = 1 (10 degrees of freedom)
dt(x = 1, df = 10)
## [1] 0.230362
# Area to the left of 1, i.e. P(T <= 1)
pt(q = 1, df = 10)
## [1] 0.8295534
# Value below which 95% of the t probability lies
qt(p = 0.95, df = 10)
## [1] 1.812461
# Five random draws from the t-distribution (values differ on every run)
rt(n = 5, df = 10)
## [1] -1.5301471 0.2804416 -1.1281615 -0.7360617 -0.2161421
###Simulation of Binomial Distribution
library(ggplot2)
# Draw 1000 values, each the number of successes in 10 trials with p = 0.5
x <- rbinom(n = 1000, size = 10, prob = 0.5)
df <- data.frame(x = x)
# Bar chart of how often each number of successes occurs
ggplot(data = df, mapping = aes(x = x)) +
  geom_bar(colour = "black", fill = "blue") +
  labs(
    title = "Binomial Distribution (n = 10, p = 0.5)",
    x = "Number of Successes"
  )

### Explanation
### Because the binomial distribution is discrete, a bar plot (geom_bar()) is used: each bar shows how many of the 1000 simulated values equal that number of successes, so the bar heights approximate the shape of the probability mass function.
### t-Distribution
### Used for small samples when population standard deviation is unknown. Parameter: degrees of freedom (\(df\)).
### Functions
### dt(x, df) : Density
### pt(q, df) : Cumulative probability
### qt(p, df) : Quantile
### rt(n, df) : Random numbers
### Example:
# Height of the t density curve at x = 1 (10 degrees of freedom)
dt(x = 1, df = 10)
## [1] 0.230362
# Area to the left of 1, i.e. P(T <= 1)
pt(q = 1, df = 10)
## [1] 0.8295534
# Value below which 95% of the t probability lies
qt(p = 0.95, df = 10)
## [1] 1.812461
# Five random draws from the t-distribution (values differ on every run)
rt(n = 5, df = 10)
## [1] 1.3523378 0.2685272 -0.3224113 0.6418358 0.2620845
### Simulation of t-Distribution
library(ggplot2)
# Draw 1000 values from the t-distribution with 10 degrees of freedom
x <- rt(n = 1000, df = 10)
df <- data.frame(x = x)
# Density curve estimated from the simulated values
ggplot(data = df, mapping = aes(x = x)) +
  geom_density(colour = "darkgreen", linewidth = 1) +
  labs(
    title = "t-Distribution (df = 10)",
    x = "Values",
    y = "Density"
  )

### F-Distribution
### Used in ANOVA and ratio of variances. Parameters: numerator df (\(df1\)) and denominator df (\(df2\)).
### Functions
### df(x, df1, df2) : Density
### pf(q, df1, df2) : Cumulative probability
### qf(p, df1, df2) : Quantile
### rf(n, df1, df2) : Random numbers
### Example:
# Height of the F density curve at x = 2 (df1 = 5, df2 = 10).
# The script also binds a data frame to the name `df`; the call still reaches
# the F density because R skips non-function objects when looking up a
# function name.
df(x = 2, df1 = 5, df2 = 10)
## [1] 0.1620057
# Area to the left of 2, i.e. P(F <= 2)
pf(q = 2, df1 = 5, df2 = 10)
## [1] 0.835805
# Value below which 90% of the F probability lies
qf(p = 0.9, df1 = 5, df2 = 10)
## [1] 2.521641
# Five random draws from the F-distribution (values differ on every run)
rf(n = 5, df1 = 5, df2 = 10)
## [1] 1.3105756 0.7183489 0.8355460 0.2635007 1.3553711
### Simulation of F-Distribution
library(ggplot2)
# Simulate 1000 observations from F-distribution
x <- rf(1000, df1 = 5, df2 = 10)
df <- data.frame(x)
# Plot density, showing only the range 0 to 5.
# coord_cartesian() zooms the view without discarding data. The previous
# xlim(0, 5) set scale limits instead, so simulated values above 5 were
# removed before the density was estimated (ggplot2 warned "Removed ... rows
# containing non-finite outside the scale range") and the curve was computed
# from a truncated sample.
ggplot(df, aes(x = x)) +
  geom_density(color = "purple", linewidth = 1) +
  labs(
    title = "F-Distribution (df1 = 5, df2 = 10)",
    x = "Values",
    y = "Density"
  ) +
  coord_cartesian(xlim = c(0, 5))

### Explanation:
### The F-distribution is right-skewed and defined only for positive values. Density estimation highlights its asymmetric nature.