# Lab 4: Distributions in R

# Setup ----
# Fix the random seed so every run of the lab draws the same samples; without
# it the summaries and plots below change on each execution.
set.seed(123)

# 1. Uniform Distribution
# a. Generate a Sample
# Draw 500 random numbers from U(0, 10)
uniform_sample <- runif(500, min = 0, max = 10)
# Summary and visualization
summary(uniform_sample)
# Example output recorded from an earlier, unseeded run (values will differ):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.04021 2.37882 4.72349 4.85390 7.26791 9.98140
hist(
  uniform_sample,
  main = "Uniform Distribution (0, 10)",
  xlab = "Values",
  col = "lightblue"
)

# b. Calculate Probability
# CDF for uniform distribution: P(X <= 4) = (4 - 0) / (10 - 0)
p_uniform <- punif(4, min = 0, max = 10)
print(p_uniform)
## [1] 0.4
# 2. Exponential Distribution
# a. Generate a Sample
# Draw 500 random numbers from Exp(rate = 0.1)
exponential_sample <- rexp(500, rate = 0.1)
# Summary and visualization
summary(exponential_sample)
# Example output recorded from one run (values depend on the random draw):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0014 2.8979 7.4235 10.2519 13.6548 73.0231
hist(
  exponential_sample,
  main = "Exponential Distribution (rate = 0.1)",
  xlab = "Values",
  col = "lightgreen"
)

# b. Calculate Probability
# Survival function: P(X > 7) = exp(-0.1 * 7)
# Ask pexp() for the upper tail directly instead of computing 1 - CDF; the
# subtraction loses precision when the tail probability is very small.
p_exponential <- pexp(7, rate = 0.1, lower.tail = FALSE)
print(p_exponential)
## [1] 0.4965853
# 3. Poisson Distribution
# a. Generate a Sample
# Draw 500 random numbers from Poisson(3)
poisson_sample <- rpois(500, lambda = 3)
# Summary and visualization
summary(poisson_sample)
# Example output recorded from one run (values depend on the random draw):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 3.000 3.044 4.000 11.000
# The sample is integer-valued, so centre one unit-wide bin on each count.
# With breaks on the integers, hist()'s right-closed cells put the two lowest
# counts (e.g. 0 and 1) into the same first bar.
poisson_breaks <- seq(-0.5, max(poisson_sample) + 0.5, by = 1)
hist(
  poisson_sample,
  breaks = poisson_breaks,
  main = "Poisson Distribution (lambda = 3)",
  xlab = "Values",
  col = "lightcoral"
)

# b. Calculate Probability
# Probability of exactly 4 events: P(X = 4) = exp(-3) * 3^4 / 4!
p_poisson <- dpois(4, lambda = 3)
print(p_poisson)
## [1] 0.1680314
# 4. Binomial Distribution
# a. Generate a Sample
# Draw 500 random numbers from Binomial(10, 0.5)
binomial_sample <- rbinom(500, size = 10, prob = 0.5)
# Summary and visualization
summary(binomial_sample)
# Example output recorded from one run (values depend on the random draw):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 4.000 5.000 4.958 6.000 9.000
# The sample is integer-valued, so centre one unit-wide bin on each possible
# number of successes (0 to 10). With breaks on the integers, hist()'s
# right-closed cells put the two lowest values into the same first bar.
binomial_breaks <- seq(-0.5, 10.5, by = 1)
hist(
  binomial_sample,
  breaks = binomial_breaks,
  main = "Binomial Distribution (n = 10, p = 0.5)",
  xlab = "Values",
  col = "lightyellow"
)

# b. Calculate Probability
# Probability of exactly 3 successes: P(X = 3) = choose(10, 3) * 0.5^10
p_binomial <- dbinom(3, size = 10, prob = 0.5)
print(p_binomial)
## [1] 0.1171875
# 5. Bonus: Visualization of Probability Distributions
# a. Overlay Histograms
# Overlay the histograms of the uniform, exponential, Poisson, and binomial
# samples on one set of axes.
overlay_samples <- list(
  Uniform = uniform_sample,
  Exponential = exponential_sample,
  Poisson = poisson_sample,
  Binomial = binomial_sample
)
# Semi-transparent fills, in the same order as overlay_samples; reused for the
# legend so the key cannot drift from the bars.
overlay_cols <- c(
  rgb(0.2, 0.5, 0.7, 0.5),
  rgb(0.3, 0.7, 0.3, 0.5),
  rgb(0.8, 0.3, 0.3, 0.5),
  rgb(0.7, 0.7, 0.1, 0.5)
)
# Bin every sample with the same unit-wide breaks so bar heights are
# comparable across distributions. The breaks sit on half-integers so each
# Poisson/binomial count gets its own bar.
overlay_breaks <- seq(
  -0.5,
  ceiling(max(unlist(overlay_samples))) + 0.5,
  by = 1
)
overlay_hists <- lapply(
  overlay_samples,
  hist,
  breaks = overlay_breaks,
  plot = FALSE
)
# Size the y-axis from the tallest bar of any sample; taking it from the first
# histogram alone would clip the taller ones drawn on top of it.
overlay_ymax <- max(
  vapply(overlay_hists, function(h) max(h$counts), numeric(1))
)
# The x-axis is limited to 0-30, so the long exponential tail is not shown.
plot(
  overlay_hists[[1]],
  main = "Comparison of Distributions",
  xlab = "Values",
  col = overlay_cols[1],
  xlim = c(0, 30),
  ylim = c(0, overlay_ymax)
)
for (i in seq_along(overlay_hists)[-1]) {
  plot(overlay_hists[[i]], col = overlay_cols[i], add = TRUE)
}
legend("topright", legend = names(overlay_samples), fill = overlay_cols)

# Kernel density estimate of each sample, drawn on shared axes
# One line colour per distribution; the names double as the legend labels
curve_cols <- c(
  Uniform = "blue",
  Exponential = "green",
  Poisson = "red",
  Binomial = "yellow"
)
# The first curve opens the plot and fixes the title and axis limits
plot(
  density(uniform_sample),
  main = "Comparison of Distributions",
  xlab = "Values",
  col = curve_cols[["Uniform"]],
  lwd = 2,
  xlim = c(0, 30),
  ylim = c(0, 0.4)
)
# The remaining curves are added on top of the existing axes
added_samples <- list(
  Exponential = exponential_sample,
  Poisson = poisson_sample,
  Binomial = binomial_sample
)
for (label in names(added_samples)) {
  lines(density(added_samples[[label]]), col = curve_cols[[label]], lwd = 2)
}
# Legend identifying each curve by colour
legend(
  "topright",
  legend = names(curve_cols),
  col = unname(curve_cols),
  lwd = 2
)

# Load ggplot2 library
# Install ggplot2 only when it is missing, so the script does not contact the
# package repository and reinstall it on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Example console output when the package has to be installed:
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggplot2)
# Create one data frame per sample, tagged with its distribution name
uniform_df <- data.frame(
  values = uniform_sample,
  distribution = "Uniform"
)
exponential_df <- data.frame(
  values = exponential_sample,
  distribution = "Exponential"
)
poisson_df <- data.frame(
  values = poisson_sample,
  distribution = "Poisson"
)
binomial_df <- data.frame(
  values = binomial_sample,
  distribution = "Binomial"
)
# Combine the data frames into one long table (one row per observation)
combined_df <- rbind(uniform_df, exponential_df, poisson_df, binomial_df)
# Name each fill colour by its distribution. Unnamed values are matched to the
# distribution labels in alphabetical order (Binomial, Exponential, Poisson,
# Uniform), which would give Binomial the light blue used for Uniform in the
# individual histograms and Uniform the light yellow used for Binomial.
distribution_fills <- c(
  Uniform = "lightblue",
  Exponential = "lightgreen",
  Poisson = "lightcoral",
  Binomial = "lightyellow"
)
# Create the plot with ggplot2: overlapping semi-transparent histograms
ggplot(combined_df, aes(x = values, fill = distribution)) +
  geom_histogram(alpha = 0.5, position = "identity", bins = 15) +
  scale_fill_manual(values = distribution_fills) +
  labs(title = "Comparison of Distributions", x = "Values", y = "Frequency") +
  theme_minimal() +
  theme(legend.title = element_blank())
