Gaussian Function
# Define
GaussPDF = function(x,mu, sig) {
res = (1./(sig*sqrt(2.*pi)))*exp( -(x - mu)^2./(2*sig^2.) )
return(res)
}
# Now Testing
x=seq(-5, 5, 0.01)
p1 = GaussPDF(x, mu=0, sig=0.2)
p2 = GaussPDF(x, mu=0, sig=1.0)
p3 = GaussPDF(x, mu=0, sig=3.0)
p4 = GaussPDF(x, mu=2, sig=0.5)
# Plot the results
plot(x, p1, type="l", col="red")
lines(x, p2, col="blue")
lines(x, p3, col="orange")
lines(x, p4, col="green")
legend(-4, 1.8,
legend=c("mu=0; sig=0.2",
"mu=0; sig=1.0",
"mu=0; sig=3.0",
"mu=-2; sig=0.5"),
col=c("red", "blue", "orange", "green"), lty=1:2, cex=0.8)

set.seed(73)
hist(rnorm(length(x), mean=0,sd=0.2), col="red", main="", border="red",
probability=TRUE, breaks="Sturges", xlim=c(-4,4), ylim=c(0,2))

hist(rnorm(length(x), mean=0,sd=1.0), col="blue", main="", border="blue",
probability=TRUE, breaks="Sturges", xlim=c(-4,4), ylim=c(0,2))

hist(rnorm(length(x), mean=0,sd=3.0), col="orange", main="", border="orange",
probability=TRUE, breaks="Sturges", xlim=c(-4,4), ylim=c(0,2))

hist(rnorm(length(x), mean=-2,sd=0.5), col="green", main="", border="green",
probability=TRUE, breaks="Sturges", xlim=c(-4,4), ylim=c(0,2))

Central Limit Theorem (Bootstraping with Mean function)
library(boot)
set.seed(73)
boot.Mean = function(data, i) {
y <- data[i]
return(mean(y))
}
layout(matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol=4, nrow=2, byrow=TRUE))
par(mai=c(0.6, 0.3, 0.1, 0.1))
# Try with sample following Gaussian Distribution
oriData = rnorm(10000, mean=0, sd=1)
hist(oriData, main="Original data (Gauss. Dist.)")

boot.Out <- boot(oriData, boot.Mean, R = 100)
hist(boot.Out$t, main="Bootstrap 100 Times", xlab="Mean of each sub-sample")

boot.Out <- boot(oriData, boot.Mean, R = 1000)
hist(boot.Out$t, main="Bootstrap 1000 Times", xlab="Mean of each sub-sample")

boot.Out <- boot(oriData, boot.Mean, R = 5000)
hist(boot.Out$t, main="Bootstrap 5000 Times", xlab="Mean of each sub-sample")

# Try on a sample following Gamma Distribution
oriData = rgamma(10000, shape=2, scale=3)
hist(oriData, main="Original data (Gamma. Dist.)")

boot.Out <- boot(oriData, boot.Mean, R = 100)
hist(boot.Out$t, main="Bootstrap 100 Times", xlab="Mean of each sub-sample")

boot.Out <- boot(oriData, boot.Mean, R = 1000)
hist(boot.Out$t, main="Bootstrap 1000 Times", xlab="Mean of each sub-sample")

boot.Out <- boot(oriData, boot.Mean, R = 5000)
hist(boot.Out$t, main="Bootstrap 5000 Times", xlab="Mean of each sub-sample")

Central Limit Theorem (Bootstraping with Variance function)
boot.Var = function(data, i) {
y <- data[i]
return(var(y))
}
boot.Out <- boot(oriData, boot.Var, R = 100)
# Try with sample following Gaussian Distribution
oriData = rnorm(10000, mean=0, sd=1)
hist(oriData, main="Original data (Gauss. Dist.)")

boot.Out <- boot(oriData, boot.Var, R = 100)
hist(boot.Out$t, main="Bootstrap 100 Times", xlab="Var of each sub-sample")

boot.Out <- boot(oriData, boot.Var, R = 1000)
hist(boot.Out$t, main="Bootstrap 1000 Times", xlab="Var of each sub-sample")

boot.Out <- boot(oriData, boot.Var, R = 5000)
hist(boot.Out$t, main="Bootstrap 5000 Times", xlab="Var of each sub-sample")

# Try on a sample following Gamma Distribution
oriData = rgamma(10000, shape=2, scale=3)
hist(oriData, main="Original data (Gamma. Dist.)")

boot.Out <- boot(oriData, boot.Var, R = 100)
hist(boot.Out$t, main="Bootstrap 100 Times", xlab="Var of each sub-sample")

boot.Out <- boot(oriData, boot.Var, R = 1000)
hist(boot.Out$t, main="Bootstrap 1000 Times", xlab="Var of each sub-sample")

boot.Out <- boot(oriData, boot.Var, R = 5000)
hist(boot.Out$t, main="Bootstrap 5000 Times", xlab="Var of each sub-sample")

Central Limit Theorem (example)
# Generate random daily precipitation (~Gamma distribution)
daily_precip <- rgamma(18250, shape=2, scale=3)
# Reshape into 50 years × 365 days
daily_precip_matrix <- matrix(daily_precip, nrow=50, ncol=365, byrow=TRUE)
# Approximate number of days per month (ignoring leap years)
days_per_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
# Compute monthly totals
monthly_totals <- t(sapply(1:12, function(m) {
start <- sum(days_per_month[1:(m-1)]) + 1
end <- sum(days_per_month[1:m])
rowSums(daily_precip_matrix[, start:end])
}))
# Compute seasonal totals (Winter: DJF, Spring: MAM, Summer: JJA, Fall: SON)
seasonal_totals <- cbind(
rowSums(daily_precip_matrix[, 1:59]), # DJF (Winter)
rowSums(daily_precip_matrix[, 60:151]), # MAM (Spring)
rowSums(daily_precip_matrix[, 152:243]),# JJA (Summer)
rowSums(daily_precip_matrix[, 244:365]) # SON (Fall)
)
# Bootstrap resampling function
bootstrap_sample <- function(data, indices) {
return(mean(data[indices]))
}
library(ggplot2)
# Apply bootstrap on monthly totals (Flattened)
boot_monthly <- boot(as.vector(monthly_totals), statistic=bootstrap_sample, R=1000)
# Plot Bootstrap Distribution of Monthly Means
ggplot(data.frame(mean=boot_monthly$t), aes(x=mean)) +
geom_histogram(bins=30, fill="orange", color="black", alpha=0.7) +
ggtitle("Bootstrap Distribution of Monthly Means") +
xlab("Mean Precipitation (inches)") + ylab("Frequency")

# Apply bootstrap on seasonal totals (Flattened)
boot_seasonal <- boot(as.vector(seasonal_totals), statistic=bootstrap_sample, R=1000)
# Plot Bootstrap Distribution of Seasonal Means
ggplot(data.frame(mean=boot_seasonal$t), aes(x=mean)) +
geom_histogram(bins=30, fill="red", color="black", alpha=0.7) +
ggtitle("Bootstrap Distribution of Seasonal Means") +
xlab("Mean Precipitation (inches)") + ylab("Frequency")

# Precipitation Transformation Analysis
library(MASS) # For Box-Cox Transformation
library(dplyr) # For data manipulation
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr) # For reshaping data
# Generate daily precipitation using a Gamma distribution
n_years <- 50
days_in_jan <- 31
daily_precip <- matrix(rgamma(n_years * days_in_jan, shape=2, scale=3), nrow=n_years, ncol=days_in_jan)
# Compute monthly total precipitation for each year
monthly_totals <- rowSums(daily_precip)
# Box-Cox transformation function
boxcox_transform <- function(y, lambda) {
if (lambda == 0) return(log(y)) # Log transformation
return((y^lambda - 1) / lambda) # General Box-Cox transformation
}
# Apply transformations
lambda_values <- c(1, 0.5, 0, -0.5)
transformed_data <- data.frame(
original = monthly_totals,
lambda_1 = boxcox_transform(monthly_totals, 1),
lambda_05 = boxcox_transform(monthly_totals, 0.5),
lambda_0 = boxcox_transform(monthly_totals, 0),
lambda_neg05 = boxcox_transform(monthly_totals, -0.5)
)
# Compute dispersion (standard deviation) d_lambda
dispersion_values <- sapply(transformed_data[-1], sd)
log_likelihoods <- sapply(lambda_values, function(lambda) {
model <- lm(boxcox_transform(monthly_totals, lambda) ~ 1)
return(logLik(model)[1]) # Extract log-likelihood
})
# Create summary table
summary_table <- data.frame(
Lambda = lambda_values,
Dispersion_d_lambda = round(dispersion_values, 2),
LogLikelihood_L_lambda = round(log_likelihoods, 2)
)
# Print table
summary_table
## Lambda Dispersion_d_lambda LogLikelihood_L_lambda
## lambda_1 1.0 19.97 -220.15
## lambda_05 0.5 1.47 -89.70
## lambda_0 0.0 0.11 40.57
## lambda_neg05 -0.5 0.01 170.68
# Convert to long format for ggplot
transformed_melted <- transformed_data %>%
pivot_longer(cols = everything(), names_to = "Transformation", values_to = "Precipitation")
ggplot(transformed_melted, aes(x=Transformation, y=Precipitation)) +
geom_boxplot(fill="white", color="black", outlier.color="black") +
ggtitle("Monthly Precipitation - Box-Cox Transformations") +
xlab("Transformation") + ylab("Precipitation") +
scale_x_discrete(labels=c("λ=1", "λ=0.5", "Log (λ=0)", "λ=-0.5")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))

library(knitr)
kable(summary_table, caption="Final Computed Values of λ, dλ, and L(λ)")
Final Computed Values of λ, dλ, and L(λ)
| lambda_1 |
1.0 |
19.97 |
-220.15 |
| lambda_05 |
0.5 |
1.47 |
-89.70 |
| lambda_0 |
0.0 |
0.11 |
40.57 |
| lambda_neg05 |
-0.5 |
0.01 |
170.68 |
Log-normal Distributions
set.seed(73)
# Define parameters for Normal distribution
mu_y <- 0 # Mean of log(X)
sigma_y <- 0.5 # Standard deviation of log(X)
# Generate 10,000 log-normal random values
lognormal_data <- rlnorm(10000, meanlog = mu_y, sdlog = sigma_y)
ggplot(data.frame(x=lognormal_data), aes(x)) +
geom_histogram(aes(y=..density..), bins=50, fill="blue", alpha=0.5, color="black") +
stat_function(fun=dlnorm, args=list(meanlog=mu_y, sdlog=sigma_y), color="red", linewidth=1.2) +
ggtitle("Log-Normal Distribution") +
xlab("x") + ylab("Density") +
theme_minimal()
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Transform log-normal data to normal using ln(x)
normal_data <- log(lognormal_data)
# Plot the histogram of transformed data
ggplot(data.frame(x=normal_data), aes(x)) +
geom_histogram(aes(y=..density..), bins=50, fill="green", alpha=0.5, color="black") +
stat_function(fun=dnorm, args=list(mean=mu_y, sd=sigma_y), color="red", linewidth=1.2) +
ggtitle("Transformed Normal Distribution (ln(X))") +
xlab("ln(x)") + ylab("Density") +
theme_minimal()

# Generate normal data with the same parameters
normal_data_sample <- rnorm(10000, mean=mu_y, sd=sigma_y)
# Combine data into a single dataframe
comparison_data <- data.frame(
x = c(lognormal_data, normal_data_sample),
Distribution = rep(c("Log-Normal", "Normal"), each = 10000)
)
# Plot comparison (Fixing fill mapping)
ggplot(comparison_data, aes(x=x, fill=Distribution)) +
geom_histogram(aes(y=..density..), bins=50, alpha=0.5, color="black", position="identity") +
scale_fill_manual(name="Distribution", values=c("Log-Normal"="blue", "Normal"="red")) +
ggtitle("Comparison of Log-Normal and Normal Distributions") +
xlab("x") + ylab("Density") +
theme_minimal()

Multivariate Gaussian (Normal) Distributions
library(MASS)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ plotly::select() masks dplyr::select(), MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
set.seed(73)
# Define mean vector (μ_x, μ_y)
mu <- c(0, 0)
# Define standard deviations and correlation coefficient
sigma_x <- 1
sigma_y <- 1
rho <- 0.7 # Correlation between X and Y
# Covariance matrix Σ
Sigma <- matrix(c(sigma_x^2, rho * sigma_x * sigma_y,
rho * sigma_x * sigma_y, sigma_y^2), ncol=2)
# Generate 5000 bivariate normal samples
bivariate_data <- mvrnorm(n=5000, mu=mu, Sigma=Sigma)
colnames(bivariate_data) <- c("X", "Y")
#Plot of Bivariate Normal data
ggplot(data.frame(bivariate_data), aes(x=X, y=Y)) +
geom_point(alpha=0.3, color="blue") +
ggtitle("Scatter Plot of Bivariate Normal Data") +
xlab("X") + ylab("Y") +
theme_minimal()

# Bivariate Gaussian PDF
# install.packages("mvtnorm")
library(mvtnorm)
library(ggplot2)
library(plotly)
library(tidyverse)
# Define grid for X and Y
x_vals <- seq(-3, 3, length.out=100)
y_vals <- seq(-3, 3, length.out=100)
grid <- expand.grid(X = x_vals, Y = y_vals)
# Compute the bivariate normal PDF
bivariate_pdf <- function(x, y, mu, Sigma) {
dmvnorm(cbind(x, y), mean=mu, sigma=Sigma)
}
grid$Density <- bivariate_pdf(grid$X, grid$Y, mu, Sigma)
# Convert to matrix for contour plot
ggplot(grid, aes(x=X, y=Y, z=Density)) +
geom_contour_filled() +
ggtitle("Bivariate Gaussian PDF Contour Plot") +
xlab("X") + ylab("Y") +
theme_minimal()

# Create a 3D surface plot
p <- plot_ly(x = ~grid$X, y = ~grid$Y, z = ~grid$Density, type="surface",
colorscale="Viridis")
p
# Conditional Gaussian Distribution
# Given Y = 1
y_given <- 1
# Compute conditional mean and variance
mu_x_given_y <- mu[1] + rho * (sigma_x / sigma_y) * (y_given - mu[2])
sigma_x_given_y <- sigma_x * sqrt(1 - rho^2)
# Generate samples from conditional distribution
x_given_y_samples <- rnorm(5000, mean=mu_x_given_y, sd=sigma_x_given_y)
# Plot the conditional distribution
ggplot(data.frame(X_given_Y = x_given_y_samples), aes(x=X_given_Y)) +
geom_histogram(aes(y=after_stat(density)), bins=50, fill="orange", alpha=0.7, color="black") +
ggtitle("Conditional Distribution of X | Y=1") +
xlab("X given Y=1") + ylab("Density") +
theme_minimal()

# Example
# Given data
mu_x <- 32 # Canandaigua mean Tmax
mu_y <- 30 # Ithaca mean Tmax
sigma_x <- 7.9
sigma_y <- 7.7
rho <- 0.96
# Given Y = 25°F
y_given <- 25
# Compute conditional mean and standard deviation
mu_x_given_y <- mu_x + rho * (sigma_x / sigma_y) * (y_given - mu_y)
sigma_x_given_y <- sigma_x * sqrt(1 - rho^2)
# Print results
cat("Estimated Canandaigua Tmax given Ithaca Tmax = 25°F:\n")
## Estimated Canandaigua Tmax given Ithaca Tmax = 25°F:
cat("μ_X|Y =", round(mu_x_given_y, 2), "°F\n")
## μ_X|Y = 27.08 °F
cat("σ_X|Y =", round(sigma_x_given_y, 3), "°F\n")
## σ_X|Y = 2.212 °F
Gamma Distribution
# Gamma distribution PDF
set.seed(73)
# Define shape (α) and scale (β) parameters
alpha <- 3.76 # Shape parameter
beta <- 0.52 # Scale parameter
# Generate 10,000 Gamma-distributed random values
gamma_data <- rgamma(10000, shape=alpha, scale=beta)
ggplot(data.frame(x=gamma_data), aes(x)) +
geom_histogram(aes(y=after_stat(density)), bins=50, fill="blue", alpha=0.5, color="black") +
stat_function(fun=dgamma, args=list(shape=alpha, scale=beta), color="red", linewidth=1.2) +
ggtitle("Gamma Distribution PDF") +
xlab("x") + ylab("Density") +
theme_minimal()

# Shape parameter alpha = 1 & Scale parameter beta
x_vals <- seq(0, 40, length.out=500)
# Define a fixed shape parameter α = 1
alpha <- 1
# Define multiple scale parameter (β) values
beta_values <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
# Define colors and line types for each beta value
colors <- c("magenta", "red", "orange", "yellow", "green", "darkgreen", "cyan", "blue", "navy", "black")
line_types <- c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4) # Different line styles
# Open a blank plot
plot(x_vals, dgamma(x_vals, shape=alpha, scale=beta_values[1]),
type="l", col=colors[1], lty=line_types[1], lwd=2,
xlab="x", ylab="f(x)", main=expression(paste("Gamma distributions with ", alpha, "=1")),
ylim=c(0, 0.5))
# Add lines for each β value
for (i in 2:length(beta_values)) {
lines(x_vals, dgamma(x_vals, shape=alpha, scale=beta_values[i]),
col=colors[i], lty=line_types[i], lwd=2)
}
# Add legend
legend("topright", legend=paste("β=", beta_values), col=colors, lty=line_types, lwd=2, bty="n")

# Shape parameter alpha = 2 & Scale parameter beta
x_vals <- seq(0, 40, length.out=500)
# Define a fixed shape parameter α = 2
alpha <- 2
# Define multiple scale parameter (β) values
beta_values <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
# Define colors and line types for each beta value
colors <- c("magenta", "red", "orange", "yellow", "green", "darkgreen", "cyan", "blue", "navy", "black")
line_types <- c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4) # Different line styles
# Open a blank plot
plot(x_vals, dgamma(x_vals, shape=alpha, scale=beta_values[1]),
type="l", col=colors[1], lty=line_types[1], lwd=2,
xlab="x", ylab="f(x)", main=expression(paste("Gamma distributions with ", alpha, "=2")),
ylim=c(0, 0.5))
# Add lines for each β value
for (i in 2:length(beta_values)) {
lines(x_vals, dgamma(x_vals, shape=alpha, scale=beta_values[i]),
col=colors[i], lty=line_types[i], lwd=2)
}
# Add legend
legend("topright", legend=paste("β=", beta_values), col=colors, lty=line_types, lwd=2, bty="n")

# Gamma & Normal distribution
# Fit Normal Distribution
normal_fit <- fitdistrplus::fitdist(gamma_data, "norm")
# Fit Gamma Distribution
gamma_fit <- fitdistrplus::fitdist(gamma_data, "gamma")
# Plot comparison
ggplot(data.frame(x=gamma_data), aes(x)) +
geom_histogram(aes(y=after_stat(density)), bins=50, fill="blue", alpha=0.5, color="black") +
stat_function(fun=dgamma, args=list(shape=gamma_fit$estimate["shape"], scale=gamma_fit$estimate["scale"]),
color="red", linewidth=1.2, linetype="solid") +
stat_function(fun=dnorm, args=list(mean=normal_fit$estimate["mean"], sd=normal_fit$estimate["sd"]),
color="black", linewidth=1.2, linetype="dashed") +
ggtitle("Comparison of Gamma and Normal Distributions") +
xlab("x") + ylab("Density") +
theme_minimal() +
annotate("text", x=5, y=0.2, label="Gamma Fit (Red - Solid)", color="red", size=5) +
annotate("text", x=5, y=0.18, label="Normal Fit (Black - Dashed)", color="black", size=5)
## Warning: Removed 101 rows containing missing values or values outside the scale range
## (`geom_function()`).

# Methods of Moments & Maximum Likelihood Estimation
# Evaluating Gamma Distribution Probabilities
set.seed(429)
JanRain33_82 = rgamma(50, shape=3.5, scale=0.55)+rnorm(50,0,0.01)
# Moment estimators
(alpha1 = mean(JanRain33_82)^2/var(JanRain33_82))
## [1] 4.705943
(beta1 = var(JanRain33_82)/mean(JanRain33_82))
## [1] 0.3450994
#MLE by Thom (1958), Greenwood and Durand (1960)
(Dstats = log(mean(JanRain33_82))-mean(log(JanRain33_82)))
## [1] 0.1146829
#Thom Estimator
(alphaThom = (1+sqrt(1+4*Dstats/3))/4/Dstats)
## [1] 4.520589
# Greenwood and Durand
(alphaGD = (0.5000876+0.1648852*Dstats-0.0544274*Dstats^2)/Dstats)
## [1] 4.519256
(thebeta = mean(JanRain33_82)/4.18)
## [1] 0.3885211
# MLE
library(MASS)
od = options(digits = 3)
fitdistr(JanRain33_82, "gamma", start = list(shape=3, scale=0.4))
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## shape scale
## 4.5196 0.3593
## (0.8724) (0.0734)
# Example
library(MASS)
library(ggplot2)
library(fitdistrplus)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
##
## aml
# Simulated precipitation data based on the image
set.seed(42) # Ensure reproducibility
gamma_shape <- 3.76
gamma_scale <- 0.52
precipitation_data <- rgamma(1000, shape=gamma_shape, scale=gamma_scale) # Simulated dataset
# Fit distributions to precipitation data
gamma_fit <- fitdist(precipitation_data, "gamma")
normal_fit <- fitdist(precipitation_data, "norm")
# Plot histogram with fitted Gamma and Gaussian PDFs
ggplot(data.frame(x=precipitation_data), aes(x)) +
geom_histogram(aes(y=after_stat(density)), bins=30, fill="gray", alpha=0.6, color="black") +
stat_function(fun=dgamma, args=list(shape=gamma_fit$estimate["shape"], scale=gamma_fit$estimate["scale"]),
color="black", linewidth=1.2, linetype="solid") +
stat_function(fun=dnorm, args=list(mean=normal_fit$estimate["mean"], sd=normal_fit$estimate["sd"]),
color="black", linewidth=1.2, linetype="dashed") +
geom_vline(xintercept=3.15, color="orange", linewidth=1.5, linetype="solid") +
annotate("text", x=3.15, y=0.1, label="3.15 in", color="orange", angle=90, vjust=-0.5, size=5) +
ggtitle("Histogram of Ithaca January Precipitation Data") +
xlab("Precipitation (inches)") + ylab("Density") +
theme_minimal()
## Warning: Removed 101 rows containing missing values or values outside the scale range
## (`geom_function()`).

# Given Gamma distribution parameters for 50-year dataset
alpha_50y <- 4.52
beta_50y <- 0.36
# Compute probability of precipitation ≤ 3.15 inches
prob_1987 <- pgamma(3.15, shape=alpha_50y, scale=beta_50y)
# Print probability and conclusion
cat("\nProbability of January 1987 precipitation (≤ 3.15 in):", round(prob_1987, 6), "\n")
##
## Probability of January 1987 precipitation (≤ 3.15 in): 0.958
if (prob_1987 > 0.95) {
cat("Conclusion → January 1987 was quite unusually wet!\n")
} else {
cat("Conclusion → January 1987 precipitation was within normal range.\n")
}
## Conclusion → January 1987 was quite unusually wet!
Special Gamma Exponential Distributions
library(ggplot2)
# Define Exponential Distribution parameters
beta <- 2 # Scale parameter (same as λ^-1 in other notation)
x_vals <- seq(0, 10, length.out=100)
# Compute PDF and CDF of Exponential Distribution
exp_pdf <- dexp(x_vals, rate=1/beta) # Exponential PDF: f(x) = (1/beta) * exp(-x/beta)
exp_cdf <- pexp(x_vals, rate=1/beta) # CDF: F(x) = 1 - exp(-x/beta)
# Plot Exponential PDF
ggplot(data.frame(x=x_vals, y=exp_pdf), aes(x, y)) +
geom_line(color="black", linewidth=1.2) +
geom_vline(xintercept=0, linetype="dashed", color="red") +
ggtitle("Exponential Distribution") +
xlab("x") + ylab("f(x)") +
theme_minimal()

# Plot Exponential CDF
ggplot(data.frame(x=x_vals, y=exp_cdf), aes(x, y)) +
geom_line(color="blue", linewidth=1.2) +
geom_vline(xintercept=0, linetype="dashed", color="red") +
ggtitle("Exponential Distribution CDF") +
xlab("x") + ylab("F(x)") +
theme_minimal()

Special Gamma Erlang Distributions
library(ggplot2)
# Define Erlang Distribution parameters
alpha <- 3 # Shape parameter (integer)
beta <- 2 # Scale parameter
# Generate x values
x_vals <- seq(0, 15, length.out=500)
# Compute PDF of Erlang Distribution (Gamma with integer shape)
erlang_pdf <- dgamma(x_vals, shape=alpha, scale=beta)
# Plot Erlang PDF
ggplot(data.frame(x=x_vals, y=erlang_pdf), aes(x, y)) +
geom_line(color="blue", linewidth=1.2) +
geom_vline(xintercept=alpha * beta, linetype="dashed", color="black") +
ggtitle("Erlang Distribution (Gamma with Integer Shape)") +
xlab("Time (Waiting for α-th Poisson Event)") + ylab("Probability Density") +
theme_minimal()

Special Gamma chi-square (χ^2) Distributions
library(ggplot2)
x_vals <- seq(0, 8, 0.1)
# Compute Chi-square PDFs for different degrees of freedom
y1 <- dchisq(x_vals, df = 1)
y2 <- dchisq(x_vals, df = 2)
y3 <- dchisq(x_vals, df = 3)
y4 <- dchisq(x_vals, df = 4)
y5 <- dchisq(x_vals, df = 6)
y6 <- dchisq(x_vals, df = 9)
# Create data frames for each df value
df1 <- data.frame(x = x_vals, y = y1, df = "1")
df2 <- data.frame(x = x_vals, y = y2, df = "2")
df3 <- data.frame(x = x_vals, y = y3, df = "3")
df4 <- data.frame(x = x_vals, y = y4, df = "4")
df5 <- data.frame(x = x_vals, y = y5, df = "6")
df6 <- data.frame(x = x_vals, y = y6, df = "9")
# Combine into a single data frame
df <- rbind(df1, df2, df3, df4, df5, df6)
# Define custom colors for each DF
color_palette <- c("1" = "yellow", "2" = "green", "3" = "lightblue",
"4" = "blue", "6" = "purple", "9" = "red")
# Plot Chi-square Distributions for different degrees of freedom
ggplot(data=df, aes(x = x, y = y, color = df)) +
geom_line(linewidth=1) +
scale_color_manual(name = "Degrees of Freedom (DF)", values = color_palette) +
labs(y = expression(f[k](x)), x = expression(x)) +
scale_x_continuous(breaks = seq(0, 8, 1)) +
scale_y_continuous(limits = c(0, 0.5), breaks = seq(0, 0.5, 0.1)) +
theme_minimal() +
theme(
panel.grid.major = element_line(color="gray90"),
panel.grid.minor = element_blank(),
axis.text = element_text(size=14),
axis.title = element_text(size=16, face="bold"),
legend.position=c(0.8, 0.75),
legend.background = element_rect(fill="gray90", color="black"),
legend.title = element_text(size=14),
legend.text = element_text(size=12)
)
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Special Gamma pearson type III distributions
#install.packages("PearsonDS")
library(PearsonDS)
library(ggplot2)
# Define Pearson Type III parameters
shape <- 3 # Shape parameter (like Gamma shape)
location <- 1 # Shift parameter (ζ)
scale <- 2 # Scale parameter (like Gamma scale)
# Define x values covering the shifted range
x_vals <- seq(location - 5, location + 15, length.out=500)
# Compute Pearson III PDF using PearsonDS package
pearson_pdf <- dpearsonIII(x_vals, shape=shape, location=location, scale=scale)
# Create data frame for plotting
pearson_df <- data.frame(x = x_vals, y = pearson_pdf)
# Plot Pearson Type III Distribution
ggplot(pearson_df, aes(x, y)) +
geom_line(color="blue", linewidth=1.5) +
geom_vline(xintercept=location, linetype="dashed", color="red", linewidth=1) +
ggtitle(expression("Pearson Type III Distribution")) +
xlab(expression(x)) +
ylab(expression(f(x))) +
theme_minimal() +
annotate("text", x=location, y=max(pearson_pdf) * 0.9,
label=expression(zeta), color="red", size=5, vjust=-1)
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

Beta Distributions
library(ggplot2)
library(fitdistrplus)
# Define x values for Beta distribution
x_vals <- seq(0, 1, length.out=500)
# Define different pairs of (alpha, beta) for plotting
beta_params <- list(
c(0.5, 0.5),
c(5, 1),
c(1, 3),
c(2, 2),
c(6, 2),
c(2, 6)
)
# Generate density data
beta_df <- data.frame()
for (params in beta_params) {
alpha <- params[1]
beta <- params[2]
density_vals <- dbeta(x_vals, shape1=alpha, shape2=beta)
temp_df <- data.frame(x = x_vals, y = density_vals,
label = paste0("α = ", alpha, ", β = ", beta))
beta_df <- rbind(beta_df, temp_df)
}
# Plot Beta Distributions
ggplot(beta_df, aes(x=x, y=y, color=label)) +
geom_line(linewidth=1) +
ggtitle("Beta Distributions for Different α, β") +
xlab("x") + ylab("PDF") +
scale_color_manual(name="Parameters", values=c("blue", "red", "green", "purple", "orange", "brown")) +
theme_minimal()

# --------------------------
# Fit Beta Distribution to Data
# --------------------------
library(MASS)
set.seed(429)
x = rgamma(1e5, 2, .2)
plot(density(x))

# normalize the gamma so it's between 0 & 1
# .0001 added because having exactly 1 causes fail
xt = x / (max(x) +.0001)
fit.beta1 = fitdistr(xt, "beta", start = list(shape1=2, shape2=5))
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
x.beta = rbeta(1e5, fit.beta1$estimate[[1]],fit.beta1$estimate[[2]])
# plot the pdfs on top of each other
plot(density(xt))
lines(density(x.beta), col="red")

# plot the qqplots
qqplot(xt, x.beta)

Extreme value distributions
#install.packages("evd")
#install.packages("ismev")
library(ggplot2)
library(evd)
library(ismev)
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
# --------------------------
# Simulate Extreme-Value Data
# --------------------------
set.seed(42)
gev_data <- rgev(1000, loc = 3, scale = 1, shape = 0.2) # Generate synthetic data
# Fit GEV distribution to the data
gev_fit <- fgev(gev_data) # Maximum Likelihood Estimation (MLE)
# Extract fitted parameters
fitted_loc <- gev_fit$estimate["loc"]
fitted_scale <- gev_fit$estimate["scale"]
fitted_shape <- gev_fit$estimate["shape"]
# Define x values for plotting
x_vals <- seq(min(gev_data), max(gev_data), length.out=500)
# Compute GEV PDF with fitted parameters
gev_pdf <- dgev(x_vals, loc=fitted_loc, scale=fitted_scale, shape=fitted_shape)
# --------------------------
# Plot GEV Distribution
# --------------------------
gev_df <- data.frame(x = x_vals, y = gev_pdf)
ggplot(gev_df, aes(x, y)) +
geom_line(color="blue", linewidth=1.5) +
ggtitle("Generalized Extreme Value (GEV) Distribution") +
xlab("x (Extreme Value)") + ylab("Density") +
theme_minimal()

# --------------------------
# Fit GEV to Real Data (Charleston Precipitation)
# --------------------------
# Annual maxima of daily precipitation (inches) at Charleston, South Carolina (1951-1970)
charleston_precip <- c(2.01, 3.52, 2.61, 3.89, 1.82, 3.86, 3.31, 4.28, 3.52, 4.51,
3.48, 3.96, 4.60, 3.58, 3.56, 6.23, 5.02, 3.73, 3.50, 3.00)
# Fit GEV distribution to real precipitation data
gev_fit_real <- fgev(charleston_precip)
# Extract fitted parameters
real_loc <- gev_fit_real$estimate["loc"]
real_scale <- gev_fit_real$estimate["scale"]
real_shape <- gev_fit_real$estimate["shape"]
# Define x values for real data plot
x_real_vals <- seq(min(charleston_precip), max(charleston_precip), length.out=500)
# Compute GEV PDF for real precipitation data
gev_real_pdf <- dgev(x_real_vals, loc=real_loc, scale=real_scale, shape=real_shape)
# --------------------------
# Plot Fitted GEV for Charleston Data
# --------------------------
gev_real_df <- data.frame(x = x_real_vals, y = gev_real_pdf)
ggplot(gev_real_df, aes(x, y)) +
geom_line(color="red", linewidth=1.5) +
ggtitle("GEV Fit for Charleston Precipitation (1951-1970)") +
xlab("Precipitation (inches)") + ylab("Density") +
theme_minimal()

Gumbel (Fisher-tippett type I) distributions
#install.packages("locfit")
#install.packages("logspline")
library(locfit)
## locfit 1.5-9.11 2025-01-27
##
## Attaching package: 'locfit'
## The following object is masked from 'package:purrr':
##
## none
library(logspline)
# Clear previous plots & reset the graphics device
par(mfrow=c(3,1), mar=c(4,4,2,1)) # Set 3-row layout & reduce margins
# --------------------------
# Normal Distribution Plot
# --------------------------
x = rnorm(10000, 0, 1) # Generate normal data
plot(logspline(x), xlim=c(-5,5), ylim=c(0,0.5), font=2, cex.lab=2, font.lab=2,
xlab="Normal Distribution")
# --------------------------
# Gumbel Extreme Value Simulation
# --------------------------
N = 1000
ave = numeric(N)
max1 = numeric(N)
for (i in 1:N) {
x = rnorm(N,0,1) # Generate N normal values
lines(locfit(~x), col="grey") # Add grey density curve
points(mean(x), 0, col="blue", pch=17) # Mean value point
points(max(x), 0, col="red", pch=17) # Maximum value point
ave[i] = mean(x)
max1[i] = max(x)
}
plot(locfit(~ave), xlim=c(-5,5), ylim=c(0,9), ylab="", cex.lab=2, font=2, font.lab=2,
xlab="Normal Distribution", col="white")
lines(locfit(~ave), lwd=2, col="blue") # Blue line for normal distribution
plot(locfit(~max1), xlim=c(-5,5), font=2, font.lab=2, ylab="",
xlab="Extreme Value Distribution (Gumbel)", cex.lab=2, col="white")
lines(locfit(~max1), lwd=2, col="red") # Red line for Gumbel distribution

# --------------------------
# EVD from Exponential
# --------------------------
par(mfrow=c(3,1), mar=c(4,4,2,1))
set.seed(42)
x = rexp(10000, rate=0.5)
hist(x,prob=T,xlim=c(0,25),ylab="",main="",font=2,cex.lab=2,
font.lab=2,xlab="Exponential Distribution")
plot(logspline(x),add=T)
N = 1000
ave = matrix(NA,nrow=N,ncol=1)
max1 = matrix(NA,nrow=N,ncol=1)
for (i in 1:N)
{
x = rexp(N,0.5)
lines(locfit(~x), col="grey")
points(mean(x), 0, col="blue", pch=17)
points(max(x), 0, col="red", pch=17)
ave[i] = mean(x)
max1[i] = max(x)
Sys.sleep(0)
}
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : max_nr not converged
Sys.sleep(1)
plot(locfit(~ave), xlim=c(0,25), ylim=c(0,4), ylab="", cex.lab=2, font=2, font.lab=2, xlab="Normal Distribution", col="white")
lines(locfit(~ave), lwd=2, col="blue")
Sys.sleep(1)
plot(locfit(~max1), xlim=c(0,25), font=2, font.lab=2, ylab="", xlab="Extreme Value Distribution (Gumbel)", cex.lab=2, col="white")
lines(locfit(~max1), lwd=2, col="red")

# --------------------------
# Uniform origins
# --------------------------
x = runif(100000,0,1)
#hist(x,prob=T,xlim=c(0,1),ylab="",main="",font=2,cex.lab=2,font.lab=2,xlab="Uniform Distribution")
plot(density(x),xlim=c(0,1),ylab="",main="",font=2,cex.lab=2,font.lab=2,xlab="Uniform Distribution")
N = 200
ave = matrix(NA,nrow=N,ncol=1)
max1 = matrix(NA,nrow=N,ncol=1)
for (i in 1:N) {
x = runif(N,0,1)
# lines(locfit(~x),col="grey")
points(mean(x),0,col="blue",pch=17)
points(max(x),0,col="red",pch=17)
ave[i] = mean(x)
max1[i] = max(x)
Sys.sleep(0)
}
Sys.sleep(1)
plot(locfit(~ave), xlim=c(0,1), ylim=c(0,30), ylab="", cex.lab=2, font=2, font.lab=2, xlab="Normal Distribution", col="white")
lines(locfit(~ave),lwd=2,col="blue")
Sys.sleep(1)
plot(locfit(~max1), xlim=c(0,1), ylim=c(0,65), font=2, font.lab=2, ylab="", xlab="Extreme Value Distribution (Weibull)", cex.lab=2, col="white")
lines(density(max1),lwd=2,col="red")

# Example annual max of daily precipitation amounts (inches) at Charleston, SC
data2 = data.frame(year=seq(1951,1970,by=1),
precip=c(2.01,3.52,2.61,3.89,1.82,3.86,3.31,4.20,
4.48,4.51,3.48,4.6,5.3,4.93,3.5,4.58,6.23,2.67,5.24,3))
Frechet (Fisher-tippett type II) distributions
library(ggplot2)
library(evd)
# --------------------------
# Define GEV Parameters
# --------------------------
loc <- 0 # Location parameter (ζ)
scale <- 1 # Scale parameter (β)
shape_weibull <- -0.3 # Shape parameter for Weibull (κ < 0)
shape_frechet <- 0.3 # Shape parameter for Fréchet (κ > 0)
shape_gumbel <- 0 # Shape parameter for Gumbel (κ = 0)
# Generate x values
x_vals <- seq(-3, 5, length.out=500)
# Compute PDF for each GEV type
pdf_weibull <- dgev(x_vals, loc=loc, scale=scale, shape=shape_weibull)
pdf_frechet <- dgev(x_vals, loc=loc, scale=scale, shape=shape_frechet)
pdf_gumbel <- dgev(x_vals, loc=loc, scale=scale, shape=shape_gumbel)
# --------------------------
# Create Data Frame for Plotting
# --------------------------
gev_df <- data.frame(
x = rep(x_vals, 3),
pdf = c(pdf_weibull, pdf_frechet, pdf_gumbel),
type = rep(c("Weibull", "Frechet", "Gumbel"), each=length(x_vals))
)
# --------------------------
# Plot GEV Distributions with Correct Colors & Line Styles
# --------------------------
ggplot(gev_df, aes(x=x, y=pdf, color=type, linetype=type)) +
geom_line(linewidth=1.2) +
scale_color_manual(values=c("Weibull"="blue", "Frechet"="purple", "Gumbel"="red")) +
scale_linetype_manual(values=c("Weibull"="solid", "Frechet"="dashed", "Gumbel"="dotdash")) +
ggtitle("GEV Distribution") +
xlab("x") + ylab("Probability Density Function") +
theme_minimal() +
theme(legend.title=element_blank(), legend.position="right")

Weibull (Fisher-tippett type III) distributions
#install.packages("extRemes")
library(extRemes)
## Loading required package: Lmoments
## Loading required package: distillery
##
## Attaching package: 'extRemes'
## The following object is masked from 'package:evd':
##
## mrlplot
## The following objects are masked from 'package:stats':
##
## qqnorm, qqplot
# generate 10000 GEV random variables with a location of 0, scale of 1 and shape of 0.
# So, this is a Gumbel distribution.
# Generate Gumbel Random numbers
x = revd(10000,loc=0,scale=1,shape=0)
hist(x,prob=T,xlab="Random Variables from Gumbel (location=0,scale=1,shape=0)",
main="Gumbel Distribution",ylab="f(x)",ylim=c(0,0.4),font=2,family="serif",font.lab=2,cex.lab=1)
plot(logspline(x),add=T,col='red')

# Frechet distribution plot
y = revd(10000,loc=0,scale=1,shape=0.2)
hist(y,prob=T,ylim=c(0,0.4),xlab="Random Variables from Frechet
(location=0, scale=1, shape=0.2)",main="Frechet Distribution",ylab="f(x)",
font=2,family="serif",font.lab=2,cex.lab=1)
plot(logspline(y),add=T,col="red")
plot(logspline(x),add=T)

# Weibull Distribution Plot
z = revd(10000,loc=0,scale=1,shape=-0.6)
hist(z,prob=T,ylim=c(0,0.5),xlab="Random Variables from Weibull
(location=0,scale=1,shape=-0.6)",main="Weibull Distribution",ylab="f(x)",
font=2,family="serif",font.lab=2,cex.lab=1)
plot(logspline(y),add=T,col="red")
plot(logspline(x),add=T)
plot(logspline(z),add=T,col="green")

# Example Annual Max of daily precipitation amounts (inches) at Charleston, SC
data2 = data.frame(year=seq(1951,1970,by=1),
precip=c(2.01,3.52,2.61,3.89,1.82,3.86,3.31,4.20,
4.48,4.51,3.48,4.6,5.3,4.93,3.5,4.58,6.23,2.67,5.24,3))
Peak-Over-Threshold (POT) sampling & Generalized Pareto
distribution
#install.packages("POT")
library(POT)
## Registered S3 methods overwritten by 'POT':
## method from
## print.bvpot evd
## plot.bvpot evd
##
## Attaching package: 'POT'
## The following object is masked from 'package:extRemes':
##
## mrlplot
## The following objects are masked from 'package:evd':
##
## dens, dgpd, exiplot, mrlplot, pgpd, pp, qgpd, qq, rgpd, tcplot
rgpd(5, loc = 1, scale = 2, shape = -0.2)
## [1] 2.20 1.20 3.08 7.19 1.93
rgpd(6, c(1, -5), 2, -0.2)
## [1] 1.86 -4.82 1.65 -4.56 4.17 -3.32
rgpd(6, 0, c(2, 3), 0)
## [1] 2.109 0.914 0.802 4.397 2.383 0.119
pgpd(c(9, 15, 20), 1, 2, 0.25)
## [1] 0.938 0.983 0.992
qgpd(c(.25, .5, .75), 1, 2, 0)
## [1] 1.58 2.39 3.77
dgpd(c(9, 15, 20), 1, 2, 0.25)
## [1] 0.01563 0.00318 0.00114
x <- runif(10000)
par(mfrow=c(1,2))
tcplot(x, u.range = c(0.9, 0.995))
## Warning in gpdmle(data, u[1], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''
## Warning in gpdmle(data, u[i], corr = TRUE, ...): observed information matrix is
## singular; passing std.err.type to ``expected''

x <- rnorm(10000)
mrlplot(x, u.range = c(1, quantile(x, probs = 0.995)), col = c("green", "black", "green"), nt = 200)
