gdp_distribution

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stats)
library(tidyr)
library(purrr)
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(maxLik)

## Loading required package: miscTools

## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/

library(stats4)
library(moments)
library(univariateML)

library(readxl)
ip <-read_excel("DP_LIVE_09082023174326289.xlsx",col_types = c("text", "text", "text", 
         "text", "text", "text", "numeric", 
         "skip"))
View(ip)
usGDPC1_1_ <- read_excel("GDPC1 (1).xlsx")
View(usGDPC1_1_)

options(scipen = 999)

ip <- ip %>%
  group_by(LOCATION) %>%
  mutate(growthrate = (Value - dplyr::lag(Value, 1)) / dplyr::lag(Value, 1))

usGDPC1_1_=usGDPC1_1_ %>%
  mutate(growthrate = ( realgdp- dplyr::lag(realgdp, 1)) / dplyr::lag(realgdp, 1))

ip <- ip %>%
  mutate(TIME = yearmonth(TIME))

ip <- ip %>%
  group_by(LOCATION) %>%
  slice(-1) %>%
  as_tsibble(key = "LOCATION", index = "TIME")

calculate_acf_with_leads <- function(series, lag.max = 6, lead.max = 6) {
  # Remove missing values
  series <- na.omit(series)
  
  # Calculate ACF for lags
  acf_lags <- acf(series, lag.max = lag.max, plot = FALSE)$acf
  
  # Calculate ACF for leads (by reversing the series and calculating lags)
  acf_leads <- rev(acf(series[length(series):1], lag.max = lead.max, plot = FALSE)$acf)
  
  # Combine results, excluding the 0 lag from acf_lags as it's already included in acf_leads
  combined_acf <- c(acf_leads, acf_lags[-1])
  
  return(combined_acf)
}

lag_max = 6
 lead_max = 6
 
 # Calculate average growth rate by location and time

average_growthrate_by_time <- ip %>%
  index_by(TIME) %>%
  summarize(growthrate = mean(growthrate, na.rm = TRUE), .groups = "drop")
# Apply the calculate_acf_with_leads function to the averaged series
 acf_with_leads_series1 <- calculate_acf_with_leads(average_growthrate_by_time$growthrate, lag.max = lag_max, lead.max = lead_max)
 
 # Apply the function to the other series
 acf_with_leads_series2 <- calculate_acf_with_leads(usGDPC1_1_$growthrate, lag.max = lag_max, lead.max = lead_max)
 
# Define the lags and leads
 lags_and_leads <- (-lead_max:lag_max)
 
 # Plot the results for series 1
 plot(lags_and_leads, acf_with_leads_series1, type = "l", ylim = c(-1, 1), main = "Comparison of Two Autocorrelations", xlab = "Lag/Lead", ylab = "ACF", col = "black")
 
 # Add the line for series 2
 lines(lags_and_leads, acf_with_leads_series2, col = "red")
 
 # Add a legend
 legend("topright", legend = c("ip oecd", "usgdpgrowthrate"), col = c("black", "red"), lty = 1)

#turn to list to get ready for mle
usgdp_list=usGDPC1_1_%>%drop_na%>%as.list(usGDPC1_1_$growthrate)

#use simple mle to get rough estimates 
dlaplace_standard <- function(x, mu = 0, a = 1) {
  if (a <= 0) return(NA)
  (1 / (2 * a)) * exp(1)^(-abs(x - mu) / a)
}

fit_standard <- MASS::fitdistr(usgdp_list$growthrate, dlaplace_standard, start = list(mu = 0, a = 1))
print(fit_standard)

##         mu             a      
##   0.0085194258   0.0073293483 
##  (0.0004995481) (0.0004551209)

dlaplace_modified <- function(x, mu, a, b) {
  if(a <= 0 || b <= 0) return(NA)
  
  gamma_val <- gamma(1 + 1/b)
  part1 <- 1 / (2 * a * b^(1/b) * gamma_val)
  part2 <- exp((-1/b) * abs((x - mu) / a)^b)
  
  result <- part1 * part2
  return(result)
}

#i tried to derive b pretending I didn't know it 
# Objective function only for b
objective_function_b <- function(b, data, m, a) {
  log_density <- log(dlaplace_modified(data, m = m, a = a, b = b))
  neg_log_likelihood <- -sum(log_density, na.rm = TRUE)
  return(neg_log_likelihood)
}

# Take m and a from the fit_standard
m <- fit_standard$estimate['mu']
a <- fit_standard$estimate['a']

# Optimization for b, given m and a
result_b <- optim(1, fn = objective_function_b, data = usgdp_list$growthrate, m = m, a = a, method ="L-BFGS-B", lower = 0.6, upper = 3.3)

# Resulting estimate for b
b_estimate <- result_b$par
print(b_estimate)

## [1] 1.108408

#make sure the parameters mu and b doesn't change with the new b parameter and new distribution 
fit <- MASS::fitdistr(usgdp_list$growthrate, dlaplace_modified, start = list(mu= fit_standard$estimate["mu"] ,a=fit_standard$estimate["a"] ,b =b_estimate ))
print(fit)

##         mu             a              b      
##   0.0085286073   0.0075865476   1.1084084984 
##  (0.0005182001) (0.0005823879) (0.1450989289)

# Function to estimate b from data
estimate_b <- function(data) {
    fit <- MASS::fitdistr(data, dlaplace_modified, start = list(mu = fit$estimate['mu'], a = fit$estimate["a"], b = fit$estimate["b"]))
    return(fit$estimate['b'])
}


# Bootstrap
bootstrap_b <- replicate(4000, {
  sample_data <- sample(usgdp_list$growthrate, size = length(usgdp_list$growthrate), replace = TRUE)
  estimate_b(sample_data)
})

#only a d Small samples give Gaussian curves 
length(which(bootstrap_b>=2))

## [1] 5

#mean
mean(bootstrap_b)

## [1] 1.227707

sd(bootstrap_b)

## [1] 0.1829435

se=mean(bootstrap_b)+(2*sd(bootstrap_b))*c(1,-1)

# Perform a two-tailed t-test to test whether the mean of bootstrap_b is 1
 t.test(bootstrap_b, alternative = "two.sided", mu = 1)

## 
##  One Sample t-test
## 
## data:  bootstrap_b
## t = 78.721, df = 3999, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 1
## 95 percent confidence interval:
##  1.222036 1.233378
## sample estimates:
## mean of x 
##  1.227707

#use ttest to see if b could be 2 which would be a normal distribution 
 t.test(bootstrap_b, alternative = "greater", mu = 2)

## 
##  One Sample t-test
## 
## data:  bootstrap_b
## t = -266.99, df = 3999, p-value = 1
## alternative hypothesis: true mean is greater than 2
## 95 percent confidence interval:
##  1.222948      Inf
## sample estimates:
## mean of x 
##  1.227707

library(ggplot2)
pdf_plot <- ggplot() +
  stat_function(fun = dlaplace_modified, args = list(m = fit$estimate['mu'], a = fit$estimate['a'], b = fit$estimate['b']), geom = "line", color = 'blue') +
  geom_point(data = data.frame(x = usgdp_list$growthrate, y = rep(0, length(usgdp_list$growthrate))), aes(x = x, y = y), color = 'red') +
  xlab('Growth Rates') +
  ylab('PDF') +
  ggtitle('PDF of Modified Laplace Distribution') +
  theme_minimal()

# Display the plot
pdf_plot

library(moments)

#test for skewness 
agostino.test(usgdp_list$growthrate,alternative = "two.sided")

## 
##  D'Agostino skewness test
## 
## data:  usgdp_list$growthrate
## skew = -0.076779, z = -0.493133, p-value = 0.6219
## alternative hypothesis: data have a skewness

#we can't reject the null so we can have confidence that the skew is marginal

# Step 1: Define the CDF for the modified Laplace distribution
plaplace_modified <- function(x, mu, a, b) {
  sapply(x, function(x_i) {
    integrate(function(u) dlaplace_modified(u, mu, a, b), -Inf, x_i)$value
  })
}

# Step 2: Create a custom function capturing the parameters
custom_cdfUS <- function(x) {
  plaplace_modified(x, m = fit$estimate['mu'], a = fit$estimate['a'], b = fit$estimate['b'])
}


# Step 2: Create a custom function capturing the parameters For the hypnosis b=1
custom_cdfoneUS <- function(x) {
  plaplace_modified(x, m = fit$estimate['mu'], a = fit$estimate['a'], b = 1)
}

#  Perform the KS test We will be running a series of tests to ensure we picked a distractionsdistribution that match the data H=B
ks_result <- ks.test(usgdp_list$growthrate, "custom_cdfUS",alternative = "two.sided")

# Print the result
print(ks_result)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  usgdp_list$growthrate
## D = 0.051388, p-value = 0.5669
## alternative hypothesis: two-sided

library(goftest)
#Cramer-Von Mises Test
goftest::cvm.test(usgdp_list$growthrate,"custom_cdfUS",nullname = "custom_cdfoneUS")

## 
##  Cramer-von Mises test of goodness-of-fit
##  Null hypothesis: custom_cdfoneUS
##  Parameters assumed to be fixed
## 
## data:  usgdp_list$growthrate
## omega2 = 0.046257, p-value = 0.899

#H=1

#print the results

#check please 
# Get the log-likelihood of the null model (b = 1)
logLik_null <- sum(log(sapply(usgdp_list$growthrate, custom_cdfoneUS)))

# Get the log-likelihood of the alternative model (b not restricted)
logLik_alternative <- sum(log(sapply(usgdp_list$growthrate, custom_cdfUS)))

# Compute the likelihood ratio test statistic
lr_stat <- 2 * (logLik_alternative - logLik_null)

# Find the p-value (chi-squared distribution with 1 degree of freedom)
p_value <- 1 - pchisq(lr_stat, df = 1)

# Print the result
cat("Likelihood Ratio Test Statistic:", lr_stat, "\n")

## Likelihood Ratio Test Statistic: -8.91999

cat("p-value:", p_value, "\n")

## p-value: 1

# Create an empty dataframe to store results
result_mle <- data.frame()

# Loop through unique locations
for (location in unique(ip$LOCATION)) {
  # Subset the data for this location
  data <- ip[ip$LOCATION == location,]
  
  # Fit the standard distribution
  fit_standard_gen <- MASS::fitdistr(data$growthrate, dlaplace_standard, start = list(mu = 0, a = 1))
  
  # Define the objective function for b
  objective_function_b <- function(b, data, m, a) {
    log_density <- log(dlaplace_modified(data, m = m, a = a, b = b))
    neg_log_likelihood <- -sum(log_density, na.rm = TRUE)
    return(neg_log_likelihood)
  }
  
  # Take m and a from the fit_standard_gen
  m <- fit_standard_gen$estimate['mu']
  a <- fit_standard_gen$estimate['a']
  
  # Optimization for b, given m and a
  result_bgen <- optim(1, fn = objective_function_b, data = data$growthrate, m = m, a = a, method ="L-BFGS-B", lower = 0.6, upper = 3.3)

  # Resulting estimate for b
  b_estimates <- result_bgen$par

  # Fit the modified distribution with the new b parameter
  fit_gen <- MASS::fitdistr(data$growthrate, dlaplace_modified, start = list(mu = m, a = a, b = b_estimates))
  
  # Extract the estimated parameters
  mu <- fit_gen$estimate["mu"]
  a <- fit_gen$estimate["a"]
  b <- fit_gen$estimate["b"]
  
  # Append the results to the result dataframe
  result_mle <- rbind(result_mle, data.frame(LOCATION = location, mu = mu, a = a, b = b))
}
# After your existing loop

# Calculating mean and standard deviation of b
mean_b <- mean(result_mle$b)
sd_b <- sd(result_mle$b)

# Adding the new column with the calculated values
result_mle$desired_values <- mean_b + (2 * sd_b) * c(1, -1)

# Function to perform hypothesis testing
calculate_p_value <- function(b_value, b_estimate, se) {
  z_score <- abs(b_estimate - b_value) / se
  p_value <- 2 * (1 - pnorm(z_score))
  return(p_value)
}

# Performing the hypothesis tests and adding p-values to the dataframe

# Create columns for the p-values
result_mle$p_value_b1 <- NA
result_mle$p_value_b2 <- NA

# Loop through unique locations
for (location_idx in 1:length(unique(ip$LOCATION))) {
  location <- unique(ip$LOCATION)[location_idx]
  # ... (rest of your existing code)
  
  # Extracting the standard error for b
  se_b <- sqrt(diag(vcov(fit_gen))["b"])
  
  # P-value for b = 1
  result_mle$p_value_b1[location_idx] <- calculate_p_value(1, result_mle$b[location_idx], se_b)
  
  # P-value for b = 2
  result_mle$p_value_b2[location_idx] <- calculate_p_value(2, result_mle$b[location_idx], se_b)
}

# Remove row names
row.names(result_mle) <- NULL

library(nortest) # for lillie.test (Jacques Bera LM test)

## 
## Attaching package: 'nortest'

## The following objects are masked from 'package:goftest':
## 
##     ad.test, cvm.test

library(tseries)# for jarque.bera.test (Jacques Bera ALM test)
library(CircStats)#for kuiper test

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: boot

tests_df <- data.frame(
  LOCATION = character(),
  agostino_statistic = numeric(),
  agostino_p_value = numeric(),
  Shapiro_Wilk_W = numeric(),
  Shapiro_Wilk_P = numeric(),
  LM_Statistic = numeric(),
  LM_P_Value = numeric(),
  ALM_Statistic = numeric(),
  ALM_P_Value = numeric(),
  stringsAsFactors = FALSE
)

# Loop through each unique location
for (location in unique(ip$LOCATION)) {
  # Subset the data for this location
  data <- ip[ip$LOCATION == location,]



  # Perform the Agostino test
  agostino <- agostino.test(data$growthrate, alternative = "two.sided")

  # Perform Shapiro-Wilk test
  shapiro_wilk <- shapiro.test(data$growthrate)

  # Perform Jacques-Bera LM test
  lm_test <- lillie.test(data$growthrate)

  # Perform Jacques-Bera ALM test
  alm_test <- jarque.bera.test(data$growthrate)

  # Temporary results
  result_tests <- data.frame(
    LOCATION = location,
    agostino_statistic = agostino$statistic,
    agostino_p_value = agostino$p.value,
    Shapiro_Wilk_W = shapiro_wilk$statistic,
    Shapiro_Wilk_P = shapiro_wilk$p.value,
    LM_Statistic = lm_test$statistic,
    LM_P_Value = lm_test$p.value,
    ALM_Statistic = alm_test$statistic,
    ALM_P_Value = alm_test$p.value
  )

  # Add the temporary results to the main results data frame
  tests_df <- rbind(tests_df, result_tests)
}
  
# Remove row names
row.names(tests_df) <- NULL
first_tests <- tests_df

# Create a new dataframe for the new tests
new_tests_df <- data.frame(
  LOCATION = character(),
  CVM_Statistic = numeric(),
  CVM_P_Value = numeric(),
  KS_Statistic = numeric(),
  KS_P_Value = numeric(),
  AD_Statistic = numeric(),
  AD_P_Value = numeric(),
  stringsAsFactors = FALSE
)

# Loop through each unique location
for (location in unique(ip$LOCATION)) {
  # Subset the data for this location
  data <- ip[ip$LOCATION == location,]
  
  # Retrieve the estimated parameters for this location from result_mle
  mu <- result_mle[result_mle$LOCATION == location, "mu"]
  a <- result_mle[result_mle$LOCATION == location, "a"]
  b <- result_mle[result_mle$LOCATION == location, "b"]
  
  # Custom CDFs
  custom_cdfgen <- function(x) {
    plaplace_modified(x, m = mu, a = a, b = b)
  }
  
 
  
  # Perform the CVM test
  cvm_test <- goftest::cvm.test(data$growthrate,nullname = "custum_cdfgen",estimated = TRUE)
  
  # Perform the KS test (ignoring the ties warning)
  ks_test <- ks.test(data$growthrate, "custom_cdfgen")
  
  # Perform the Anderson-Darling test
  ad_test <- goftest::ad.test(data$growthrate, "custom_cdfgen")
  
  
  
  # Create a temporary data frame for this location
  temp_new_tests <- data.frame(
    LOCATION = location,
    CVM_Statistic = cvm_test$statistic,
    CVM_P_Value = cvm_test$p.value,
    KS_Statistic = ks_test$statistic,
    KS_P_Value = ks_test$p.value,
    AD_Statistic = ad_test$statistic,
    AD_P_Value = ad_test$p.value,
    stringsAsFactors = FALSE
  )
  
  # Append to the new_tests_df
  new_tests_df <- rbind(new_tests_df, temp_new_tests)
}

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(data$growthrate, "custom_cdfgen"): ties should not
## be present for the Kolmogorov-Smirnov test

# Remove row names
row.names(new_tests_df) <- NULL
# Check the results
print(new_tests_df)

##     LOCATION CVM_Statistic CVM_P_Value KS_Statistic KS_P_Value AD_Statistic
## 1        AUT      6.840690           0   0.02626325 0.93295327    0.5242094
## 2        BEL      6.839434           0   0.04727286 0.30228427    1.1546632
## 3        BGR      3.153341           0   0.05525106 0.92671770    0.2920035
## 4        BRA      6.472535           0   0.05384958 0.20569314    1.0551119
## 5        CAN      6.811084           0   0.03341635 0.73364142    0.4325175
## 6        CHL      4.744237           0   0.03622612 0.95827619    0.3487869
## 7        COL      4.904178           0   0.04898478 0.68911464    0.3809371
## 8        CZE      4.795408           0   0.06973007 0.25397407    0.7382712
## 9        DEU      6.805557           0   0.04034735 0.49800913    0.5751870
## 10       DNK      6.787001           0   0.06153717 0.09378882    0.9215232
## 11      EA19      6.534645           0   0.03080705 0.85723867    0.5711358
## 12       ESP      6.653673           0   0.04361094 0.39844197    0.7001226
## 13       EST      3.541550           0   0.04032355 0.99166996    0.1781252
## 14 EU27_2020      6.543984           0   0.03078165 0.85791316    0.6310641
## 15       FIN      6.780422           0   0.03117144 0.80680282    0.3964707
## 16       FRA      6.836959           0   0.03689528 0.61384013    0.2390119
## 17       G-7      6.892043           0   0.03426315 0.70480763    0.7657385
## 18       GBR      6.840215           0   0.03741417 0.59598904    0.7763685
## 19       GRC      6.628284           0   0.03884463 0.54744685    0.6932776
## 20       HRV      3.575415           0   0.04359712 0.98016994    0.2608620
## 21       HUN      5.520531           0   0.03042130 0.96283389    0.3387494
## 22       IND      4.237703           0   0.03315110 0.99441447    0.2517268
## 23       IRL      6.305785           0   0.03292093 0.79696101    0.5141934
## 24       ISL      3.616025           0   0.05392443 0.88865855    0.4684780
## 25       ISR      4.910294           0   0.04599094 0.76099948    0.4454438
## 26       ITA      6.787498           0   0.03261139 0.76054006    0.3798622
## 27       KOR      4.829154           0   0.03419512 0.95590236    0.3204812
## 28       LTU      3.214317           0   0.05519043 0.87158504    0.3838319
## 29       LUX      6.523097           0   0.04063907 0.48864313    0.5564960
## 30       LVA      3.187233           0   0.06777362 0.79191737    0.4559849
## 31       MEX      6.261829           0   0.04645804 0.47064363    0.7438120
## 32       NLD      6.655158           0   0.05306050 0.18565470    0.8696826
## 33       NOR      6.587974           0   0.04350477 0.40148650    1.1350323
## 34      OECD      6.593498           0   0.03833639 0.61200149    0.6577788
## 35     OECDE      6.906057           0   0.02706966 0.91657809    0.4020900
## 36       POL      5.496672           0   0.04730600 0.57668244    0.7816362
## 37       PRT      6.662899           0   0.03155940 0.79462026    0.5786589
## 38       ROU      3.236859           0   0.09344335 0.39789422    0.8682117
## 39       RUS      4.580041           0   0.04859118 0.80027205    0.3796881
## 40       SVK      4.791431           0   0.05233552 0.57155963    0.7502016
## 41       SVN      4.565960           0   0.03798027 0.94909546    0.2290207
## 42       SWE      6.794399           0   0.03441085 0.69973726    0.6158595
## 43       TUR      5.507517           0   0.04178847 0.72914248    0.5312969
## 44       USA      6.929336           0   0.03626539 0.63561155    0.7650453
##    AD_P_Value
## 1   0.7221462
## 2   0.2854093
## 3   0.9439014
## 4   0.3295608
## 5   0.8158918
## 6   0.8974172
## 7   0.8672814
## 8   0.5275376
## 9   0.6716642
## 10  0.4011389
## 11  0.6755893
## 12  0.5586393
## 13  0.9952681
## 14  0.6190177
## 15  0.8521561
## 16  0.9758874
## 17  0.5063292
## 18  0.4983184
## 19  0.5643823
## 20  0.9643042
## 21  0.9063922
## 22  0.9694404
## 23  0.7322679
## 24  0.7787852
## 25  0.8025915
## 26  0.8683891
## 27  0.9219346
## 28  0.8643584
## 29  0.6899363
## 30  0.7915396
## 31  0.5232353
## 32  0.4333306
## 33  0.2935660
## 34  0.5950203
## 35  0.8465826
## 36  0.4943509
## 37  0.6683042
## 38  0.4340337
## 39  0.8684600
## 40  0.5181928
## 41  0.9803512
## 42  0.6330335
## 43  0.7149584
## 44  0.5068560

Laplacefittest=new_tests_df

# Create an empty list to store individual data frames
list_ad_results <- list()

# Calculate n and min_growrate once to reuse later
n <- length(data$growthrate)
min_growrate <- min(data$growthrate)

# Function to compute Anderson-Darling statistic for a given location
compute_QAD_gen <- function(location) {
  # Subset data by location and get parameters
  data_sub <- ip[ip$LOCATION == location,]
  mu <- result_mle[result_mle$LOCATION == location, "mu"]
  a <- result_mle[result_mle$LOCATION == location, "a"]
  b <- result_mle[result_mle$LOCATION == location, "b"]
  
  custom_cdfgen <- function(x) {
    plaplace_modified(x, m = mu, a = a, b = b)
  }
  
  # Integrate
  QAD_gen <- n * integrate(function(y) {
    Fn_y <- mean(data_sub$growthrate <= y)
    F_y <- custom_cdfgen(y)
    w_y <- weight_function(y)
    return((F_y - Fn_y)^2 * w_y)
  }, lower = min_growrate, upper = max(data_sub$growthrate))$value
  
  return(data.frame(
    LOCATION = location,
    AndersonDarling = QAD_gen,
    stringsAsFactors = FALSE
  ))
}



# Combine individual data frames into one
ad_results <- do.call("rbind", list_ad_results)

# Clear row names

# Initialize a dataframe to store the results
lr_test_results <- data.frame(
  LOCATION = character(),
  LR_Statistic_b1 = numeric(),
  p_value_b1 = numeric(),
  LR_Statistic_b2 = numeric(),
  p_value_b2 = numeric(),
  stringsAsFactors = FALSE
)

# Loop over each unique location
for (location in unique(ip$LOCATION)) {
  # Subset data
  data <- ip[ip$LOCATION == location,]
  
  # Retrieve estimated parameters from result_mle
  mu <- result_mle[result_mle$LOCATION == location, "mu"]
  a <- result_mle[result_mle$LOCATION == location, "a"]
  b <- result_mle[result_mle$LOCATION == location, "b"]
  
  # Custom CDFs
  custom_cdf <- function(x) {
    plaplace_modified(x, m = mu, a = a, b = b)
  }
  
  custom_cdfone <- function(x) {
    plaplace_modified(x, m = mu, a = a, b = 1)
  }

  custom_cdf2 <- function(x) {
    plaplace_modified(x, m = mu, a = a, b = 2)
  }
  
  # Compute log-likelihood for b=1
  logLik_null1 <- sum(log(sapply(data$growthrate, custom_cdfone)))
  logLik_alternative1 <- sum(log(sapply(data$growthrate, custom_cdf)))
  
  # Compute the likelihood ratio test statistic for b=1
  lr_stat1 <- 2 * (logLik_alternative1 - logLik_null1)
  
  # Compute p-value for b=1
  p_value1 <- 1 - pchisq(lr_stat1, df = 1)

  # Compute log-likelihood for b=2
  logLik_null2 <- sum(log(sapply(data$growthrate, custom_cdf2)))
  logLik_alternative2 <- sum(log(sapply(data$growthrate, custom_cdf)))
  
  # Compute the likelihood ratio test statistic for b=2
  lr_stat2 <- 2 * (logLik_alternative2 - logLik_null2)
  
  # Compute p-value for b=2
  p_value2 <- 1 - pchisq(lr_stat2, df = 1)

  # Temporary data frame to store the result for this location
  temp_result <- data.frame(
    LOCATION = location,
    LR_Statistic_b1 = lr_stat1,
    p_value_b1 = p_value1,
    LR_Statistic_b2 = lr_stat2,
    p_value_b2 = p_value2,
  stringsAsFactors = FALSE
  )
  
  # Append the results
  lr_test_results <- rbind(lr_test_results, temp_result)
}

# Display the results
print(lr_test_results)

##     LOCATION LR_Statistic_b1               p_value_b1 LR_Statistic_b2
## 1        AUT      -7.1317769 1.0000000000000000000000      177.769108
## 2        BEL     -18.4430927 1.0000000000000000000000      181.348092
## 3        BGR      -5.0100567 1.0000000000000000000000       20.510248
## 4        BRA      23.8002557 0.0000010686826416028694      442.307704
## 5        CAN     -60.1725493 1.0000000000000000000000       59.551304
## 6        CHL     -22.5618465 1.0000000000000000000000       24.343900
## 7        COL      -9.6783555 1.0000000000000000000000      106.098209
## 8        CZE      42.9853483 0.0000000000551513279490      359.818465
## 9        DEU     -25.8740732 1.0000000000000000000000      166.404697
## 10       DNK     -15.8537500 1.0000000000000000000000      132.993729
## 11      EA19     -38.7083860 1.0000000000000000000000       97.177928
## 12       ESP      31.7832654 0.0000000172371525941628      405.121964
## 13       EST     -13.1103666 1.0000000000000000000000       22.818305
## 14 EU27_2020     -40.1042695 1.0000000000000000000000       93.794805
## 15       FIN      -0.2883385 1.0000000000000000000000      239.754437
## 16       FRA     -42.7963998 1.0000000000000000000000       79.228608
## 17       G-7     -70.2113913 1.0000000000000000000000       76.688661
## 18       GBR     -22.6912235 1.0000000000000000000000      197.966251
## 19       GRC       3.9663971 0.0464169894952990436110      242.722720
## 20       HRV      -7.0582136 1.0000000000000000000000       20.246714
## 21       HUN      -8.7260049 1.0000000000000000000000      149.399610
## 22       IND      -8.0645301 1.0000000000000000000000       41.702736
## 23       IRL     -15.0884658 1.0000000000000000000000      155.715177
## 24       ISL      -7.0862266 1.0000000000000000000000       59.382911
## 25       ISR     -22.3507091 1.0000000000000000000000       43.864356
## 26       ITA       5.2699959 0.0216960632840166312008      266.118877
## 27       KOR     -31.6378085 1.0000000000000000000000       21.443918
## 28       LTU     -24.9896417 1.0000000000000000000000       -2.015463
## 29       LUX     -13.9464414 1.0000000000000000000000      169.062083
## 30       LVA     -15.1075521 1.0000000000000000000000       10.446931
## 31       MEX      -2.6941778 1.0000000000000000000000      158.822908
## 32       NLD     -19.5177549 1.0000000000000000000000      155.429314
## 33       NOR      43.4715055 0.0000000000430178115352      509.431627
## 34      OECD     -38.4059581 1.0000000000000000000000      112.088280
## 35     OECDE     -47.5139093 1.0000000000000000000000      127.647015
## 36       POL      11.7261392 0.0006162831202011220810      246.037831
## 37       PRT     -19.0522276 1.0000000000000000000000      119.652539
## 38       ROU       5.3754186 0.0204224404129998982071      105.913230
## 39       RUS      12.8967235 0.0003291578525166372060      140.068989
## 40       SVK      65.6011679 0.0000000000000005551115      501.981250
## 41       SVN     -12.2944386 1.0000000000000000000000       71.230067
## 42       SWE      32.8089100 0.0000000101677727348815      502.530471
## 43       TUR       5.2564135 0.0218660341601065733030      185.911391
## 44       USA     -43.3043411 1.0000000000000000000000      202.381503
##                p_value_b2
## 1  0.00000000000000000000
## 2  0.00000000000000000000
## 3  0.00000593128309978042
## 4  0.00000000000000000000
## 5  0.00000000000001187939
## 6  0.00000080581507966304
## 7  0.00000000000000000000
## 8  0.00000000000000000000
## 9  0.00000000000000000000
## 10 0.00000000000000000000
## 11 0.00000000000000000000
## 12 0.00000000000000000000
## 13 0.00000178061629851900
## 14 0.00000000000000000000
## 15 0.00000000000000000000
## 16 0.00000000000000000000
## 17 0.00000000000000000000
## 18 0.00000000000000000000
## 19 0.00000000000000000000
## 20 0.00000680702890731855
## 21 0.00000000000000000000
## 22 0.00000000010626011182
## 23 0.00000000000000000000
## 24 0.00000000000001298961
## 25 0.00000000003519406988
## 26 0.00000000000000000000
## 27 0.00000364329788316020
## 28 1.00000000000000000000
## 29 0.00000000000000000000
## 30 0.00122853396538846749
## 31 0.00000000000000000000
## 32 0.00000000000000000000
## 33 0.00000000000000000000
## 34 0.00000000000000000000
## 35 0.00000000000000000000
## 36 0.00000000000000000000
## 37 0.00000000000000000000
## 38 0.00000000000000000000
## 39 0.00000000000000000000
## 40 0.00000000000000000000
## 41 0.00000000000000000000
## 42 0.00000000000000000000
## 43 0.00000000000000000000
## 44 0.00000000000000000000

library(tseries)  # For adf.test
library(urca)     # For ur.pp
adf_pp_results <- data.frame(
  LOCATION = character(),
  ADF_Statistic = numeric(),
  ADF_P_Value = numeric(),
  PP_Statistic = numeric(),
  PP_P_Value = numeric(),
  stringsAsFactors = FALSE
)


# Loop through each unique location
for (location in unique(ip$LOCATION)) {
  # Subset the data for this location
  data <- ip[ip$LOCATION == location,]
  
  # Perform Augmented Dickey-Fuller Test
  adf_test <- adf.test(data$growthrate)
  
  # Perform Phillips-Perron Test
  pp_test <- ur.pp(data$growthrate, type="Z-alpha")
  
  # Create a temporary data frame for this location
  temp_results <- data.frame(
    LOCATION = location,
    ADF_Statistic = adf_test$statistic,
    ADF_P_Value = adf_test$p.value,
    PP_Statistic = pp_test@teststat[1],
    PP_P_Value = pp_test@cval[1],
     stringsAsFactors = FALSE
  )
  
  # Append to the adf_pp_results
  adf_pp_results <- rbind(adf_pp_results, temp_results)
}

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

## Warning in adf.test(data$growthrate): p-value smaller than printed p-value

# Create an empty dataframe to store filtered data
filtered_data <- data.frame()

# Define Z-score threshold (usually 2 or 3 for 95% and 99% confidence)
threshold <- 3.5
# Loop through each unique LOCATION
for (location in unique(ip$LOCATION)) {
  # Subset data for the current LOCATION
  data_subset <- ip[ip$LOCATION == location,]
  
  # Calculate mean and standard deviation for 'growthrate' for this LOCATION
  mean_growthrate <- mean(data_subset$growthrate, na.rm = TRUE)
  sd_growthrate <- sd(data_subset$growthrate, na.rm = TRUE)
  
  # Calculate Z-score
  data_subset$Z_score <- abs((data_subset$growthrate - mean_growthrate) / sd_growthrate)
  
  # Remove outliers
  data_no_outliers <- subset(data_subset, Z_score <= threshold)
  
  # Drop the Z-score column as it's no longer needed
  data_no_outliers$Z_score <- NULL
  
  # Append the data_no_outliers to the filtered_data dataframe
  filtered_data <- rbind(filtered_data, data_no_outliers)
}

# Create a new dataframe with just the rows where LOCATION == "USA"
usa_data <- subset(ip, LOCATION == "USA")

# Create a new column counting the rows
usa_data$row_count <- seq_len(nrow(usa_data))

# Initialize an empty dataframe to store results
result_mle_USA <- data.frame(Start_Year = numeric(), End_Year = numeric(), mu = numeric(), a = numeric(), b = numeric())

# Define the row increment
row_increment <- 60

# Initialize start and end for the first iteration
current_start_row <- 1
current_end_row <- row_increment

# Loop through the data, incrementing by 60 rows each time
while (current_end_row <= nrow(usa_data)) {
  
  # Subset the data for this row range
  data_subset <- subset(usa_data, row_count >= current_start_row & row_count <= current_end_row)
  
  # Get the minimum and maximum TIME within this subset
  start_year <- min(data_subset$TIME)
  end_year <- max(data_subset$TIME)
  
  # Fit the standard distribution
  fit_standard_gen <- MASS::fitdistr(data_subset$growthrate, dlaplace_standard, start = list(mu = 0, a = 1))
  
  # Define the objective function for b
  objective_function_b <- function(b, data, m, a) {
    log_density <- log(dlaplace_modified(data, m = m, a = a, b = b))
    neg_log_likelihood <- -sum(log_density, na.rm = TRUE)
    return(neg_log_likelihood)
  }
  
  # Take m and a from the fit_standard_gen
  m <- fit_standard_gen$estimate['mu']
  a <- fit_standard_gen$estimate['a']
  
  # Optimization for b, given m and a
  result_bgen <- optim(1, fn = objective_function_b, data = data_subset$growthrate, m = m, a = a, method = "L-BFGS-B", lower = 0.6, upper = 3.3)
  
  # Resulting estimate for b
  b_estimates <- result_bgen$par
  
  # Append the results for this row range to the result_mle_USA dataframe
  result_mle_USA <- rbind(result_mle_USA, data.frame(
    Start_Year = start_year,
    End_Year = end_year,
    mu = m,
    a = a,
    b = b_estimates
  ))
  
  # Move the row counter by 60
  current_start_row <- current_end_row + 1
  current_end_row <- current_end_row + row_increment
}

# Remove row names for a cleaner look
row.names(result_mle_USA) <- NULL

# View the results
print(result_mle_USA)

##   Start_Year End_Year           mu           a        b
## 1   1972 Aug 1977 Jul 0.0042709832 0.007212521 1.047898
## 2   1977 Aug 1982 Jul 0.0009321971 0.007188490 1.245107
## 3   1982 Aug 1987 Jul 0.0033798141 0.004847565 1.179829
## 4   1987 Aug 1992 Jul 0.0024205161 0.004345379 1.448927
## 5   1992 Aug 1997 Jul 0.0038761398 0.003383006 1.384323
## 6   1997 Aug 2002 Jul 0.0022113511 0.004544202 1.665361
## 7   2002 Aug 2007 Jul 0.0014145467 0.003742243 1.073655

# Create a new dataframe with just the rows where LOCATION == "USA"
usa_data <- subset(ip, LOCATION == "USA")

# Create a new column counting the rows
usa_data$row_count <- seq_len(nrow(usa_data))

# Initialize an empty dataframe to store results
result_mle_USA <- data.frame(Start_Year = numeric(), End_Year = numeric(), mu = numeric(), a = numeric(), b = numeric())

# Define the row increment
row_increment <- 120

# Initialize start and end for the first iteration
current_start_row <- 1
current_end_row <- row_increment

# Loop through the data, incrementing by 120 rows each time
while (current_end_row <= nrow(usa_data)) {
  
  # Subset the data for this row range
  data_subset <- subset(usa_data, row_count >= current_start_row & row_count <= current_end_row)
  
  # Get the minimum and maximum TIME within this subset
  start_year <- min(data_subset$TIME)
  end_year <- max(data_subset$TIME)
  
  # Fit the standard distribution
  fit_standard_gen <- MASS::fitdistr(data_subset$growthrate, dlaplace_standard, start = list(mu = 0, a = 1))
  
  # Define the objective function for b
  objective_function_b <- function(b, data, m, a) {
    log_density <- log(dlaplace_modified(data, m = m, a = a, b = b))
    neg_log_likelihood <- -sum(log_density, na.rm = TRUE)
    return(neg_log_likelihood)
  }
  
  # Take m and a from the fit_standard_gen
  m <- fit_standard_gen$estimate['mu']
  a <- fit_standard_gen$estimate['a']
  
  # Optimization for b, given m and a
  result_bgen <- optim(1, fn = objective_function_b, data = data_subset$growthrate, m = m, a = a, method = "L-BFGS-B", lower = 0.6, upper = 3.3)
  
  # Resulting estimate for b
  b_estimates <- result_bgen$par
  
  # Append the results for this row range to the result_mle_USA dataframe
  result_mle_USA <- rbind(result_mle_USA, data.frame(
    Start_Year = start_year,
    End_Year = end_year,
    mu = m,
    a = a,
    b = b_estimates
  ))
  
  # Move the row counter by 120
  current_start_row <- current_end_row + 1
  current_end_row <- current_end_row + row_increment
}

# Remove row names for a cleaner look
row.names(result_mle_USA) <- NULL

# View the results
print(result_mle_USA)

##   Start_Year End_Year          mu           a        b
## 1   1972 Aug 1982 Jul 0.002292676 0.007301194 1.143287
## 2   1982 Aug 1992 Jul 0.002822975 0.004733478 1.327824
## 3   1992 Aug 2002 Jul 0.003424343 0.003985547 1.430369

# could not figure out 
# Load the required package
library(ReIns)

# Initialize an empty data frame to store results
result_hill <- data.frame(
  Location = character(),
  Hill_Estimate = numeric(),
  stringsAsFactors = FALSE
)

# Loop through each unique location in the 'ip' data frame
for (loc in unique(ip$LOCATION)) {
  
  # Subset the data for this location
  data_subset <- subset(ip, LOCATION == loc)
  
  # Take the absolute value of the 'growthrate' column and add 1
  data_subset$growthrate <- abs(data_subset$growthrate) + 1
  
  # Run the Hill estimator
  hill_output <- tryCatch(
    Hill(data_subset$growthrate, k = TRUE, logk = FALSE, plot = FALSE),
    error = function(e) return(NULL)
  )
  
  # Check if Hill estimator was successful
  if (!is.null(hill_output) && !is.null(hill_output$alpha)) {
    
    # Extract the Hill estimate
    hill_estimate <- hill_output$alpha
    
    # Append the results for this location to the result_hill dataframe
    result_hill <- rbind(result_hill, data.frame(
      Location = loc,
      Hill_Estimate = hill_estimate
    ))
  } else {
    # Optionally, print a warning message if Hill estimator was unsuccessful for this location
    cat("Warning: Hill estimator unsuccessful for location", loc, "\n")
  }
}

## Warning: Hill estimator unsuccessful for location AUT 
## Warning: Hill estimator unsuccessful for location BEL 
## Warning: Hill estimator unsuccessful for location BGR 
## Warning: Hill estimator unsuccessful for location BRA 
## Warning: Hill estimator unsuccessful for location CAN 
## Warning: Hill estimator unsuccessful for location CHL 
## Warning: Hill estimator unsuccessful for location COL 
## Warning: Hill estimator unsuccessful for location CZE 
## Warning: Hill estimator unsuccessful for location DEU 
## Warning: Hill estimator unsuccessful for location DNK 
## Warning: Hill estimator unsuccessful for location EA19 
## Warning: Hill estimator unsuccessful for location ESP 
## Warning: Hill estimator unsuccessful for location EST 
## Warning: Hill estimator unsuccessful for location EU27_2020 
## Warning: Hill estimator unsuccessful for location FIN 
## Warning: Hill estimator unsuccessful for location FRA 
## Warning: Hill estimator unsuccessful for location G-7 
## Warning: Hill estimator unsuccessful for location GBR 
## Warning: Hill estimator unsuccessful for location GRC 
## Warning: Hill estimator unsuccessful for location HRV 
## Warning: Hill estimator unsuccessful for location HUN 
## Warning: Hill estimator unsuccessful for location IND 
## Warning: Hill estimator unsuccessful for location IRL 
## Warning: Hill estimator unsuccessful for location ISL 
## Warning: Hill estimator unsuccessful for location ISR 
## Warning: Hill estimator unsuccessful for location ITA 
## Warning: Hill estimator unsuccessful for location KOR 
## Warning: Hill estimator unsuccessful for location LTU 
## Warning: Hill estimator unsuccessful for location LUX 
## Warning: Hill estimator unsuccessful for location LVA 
## Warning: Hill estimator unsuccessful for location MEX 
## Warning: Hill estimator unsuccessful for location NLD 
## Warning: Hill estimator unsuccessful for location NOR 
## Warning: Hill estimator unsuccessful for location OECD 
## Warning: Hill estimator unsuccessful for location OECDE 
## Warning: Hill estimator unsuccessful for location POL 
## Warning: Hill estimator unsuccessful for location PRT 
## Warning: Hill estimator unsuccessful for location ROU 
## Warning: Hill estimator unsuccessful for location RUS 
## Warning: Hill estimator unsuccessful for location SVK 
## Warning: Hill estimator unsuccessful for location SVN 
## Warning: Hill estimator unsuccessful for location SWE 
## Warning: Hill estimator unsuccessful for location TUR 
## Warning: Hill estimator unsuccessful for location USA

# Remove row names for a cleaner look
row.names(result_hill) <- NULL

# View the results
print(result_hill)

## [1] Location      Hill_Estimate
## <0 rows> (or 0-length row.names)

pdf_student_t <- function(x, d, l, v) {
  # Calculate the Gamma function terms
  gamma_term1 <- gamma((v + 1) / 2)
  gamma_term2 <- gamma(v / 2)
  
  # Calculate the constant in front of the formula
  constant <- gamma_term1 / (sqrt(v * pi) *d* gamma_term2)
  
  # Calculate the main expression
  main_expr <- (1 + ((1 / v) * ((x - l) / d)^2))^(-((v + 1) / 2))
  
  # Calculate the full PDF value
  pdf_value <- constant * main_expr
 
  
  
  return(pdf_value)

}

pcustom_student_t <- function(x, d, l, v) {
  sapply(x, function(x_i) {
    integrate(function(u) pdf_student_t(u, d, l, v), -Inf, x_i)$value
  })
}

# Step 2: Create a custom function capturing the parameters
 cdf_student_t<- function(x) {
  pcustom_student_t(x, 
                     
                    d = fit$estimate['d'], 
                    l = fit$estimate['l'],
                    v = fit$estimate['v'])
   
  
  return(-sum(log_density))

}

ip_usa=ip[ip$LOCATION=="USA",]

library(MASS)
 fit2=fitdistr(ip_usa$growthrate, "t", 
                 start = list(m = mean(ip_usa$growthrate), s = sd(ip_usa$growthrate), df = 3), 
                 method = "L-BFGS-B", 
                 lower = c(-3, 0.0001, 1),
                 upper = c(3, 3, 10))
 objective_function <- function(params, data) {
  d <- params[1]
  l <- params[2]
  v <- params[3]
  
  log_density <- log(pdf_student_t(data, d, l, v))
  return(-sum(log_density))}
 optim(
  par = c(d = fit2$estimate["m"], l = fit2$estimate["s"], v = fit2$estimate["df"]),  # use fitted values here
  fn = objective_function,
  data = ip_usa$growthrate,
   method = "Nelder-Mead", 
                 #lower = c(-3, 0.0001, 1),
                #upper = c(3, 3, 30)
 
)

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## $par
##         d.m         l.s        v.df 
## 0.005196051 0.002517946 4.461651929 
## 
## $value
## [1] -1521.843
## 
## $counts
## function gradient 
##      208       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

# Initialize an empty data frame to store results
studentT <- data.frame(
  location = character(),
  opt_m = numeric(),
  opt_s = numeric(),
  opt_df = numeric(),
  stringsAsFactors = FALSE
)

# Unique locations
unique_locations <- unique(ip$LOCATION)

# Loop over each unique location
for(loc in unique_locations) {
  
  # Subset the data for the current location
  ip_subset <- subset(ip, LOCATION== loc)
  
  
    
    # Fit initial distribution
    

    # Perform optimization
    opt_result <- optim(
      par = c(d = mean(ip_subset$growthrate), l =sd(ip_subset$growthrate), v = 3),
      fn = objective_function,
      data = ip_subset$growthrate,
      method = "Nelder-Mead", 
      #lower = c(-3, 0.0001, 1),
      #upper = c(3, 3, 30)
    )
    
    # Store results in data frame
    new_row <- data.frame(
      location = loc,
      opt_m = opt_result$par[1],
      opt_s = opt_result$par[2],
      opt_df = opt_result$par[3],
      stringsAsFactors = FALSE
      
    
    )
    
  studentT <- bind_rows(studentT, new_row)
  }

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

## Warning in log(pdf_student_t(data, d, l, v)): NaNs produced

# View the results data frame
print(studentT)

##         location       opt_m       opt_s    opt_df
## d...1        AUT 0.014891368 0.002172285  3.626243
## d...2        BEL 0.016617616 0.002108240  4.333750
## d...3        BGR 0.019259918 0.005444479  8.322290
## d...4        BRA 0.016402045 0.003230761  2.879242
## d...5        CAN 0.009822828 0.002013870 10.895452
## d...6        CHL 0.017497723 0.003274205  9.377957
## d...7        COL 0.016351828 0.002488640  4.153267
## d...8        CZE 0.018511698 0.002454261  3.077221
## d...9        DEU 0.012108469 0.001754221  4.350692
## d...10       DNK 0.024248283 0.001249174  4.031760
## d...11      EA19 0.008775137 0.001779436  5.894504
## d...12       ESP 0.011623353 0.001834276  2.314585
## d...13       EST 0.021516370 0.005966453  6.789221
## d...14 EU27_2020 0.008687726 0.001812815  5.930977
## d...15       FIN 0.016820641 0.002817865  3.008355
## d...16       FRA 0.012102786 0.001105085  7.310810
## d...17       G-7 0.005354759 0.002076978  7.589369
## d...18       GBR 0.009899258 0.001629723  3.953371
## d...19       GRC 0.018948439 0.001960556  3.144176
## d...20       HRV 0.021140278 0.001496353  5.612522
## d...21       HUN 0.021460122 0.004213812  3.872693
## d...22       IND 0.012087270 0.005809426  4.627098
## d...23       IRL 0.032563222 0.006574945  4.234302
## d...24       ISL 0.022097759 0.006827743  3.565743
## d...25       ISR 0.015673568 0.003843476  6.188346
## d...26       ITA 0.013454374 0.001160977  2.967590
## d...27       KOR 0.019377570 0.006454552 10.508622
## d...28       LTU 0.066013885 0.006446277 75.100167
## d...29       LUX 0.030526682 0.001549647  3.590702
## d...30       LVA 0.013201142 0.005655534 10.238238
## d...31       MEX 0.009069864 0.001642735  3.512235
## d...32       NLD 0.018981652 0.001775325  4.185316
## d...33       NOR 0.020627384 0.003742288  2.369138
## d...34      OECD 0.004778253 0.002292763 10.693738
## d...35     OECDE 0.007151555 0.002116932  5.364809
## d...36       POL 0.018594318 0.004374682  2.952431
## d...37       PRT 0.023919922 0.001582256  4.791280
## d...38       ROU 0.012295971 0.003292646  2.312019
## d...39       RUS 0.013689073 0.001348019  2.296780
## d...40       SVK 0.022325621 0.003109459  1.628996
## d...41       SVN 0.018931374 0.002985321  3.714934
## d...42       SWE 0.013207260 0.002065843  3.321444
## d...43       TUR 0.025524664 0.005023437  3.070944
## d...44       USA 0.005195909 0.002517305  4.461275

# Custom cauchy function
cauchy_custom <- function(x, delta, p) {
  1 / (delta * pi) * (1 + ((x - p) / delta)^2)^(-1)
}

# Objective function for MLE
 cauchy_custom_objective <- function(params, data) {
  delta <- params[1]
  p <- params[2]
  
  # Calculate likelihood
  likelihood <- prod(cauchy_custom(data, delta, p))
  
  # Return negative log-likelihood because optim() looks for the minimum
  return(-log(likelihood))
}

cauchy_custom_cdf <- function(x, delta, p) {
  sapply(x, function(x_i) {
    integrate(function(u) cauchy_custom_pdf(u, delta, p), -Inf, x_i)$value
  })
}

#test optional techniques 
# Assume ip_usa$growthrate contains the data you're interested in
initial_values <- c(1, 0.5)  # Replace with sensible initial values

opt_result_usa <- optim(
  par = initial_values,
  fn = cauchy_custom_objective,
  data = ip_usa$growthrate,
  method = "Nelder-Mead"
    # Upper bounds for delta and p
)

# Initialize an empty data frame to store results with column names
cauchy_params <- data.frame(
  LOCATION = character(),
  delta_opt = numeric(),
  p_opt = numeric(),
  stringsAsFactors = FALSE
)

# Unique locations in your data
unique_locations <- unique(ip$LOCATION)

# Loop through each unique location
for (loc in unique_locations) {
  # Use tryCatch to skip errors
  
    # Subset data for the current location
    ip_subset <- subset(ip, LOCATION == loc)
    
    # Initial parameter estimates
    initial_values <- c(delta = 1, p = 0.5)
    
    # Perform optimization for the current location
    opt_result <- optim(
      par = initial_values,
      fn = cauchy_custom_objective,
      data = ip_subset$growthrate,
      method = "Nelder-Mead"
    )
    
    # Create a new row for results
    new_row <- data.frame(
      LOCATION = loc,
      delta_opt = opt_result$par[1],
      p_opt = opt_result$par[2],
      stringsAsFactors = FALSE
    )
    
    # Append the new row to the results data frame
    cauchy_params <- rbind(cauchy_params, new_row)
    
  }

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

## Warning in log(likelihood): NaNs produced

# Initialize an empty data frame to store test results
cauchy_tests <- data.frame(
  LOCATION = character(),
  CVM_Statistic = numeric(),
  CVM_P_Value = numeric(),
  KS_Statistic = numeric(),
  KS_P_Value = numeric(),
  AD_Statistic = numeric(),
  AD_P_Value = numeric(),
  stringsAsFactors = FALSE
)

# Loop through each unique location
for (loc in unique(ip$LOCATION)) {
  tryCatch({
     # Subset data for the current location
    ip_subset <- subset(ip, LOCATION == loc)
    
    # Get the estimated parameters from cauchy_params
    delta <- cauchy_params[cauchy_params$LOCATION == loc, "delta_opt"]
    p <- cauchy_params[cauchy_params$LOCATION == loc, "p_opt"]
    
    # Custom CDF function using the estimated parameters
    custom_cdfgen <- function(x) {
      cauchy_custom_cdf(x, delta, p)
    }
    
    # Perform the CVM test
    cvm_test <- goftest::cvm.test(ip_subset$growthrate, nullname = "custom_cdfgen", estimated = TRUE)
    
    # Perform the KS test
    ks_test <- ks.test(ip_subset$growthrate, "custom_cdfgen")
    
    # Perform the Anderson-Darling test
    ad_test <- goftest::ad.test(ip_subset$growthrate, "custom_cdfgen")
    
    temp_new_tests <- data.frame(
      LOCATION = loc,
      CVM_Statistic = cvm_test$statistic,
      CVM_P_Value = cvm_test$p.value,
      KS_Statistic = ks_test$statistic,
      KS_P_Value = ks_test$p.value,
      AD_Statistic = ad_test$statistic,
      AD_P_Value = ad_test$p.value,
      stringsAsFactors = FALSE
    )
    
    # Append to the cauchy_tests
    cauchy_tests <- rbind(cauchy_tests, temp_new_tests)
    
  }, error = function(e) {
    message("An error occurred for location ", loc, "; skipping to next location.")
  })
}

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location AUT; skipping to next location.

## An error occurred for location BEL; skipping to next location.

## An error occurred for location BGR; skipping to next location.

## An error occurred for location BRA; skipping to next location.

## An error occurred for location CAN; skipping to next location.

## An error occurred for location CHL; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location COL; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location CZE; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location DEU; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location DNK; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location EA19; skipping to next location.

## An error occurred for location ESP; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location EST; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location EU27_2020; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location FIN; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location FRA; skipping to next location.

## An error occurred for location G-7; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location GBR; skipping to next location.

## An error occurred for location GRC; skipping to next location.

## An error occurred for location HRV; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location HUN; skipping to next location.

## An error occurred for location IND; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location IRL; skipping to next location.

## An error occurred for location ISL; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location ISR; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location ITA; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location KOR; skipping to next location.

## An error occurred for location LTU; skipping to next location.

## An error occurred for location LUX; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location LVA; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location MEX; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location NLD; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location NOR; skipping to next location.

## An error occurred for location OECD; skipping to next location.

## An error occurred for location OECDE; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location POL; skipping to next location.

## An error occurred for location PRT; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location ROU; skipping to next location.

## An error occurred for location RUS; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location SVK; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location SVN; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location SWE; skipping to next location.

## Warning in ks.test.default(ip_subset$growthrate, "custom_cdfgen"): ties should
## not be present for the Kolmogorov-Smirnov test

## An error occurred for location TUR; skipping to next location.

## An error occurred for location USA; skipping to next location.

# Remove row names
row.names(cauchy_tests) <- NULL

# Check the results
print(cauchy_tests)

## [1] LOCATION      CVM_Statistic CVM_P_Value   KS_Statistic  KS_P_Value   
## [6] AD_Statistic  AD_P_Value   
## <0 rows> (or 0-length row.names)

gdp_distribution

2023-08-09