1. Load the data
library(readxl)
data <- read_excel("Final_reg_data_for_maize_DE.xlsx")
data
# Rename columns for convenience
colnames(data) <- c("Year","Population","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","GNI_per_capita","Cropland_area",
"Value_added_deflator","N2O_indirect","N2O_direct","GDP_deflator","NVWI")
str(data)
## tibble [31 × 13] (S3: tbl_df/tbl/data.frame)
## $ Year : num [1:31] 1991 1992 1993 1994 1995 ...
## $ Population : num [1:31] 80.3 80.9 81.5 81.8 82 ...
## $ Producer_Price_Index: num [1:31] 114.8 109.3 86.4 89 94.5 ...
## $ Producer_Price_USD : num [1:31] 214 217 162 170 204 ...
## $ Avg_Max_Temp : num [1:31] 12.8 13.4 12.6 13.7 12.9 ...
## $ Annual_Precip : num [1:31] 655 814 878 856 841 ...
## $ GNI_per_capita : num [1:31] 23506 26657 25751 27255 31829 ...
## $ Cropland_area : num [1:31] 11807 11708 11911 12037 12061 ...
## $ Value_added_deflator: num [1:31] 58.7 62.7 64.5 94.3 114.8 ...
## $ N2O_indirect : num [1:31] 0.0552 0.0608 0.0749 0.0696 0.0679 0.0823 0.0895 0.0784 0.0913 0.0929 ...
## $ N2O_direct : num [1:31] 0.245 0.27 0.333 0.309 0.302 ...
## $ GDP_deflator : num [1:31] 75.5 84.5 82.9 86.1 99.2 ...
## $ NVWI : num [1:31] 1030 1095 1119 989 871 ...
sum(is.na(data))
## [1] 0
summary(data)
## Year Population Producer_Price_Index Producer_Price_USD
## Min. :1991 Min. :80.30 Min. : 60.63 Min. :101.6
## 1st Qu.:1998 1st Qu.:81.20 1st Qu.: 78.00 1st Qu.:151.6
## Median :2006 Median :81.93 Median : 96.00 Median :177.4
## Mean :2006 Mean :81.91 Mean : 96.22 Mean :185.2
## 3rd Qu.:2014 3rd Qu.:82.19 3rd Qu.:107.25 3rd Qu.:215.8
## Max. :2021 Max. :83.70 Max. :140.61 Max. :286.4
## Avg_Max_Temp Annual_Precip GNI_per_capita Cropland_area
## Min. :10.97 Min. :564.2 Min. :23506 Min. :11708
## 1st Qu.:12.79 1st Qu.:743.9 1st Qu.:27183 1st Qu.:11967
## Median :13.20 Median :796.5 Median :37481 Median :12038
## Mean :13.26 Mean :797.2 Mean :37047 Mean :12011
## 3rd Qu.:13.71 3rd Qu.:866.8 3rd Qu.:46049 3rd Qu.:12074
## Max. :15.12 Max. :987.0 Max. :53272 Max. :12145
## Value_added_deflator N2O_indirect N2O_direct GDP_deflator
## Min. : 58.67 Min. :0.05520 Min. :0.2455 Min. : 67.07
## 1st Qu.: 85.71 1st Qu.:0.09015 1st Qu.:0.4007 1st Qu.: 83.69
## Median :100.00 Median :0.10440 Median :0.4641 Median : 99.23
## Mean :102.05 Mean :0.10396 Mean :0.4621 Mean : 97.53
## 3rd Qu.:123.31 3rd Qu.:0.12045 3rd Qu.:0.5353 3rd Qu.:110.75
## Max. :142.05 Max. :0.15310 Max. :0.6805 Max. :119.06
## NVWI
## Min. : 517.3
## 1st Qu.: 874.7
## Median :1456.2
## Mean :1705.9
## 3rd Qu.:2251.3
## Max. :4549.9
2. Check for stationarity
library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
##
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
##
## filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Variables ----
vars <- c("Population","GNI_per_capita","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","Cropland_area","Value_added_deflator",
"N2O_indirect","N2O_direct","GDP_deflator","NVWI")
# ---- Function to test stationarity ----
check_stationarity <- function(x){
x <- na.omit(x)
max_lag <- floor(4*(length(x)/100)^(1/4))
adf <- ur.df(x, type="trend", lags=max_lag, selectlags="AIC")
adf_diff <- ur.df(diff(x), type="drift", lags=max_lag, selectlags="AIC")
pp <- pp.test(x)
kpss <- kpss.test(x)
level_stat <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"] & pp$p.value < 0.05 & kpss$p.value > 0.05,
"Stationary", "Non-stationary")
diff_stat <- ifelse(adf_diff@teststat[1] < adf_diff@cval[2, "5pct"], "Stationary", "Non-stationary")
integ <- ifelse(level_stat=="Stationary", "I(0)",
ifelse(diff_stat=="Stationary","I(1)","I(?)"))
return(c(Level=level_stat, First_Diff=diff_stat, Integration=integ))
}
# ---- Build stationarity table with automatic log-transform ----
stationarity_table <- lapply(vars, function(v){
x <- data[[v]]
res <- check_stationarity(x)
# If I(?), apply log transform and retest
if(res["Integration"]=="I(?)" & all(x>0)){
x <- log(x)
res <- check_stationarity(x)
}
data.frame(Variable=v,
Level_ADF_PP_KPSS=res["Level"],
First_Difference_ADF=res["First_Diff"],
Integration_Order=res["Integration"],
stringsAsFactors = FALSE)
}) %>% bind_rows()
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
# ---- Print final table ----
print(stationarity_table)
## Variable Level_ADF_PP_KPSS First_Difference_ADF
## Level...1 Population Non-stationary Stationary
## Level...2 GNI_per_capita Non-stationary Stationary
## Level...3 Producer_Price_Index Non-stationary Stationary
## Level...4 Producer_Price_USD Non-stationary Stationary
## Level...5 Avg_Max_Temp Non-stationary Stationary
## Level...6 Annual_Precip Stationary Stationary
## Level...7 Cropland_area Non-stationary Stationary
## Level...8 Value_added_deflator Non-stationary Stationary
## Level...9 N2O_indirect Non-stationary Stationary
## Level...10 N2O_direct Non-stationary Stationary
## Level...11 GDP_deflator Non-stationary Stationary
## Level...12 NVWI Non-stationary Stationary
## Integration_Order
## Level...1 I(1)
## Level...2 I(1)
## Level...3 I(1)
## Level...4 I(1)
## Level...5 I(1)
## Level...6 I(0)
## Level...7 I(1)
## Level...8 I(1)
## Level...9 I(1)
## Level...10 I(1)
## Level...11 I(1)
## Level...12 I(1)
# for ADF / PP Test: if p-value<0.05=stationary
# for KPSS Test: if p-value<0.05=non-stationary
3. Decide on the modeling approach:Check for cointegration (long-run
relationship)
Use Johansen test if you have multiple I(1) variables.
Since you have ~30 observations and 11 I(1) variables, Johansen may
fail due to rank deficiency.
Reduce variables (drop highly correlated ones like
Producer_Price_Index vs Producer_Price_USD)
Or use Engle-Granger two-step (NVWI vs each regressor
separately).
library(urca)
library(dplyr)
# Separate variables by integration order
# ---- Identify I(1) variables ----
I1_vars <- stationarity_table$Variable[stationarity_table$Integration_Order == "I(1)"]
I1_vars <- setdiff(I1_vars, "NVWI") # Remove dependent variable
I1_vars
## [1] "Population" "GNI_per_capita" "Producer_Price_Index"
## [4] "Producer_Price_USD" "Avg_Max_Temp" "Cropland_area"
## [7] "Value_added_deflator" "N2O_indirect" "N2O_direct"
## [10] "GDP_deflator"
I0_vars <- stationarity_table$Variable[stationarity_table$Integration_Order=="I(0)"]
I0_vars
## [1] "Annual_Precip"
# ---- Function to run Engle-Granger test ----
eg_test <- function(dep, indep){
df <- data[, c(dep, indep)]
df <- na.omit(df)
# Phillips-Ouliaris (Pz) test
test <- ca.po(df, type="Pz", demean="trend")
# Return as data.frame
data.frame(
Variable = indep,
Test_statistic = round(test@teststat, 3),
Critical_5pct = round(test@cval[2], 3),
Cointegrated = ifelse(test@teststat > test@cval[2], "No", "Yes"),
stringsAsFactors = FALSE
)
}
# ---- Loop through I(1) variables ----
eg_results <- lapply(I1_vars, function(v){
eg_test("NVWI", v)
}) %>% bind_rows()
# ---- View results ----
print(eg_results)
## Variable Test_statistic Critical_5pct Cointegrated
## 1 Population 5.290 81.381 Yes
## 2 GNI_per_capita 12.146 81.381 Yes
## 3 Producer_Price_Index 11.421 81.381 Yes
## 4 Producer_Price_USD 12.620 81.381 Yes
## 5 Avg_Max_Temp 5.050 81.381 Yes
## 6 Cropland_area 4.725 81.381 Yes
## 7 Value_added_deflator 8.748 81.381 Yes
## 8 N2O_indirect 8.866 81.381 Yes
## 9 N2O_direct 8.866 81.381 Yes
## 10 GDP_deflator 7.970 81.381 Yes
# since co-integration exists, use Engle-Granger Error Correction Model (ECM) to capture long run and short run dynamics
4. Engle-Granger Error Correction Model (ECM)
# a. Long-run regression: regress NVWI on all cointegrated I(1) variables.
long_run_formula <- as.formula(
paste("NVWI ~", paste(eg_results$Variable[eg_results$Cointegrated=="Yes"], collapse=" + "))
)
long_run_model <- lm(long_run_formula, data=data)
summary(long_run_model)
##
## Call:
## lm(formula = long_run_formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -600.74 -233.00 45.42 210.52 762.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.329e+04 2.335e+04 1.854 0.0786 .
## Population -1.154e+02 1.581e+02 -0.730 0.4741
## GNI_per_capita 3.269e-02 7.548e-02 0.433 0.6696
## Producer_Price_Index 2.891e+01 3.503e+01 0.825 0.4189
## Producer_Price_USD -2.053e+01 1.677e+01 -1.224 0.2352
## Avg_Max_Temp 2.709e+02 1.146e+02 2.365 0.0283 *
## Cropland_area -3.381e+00 1.428e+00 -2.368 0.0281 *
## Value_added_deflator 1.542e+01 7.844e+00 1.966 0.0633 .
## N2O_indirect 9.223e+05 2.539e+06 0.363 0.7203
## N2O_direct -2.085e+05 5.712e+05 -0.365 0.7189
## GDP_deflator 3.682e+01 4.666e+01 0.789 0.4393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 350.9 on 20 degrees of freedom
## Multiple R-squared: 0.9184, Adjusted R-squared: 0.8776
## F-statistic: 22.51 on 10 and 20 DF, p-value: 9.643e-09
# b. Compute the error correction term (residuals from long-run regression):
data$ECT <- residuals(long_run_model)
# c.Short-run model: first-difference all I(1) variables, include ECT and I(0) variables in levels:
# First differences
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI)))
# Build short-run formula
short_run_vars <- paste0("d_", eg_results$Variable[eg_results$Cointegrated=="Yes"])
short_run_formula <- as.formula(
paste("d_NVWI ~", paste(short_run_vars, collapse=" + "),
"+ ECT +", paste(I0_vars, collapse=" + "))
)
# Fit ECM
ecm_model <- lm(short_run_formula, data=data_diff)
summary(ecm_model)
##
## Call:
## lm(formula = short_run_formula, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -462.77 -99.91 -3.30 131.66 475.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.792e+01 5.044e+02 -0.194 0.84836
## d_Population -7.909e+01 2.108e+02 -0.375 0.71221
## d_GNI_per_capita 1.875e-01 8.080e-02 2.320 0.03303 *
## d_Producer_Price_Index 7.064e+01 3.527e+01 2.003 0.06140 .
## d_Producer_Price_USD -4.467e+01 1.782e+01 -2.507 0.02262 *
## d_Avg_Max_Temp 2.179e+02 6.778e+01 3.215 0.00508 **
## d_Cropland_area -3.785e+00 1.111e+00 -3.407 0.00336 **
## d_Value_added_deflator 2.267e+01 6.746e+00 3.360 0.00371 **
## d_N2O_indirect -1.445e+06 1.443e+06 -1.002 0.33057
## d_N2O_direct 3.239e+05 3.246e+05 0.998 0.33242
## d_GDP_deflator 3.674e+01 3.753e+01 0.979 0.34136
## ECT 1.478e+00 2.789e-01 5.301 5.87e-05 ***
## Annual_Precip -7.844e-02 6.117e-01 -0.128 0.89947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 285.2 on 17 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.8021, Adjusted R-squared: 0.6625
## F-statistic: 5.743 on 12 and 17 DF, p-value: 0.0006396
5. check diagnostics
—- 5. Diagnostics for Short-Run (ECM) and Long-Run Models —-
library(lmtest) # dwtest, bptest
## Ładowanie wymaganego pakietu: zoo
##
## Dołączanie pakietu: 'zoo'
## Następujące obiekty zostały zakryte z 'package:base':
##
## as.Date, as.Date.numeric
library(car) # vif
## Ładowanie wymaganego pakietu: carData
##
## Dołączanie pakietu: 'car'
## Następujący obiekt został zakryty z 'package:dplyr':
##
## recode
library(urca) # ur.df
# Function to run diagnostics
run_diagnostics <- function(model, model_name = "Model") {
# Residuals
res <- residuals(model)
# Durbin-Watson Test (Autocorrelation)
dw <- dwtest(model)
dw_stat <- round(dw$statistic, 3)
dw_p <- round(dw$p.value, 3)
# Breusch-Pagan Test (Heteroskedasticity)
bp <- bptest(model)
bp_stat <- round(bp$statistic, 3)
bp_p <- round(bp$p.value, 3)
# Multicollinearity (VIF)
vif_vals <- vif(model)
high_vif <- names(vif_vals[vif_vals > 5])
vif_flag <- if(length(high_vif) == 0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
# Residual stationarity (ADF)
adf <- ur.df(res, type = "drift", selectlags = "AIC")
resid_stationary <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"], "Stationary", "Non-stationary")
# Compile diagnostics table
diagnostics <- data.frame(
Test = c("Durbin-Watson (Autocorr)",
"Breusch-Pagan (Heteroskedasticity)",
"Residual Stationarity",
"Multicollinearity"),
Result = c(paste0(dw_stat, ", p=", dw_p),
paste0(bp_stat, ", p=", bp_p),
resid_stationary,
vif_flag),
stringsAsFactors = FALSE
)
cat("\nDiagnostics for", model_name, ":\n")
print(diagnostics)
invisible(diagnostics)
}
# ---- Run diagnostics for Long-Run Model ----
diagnostics_long <- run_diagnostics(long_run_model, "Long-Run Model")
##
## Diagnostics for Long-Run Model :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 2.231, p=0.32
## 2 9.8, p=0.458
## 3 Stationary
## 4 High VIF for: GNI_per_capita, Producer_Price_Index, Producer_Price_USD, Value_added_deflator, N2O_indirect, N2O_direct, GDP_deflator
# ---- Run diagnostics for Short-Run ECM ----
diagnostics_short <- run_diagnostics(ecm_model, "Short-Run ECM")
##
## Diagnostics for Short-Run ECM :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 1.617, p=0.171
## 2 15.07, p=0.238
## 3 Stationary
## 4 High VIF for: d_GNI_per_capita, d_Producer_Price_Index, d_Producer_Price_USD, d_N2O_indirect, d_N2O_direct, d_GDP_deflator
#####################################
#check for variables with high VIF
# VIF diagnostics for short- and long-run models
library(car)
check_vif <- function(model, model_name="Model") {
vif_vals <- vif(model)
vif_table <- data.frame(
Variable = names(vif_vals),
VIF = round(vif_vals, 2),
HighVIF = ifelse(vif_vals > 10, "Yes", "No"),
stringsAsFactors = FALSE
)
cat("\nVIF Table for", model_name, ":\n")
print(vif_table)
invisible(vif_table)
}
# ---- Run for long-run model ----
vif_long <- check_vif(long_run_model, "Long-Run Model")
##
## VIF Table for Long-Run Model :
## Variable VIF HighVIF
## Population Population 4.83 No
## GNI_per_capita GNI_per_capita 129.64 Yes
## Producer_Price_Index Producer_Price_Index 118.32 Yes
## Producer_Price_USD Producer_Price_USD 179.17 Yes
## Avg_Max_Temp Avg_Max_Temp 2.39 No
## Cropland_area Cropland_area 4.95 No
## Value_added_deflator Value_added_deflator 8.37 No
## N2O_indirect N2O_indirect 991046.33 Yes
## N2O_direct N2O_direct 990610.04 Yes
## GDP_deflator GDP_deflator 137.01 Yes
# ---- Run for short-run ECM ----
vif_short <- check_vif(ecm_model, "Short-Run ECM")
##
## VIF Table for Short-Run ECM :
## Variable VIF HighVIF
## d_Population d_Population 1.38 No
## d_GNI_per_capita d_GNI_per_capita 21.62 Yes
## d_Producer_Price_Index d_Producer_Price_Index 116.25 Yes
## d_Producer_Price_USD d_Producer_Price_USD 184.51 Yes
## d_Avg_Max_Temp d_Avg_Max_Temp 1.31 No
## d_Cropland_area d_Cropland_area 1.72 No
## d_Value_added_deflator d_Value_added_deflator 3.96 No
## d_N2O_indirect d_N2O_indirect 209050.78 Yes
## d_N2O_direct d_N2O_direct 208929.35 Yes
## d_GDP_deflator d_GDP_deflator 29.50 Yes
## ECT ECT 2.22 No
## Annual_Precip Annual_Precip 1.36 No
6. REFINE YOUR MODEL (by removing variables)
—- Refined Long- and Short-Run Models —-
library(dplyr)
library(lmtest)
library(car)
library(urca)
# ---- 1. Refine Long-Run Model ----
# Keep only significant and low-VIF variables
long_vars_refined <- c("Avg_Max_Temp", "Cropland_area", "Value_added_deflator", "Producer_Price_USD")
long_run_formula_refined <- as.formula(paste("NVWI ~", paste(long_vars_refined, collapse = " + ")))
long_run_model_refined <- lm(long_run_formula_refined, data = data)
summary(long_run_model_refined)
##
## Call:
## lm(formula = long_run_formula_refined, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -810.94 -297.14 22.34 354.68 847.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39015.868 13689.653 2.850 0.008443 **
## Avg_Max_Temp 464.324 116.417 3.988 0.000482 ***
## Cropland_area -3.848 1.085 -3.546 0.001507 **
## Value_added_deflator 31.792 6.071 5.237 1.8e-05 ***
## Producer_Price_USD -2.642 2.522 -1.048 0.304342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 461.7 on 26 degrees of freedom
## Multiple R-squared: 0.8163, Adjusted R-squared: 0.788
## F-statistic: 28.88 on 4 and 26 DF, p-value: 3.151e-09
long_vars_refined2 <- c("Population", "Annual_Precip", "GNI_per_capita", "Producer_Price_Index")
long_run_formula_refined2 <- as.formula(paste("NVWI ~", paste(long_vars_refined2, collapse = " + ")))
long_run_model_refined2 <- lm(long_run_formula_refined2, data = data)
summary(long_run_model_refined2)
##
## Call:
## lm(formula = long_run_formula_refined2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -825.85 -306.09 -55.82 267.85 1403.18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.993e+04 9.634e+03 -3.107 0.00453 **
## Population 3.515e+02 1.167e+02 3.012 0.00572 **
## Annual_Precip -2.139e-01 9.102e-01 -0.235 0.81607
## GNI_per_capita 7.208e-02 1.404e-02 5.133 2.37e-05 ***
## Producer_Price_Index 3.595e+00 6.540e+00 0.550 0.58728
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 507 on 26 degrees of freedom
## Multiple R-squared: 0.7785, Adjusted R-squared: 0.7444
## F-statistic: 22.84 on 4 and 26 DF, p-value: 3.437e-08
long_vars_refined3 <- c("N2O_indirect", "GDP_deflator")
long_run_formula_refined3 <- as.formula(paste("NVWI ~", paste(long_vars_refined3, collapse = " + ")))
long_run_model_refined3 <- lm(long_run_formula_refined3, data = data)
summary(long_run_model_refined3)
##
## Call:
## lm(formula = long_run_formula_refined3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1054.5 -544.9 -128.7 309.6 2353.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2541.28 831.40 -3.057 0.00488 **
## N2O_indirect -3842.94 7434.33 -0.517 0.60927
## GDP_deflator 47.65 11.62 4.101 0.00032 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 740.7 on 28 degrees of freedom
## Multiple R-squared: 0.4909, Adjusted R-squared: 0.4545
## F-statistic: 13.5 on 2 and 28 DF, p-value: 7.86e-05
# ---- 2. Compute ECT ----
data$ECT_refined <- residuals(long_run_model_refined)
data$ECT_refined2 <- residuals(long_run_model_refined2)
data$ECT_refined3 <- residuals(long_run_model_refined3)
# ---- 3. Prepare differenced data for Short-Run ECM ----
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined = data$ECT_refined) # add ECT to differenced data
data_diff2 <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined2 = data$ECT_refined2) # add ECT to differenced data
data_diff3 <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined3 = data$ECT_refined3) # add ECT to differenced data
# Keep differenced variables with low VIF and significant
short_vars_refined <- c("d_Avg_Max_Temp", "d_Cropland_area", "d_Value_added_deflator", "d_Producer_Price_USD")
short_run_formula_refined <- as.formula(paste("d_NVWI ~", paste(short_vars_refined, collapse = " + "), "+ ECT_refined"))
short_vars_refined2 <- c("d_Population", "Annual_Precip", "d_GNI_per_capita", "d_Producer_Price_Index")
short_run_formula_refined2 <- as.formula(paste("d_NVWI ~", paste(short_vars_refined2, collapse = " + "), "+ ECT_refined2"))
short_vars_refined3 <- c("d_N2O_indirect", "d_GDP_deflator")
short_run_formula_refined3 <- as.formula(paste("d_NVWI ~", paste(short_vars_refined3, collapse = " + "), "+ ECT_refined3"))
# Fit Short-Run ECM
ecm_model_refined <- lm(short_run_formula_refined, data = data_diff)
summary(ecm_model_refined)
##
## Call:
## lm(formula = short_run_formula_refined, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -685.53 -244.03 -82.11 297.80 646.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.1998 72.4233 0.389 0.70043
## d_Avg_Max_Temp 323.3771 90.8614 3.559 0.00159 **
## d_Cropland_area -1.2063 1.3024 -0.926 0.36357
## d_Value_added_deflator 22.3862 7.5221 2.976 0.00657 **
## d_Producer_Price_USD -3.9966 2.6359 -1.516 0.14253
## ECT_refined 0.5157 0.2222 2.320 0.02914 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 392.5 on 24 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.4709, Adjusted R-squared: 0.3607
## F-statistic: 4.273 on 5 and 24 DF, p-value: 0.006395
ecm_model_refined2 <- lm(short_run_formula_refined2, data = data_diff2)
summary(ecm_model_refined2)
##
## Call:
## lm(formula = short_run_formula_refined2, data = data_diff2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1217.57 -151.31 -18.87 257.24 469.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 684.66537 622.03913 1.101 0.2820
## d_Population -329.21915 272.42461 -1.208 0.2386
## Annual_Precip -0.79891 0.75947 -1.052 0.3033
## d_GNI_per_capita 0.07783 0.03031 2.568 0.0169 *
## d_Producer_Price_Index -8.51679 5.74822 -1.482 0.1514
## ECT_refined2 0.67075 0.18088 3.708 0.0011 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 403.5 on 24 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.4408, Adjusted R-squared: 0.3244
## F-statistic: 3.784 on 5 and 24 DF, p-value: 0.01143
ecm_model_refined3 <- lm(short_run_formula_refined3, data = data_diff3)
summary(ecm_model_refined3)
##
## Call:
## lm(formula = short_run_formula_refined3, data = data_diff3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1188.75 -94.24 12.78 207.89 943.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.9468 91.1999 0.471 0.6416
## d_N2O_indirect 323.8399 5537.1477 0.058 0.9538
## d_GDP_deflator 11.8349 12.5106 0.946 0.3529
## ECT_refined3 0.2246 0.1288 1.744 0.0929 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 487.8 on 26 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.1145, Adjusted R-squared: 0.01228
## F-statistic: 1.12 on 3 and 26 DF, p-value: 0.359
# ---- 4. Optional: Quick Diagnostics Function ----
run_diagnostics <- function(model) {
res <- residuals(model)
dw <- dwtest(model); bp <- bptest(model)
vif_vals <- vif(model); high_vif <- names(vif_vals[vif_vals>5])
adf <- ur.df(res, type="drift", selectlags="AIC")
data.frame(
Test = c("Durbin-Watson", "Breusch-Pagan", "Residual Stationarity", "Multicollinearity"),
Result = c(
paste0(round(dw$statistic,3), ", p=", round(dw$p.value,3)),
paste0(round(bp$statistic,3), ", p=", round(bp$p.value,3)),
ifelse(adf@teststat[1]<adf@cval[2,"5pct"], "Stationary", "Non-stationary"),
if(length(high_vif)==0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
),
stringsAsFactors = FALSE
)
}
# Run diagnostics
run_diagnostics(long_run_model_refined)
run_diagnostics(long_run_model_refined2)
run_diagnostics(long_run_model_refined3)
run_diagnostics(ecm_model_refined)
run_diagnostics(ecm_model_refined2)
run_diagnostics(ecm_model_refined3)
# Adjust for serial correlation of refine long run_diagnostics
library(dynlm)
library(sandwich)
library(lmtest)
nw_se2 <- NeweyWest(long_run_model_refined2, lag = 2, prewhite = FALSE)
coeftest(long_run_model_refined2, vcov = nw_se2)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.9932e+04 1.0757e+04 -2.7825 0.009911 **
## Population 3.5151e+02 1.2964e+02 2.7114 0.011714 *
## Annual_Precip -2.1388e-01 8.7763e-01 -0.2437 0.809380
## GNI_per_capita 7.2085e-02 1.5141e-02 4.7607 6.32e-05 ***
## Producer_Price_Index 3.5945e+00 7.0459e+00 0.5102 0.614243
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nw_se3 <- NeweyWest(long_run_model_refined3, lag = 2, prewhite = FALSE)
coeftest(long_run_model_refined3, vcov = nw_se3)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2541.279 770.535 -3.2981 0.002654 **
## N2O_indirect -3842.937 8474.504 -0.4535 0.653705
## GDP_deflator 47.646 15.023 3.1716 0.003659 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nw_se4 <- NeweyWest(ecm_model_refined3, lag = 2, prewhite = FALSE)
coeftest(ecm_model_refined3, vcov = nw_se4)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.94676 71.12845 0.6038 0.5512
## d_N2O_indirect 323.83992 5447.57394 0.0594 0.9531
## d_GDP_deflator 11.83494 10.82283 1.0935 0.2842
## ECT_refined3 0.22462 0.13469 1.6677 0.1074