1. Load the data
library(readxl)
data <- read_excel("Final_reg_data_for_wheat_DE.xlsx")
data
# Rename columns for convenience
colnames(data) <- c("Year","Population","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","GNI_per_capita","Cropland_area",
"Value_added_deflator","N2O_indirect","N2O_direct","GDP_deflator","NVWI")
str(data)
## tibble [31 × 13] (S3: tbl_df/tbl/data.frame)
## $ Year : num [1:31] 1991 1992 1993 1994 1995 ...
## $ Population : num [1:31] 80.3 80.9 81.5 81.8 82 ...
## $ Producer_Price_Index: num [1:31] 96.2 105 88.4 84.5 80.6 ...
## $ Producer_Price_USD : num [1:31] 178 207 164 160 173 ...
## $ Avg_Max_Temp : num [1:31] 12.8 13.4 12.6 13.7 12.9 ...
## $ Annual_Precip : num [1:31] 655 814 878 856 841 ...
## $ GNI_per_capita : num [1:31] 23506 26657 25751 27255 31829 ...
## $ Cropland_area : num [1:31] 11807 11708 11911 12037 12061 ...
## $ Value_added_deflator: num [1:31] 58.7 62.7 64.5 94.3 114.8 ...
## $ N2O_indirect : num [1:31] 0.722 0.68 0.686 0.717 0.772 ...
## $ N2O_direct : num [1:31] 3.21 3.02 3.05 3.19 3.43 ...
## $ GDP_deflator : num [1:31] 75.5 84.5 82.9 86.1 99.2 ...
## $ NVWI : num [1:31] 1152 -208 991 404 617 ...
sum(is.na(data))
## [1] 0
summary(data)
## Year Population Producer_Price_Index Producer_Price_USD
## Min. :1991 Min. :80.30 Min. : 63.06 Min. : 95.1
## 1st Qu.:1998 1st Qu.:81.20 1st Qu.: 73.16 1st Qu.:137.9
## Median :2006 Median :81.93 Median : 89.17 Median :173.0
## Mean :2006 Mean :81.91 Mean : 93.87 Mean :179.3
## 3rd Qu.:2014 3rd Qu.:82.19 3rd Qu.:107.45 3rd Qu.:202.8
## Max. :2021 Max. :83.70 Max. :139.85 Max. :287.8
## Avg_Max_Temp Annual_Precip GNI_per_capita Cropland_area
## Min. :10.97 Min. :564.2 Min. :23506 Min. :11708
## 1st Qu.:12.79 1st Qu.:743.9 1st Qu.:27183 1st Qu.:11967
## Median :13.20 Median :796.5 Median :37481 Median :12038
## Mean :13.26 Mean :797.2 Mean :37047 Mean :12011
## 3rd Qu.:13.71 3rd Qu.:866.8 3rd Qu.:46049 3rd Qu.:12074
## Max. :15.12 Max. :987.0 Max. :53272 Max. :12145
## Value_added_deflator N2O_indirect N2O_direct GDP_deflator
## Min. : 58.67 Min. :0.6800 Min. :3.022 Min. : 67.07
## 1st Qu.: 85.71 1st Qu.:0.8541 1st Qu.:3.796 1st Qu.: 83.69
## Median :100.00 Median :0.9582 Median :4.258 Median : 99.23
## Mean :102.05 Mean :0.9406 Mean :4.180 Mean : 97.53
## 3rd Qu.:123.31 3rd Qu.:1.0448 3rd Qu.:4.643 3rd Qu.:110.75
## Max. :142.05 Max. :1.1962 Max. :5.316 Max. :119.06
## NVWI
## Min. :-298.2
## 1st Qu.: 618.0
## Median :1302.1
## Mean :2205.9
## 3rd Qu.:4143.0
## Max. :5426.4
2. Check for stationarity
library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
##
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
##
## filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Variables ----
vars <- c("Population","GNI_per_capita","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","Cropland_area","Value_added_deflator",
"N2O_indirect","N2O_direct","GDP_deflator","NVWI")
# ---- Function to test stationarity ----
check_stationarity <- function(x){
x <- na.omit(x)
max_lag <- floor(4*(length(x)/100)^(1/4))
adf <- ur.df(x, type="trend", lags=max_lag, selectlags="AIC")
adf_diff <- ur.df(diff(x), type="drift", lags=max_lag, selectlags="AIC")
pp <- pp.test(x)
kpss <- kpss.test(x)
level_stat <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"] & pp$p.value < 0.05 & kpss$p.value > 0.05,
"Stationary", "Non-stationary")
diff_stat <- ifelse(adf_diff@teststat[1] < adf_diff@cval[2, "5pct"], "Stationary", "Non-stationary")
integ <- ifelse(level_stat=="Stationary", "I(0)",
ifelse(diff_stat=="Stationary","I(1)","I(?)"))
return(c(Level=level_stat, First_Diff=diff_stat, Integration=integ))
}
# ---- Build stationarity table with automatic log-transform ----
stationarity_table <- lapply(vars, function(v){
x <- data[[v]]
res <- check_stationarity(x)
# If I(?), apply log transform and retest
if(res["Integration"]=="I(?)" & all(x>0)){
x <- log(x)
res <- check_stationarity(x)
}
data.frame(Variable=v,
Level_ADF_PP_KPSS=res["Level"],
First_Difference_ADF=res["First_Diff"],
Integration_Order=res["Integration"],
stringsAsFactors = FALSE)
}) %>% bind_rows()
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
# ---- Print final table ----
print(stationarity_table)
## Variable Level_ADF_PP_KPSS First_Difference_ADF
## Level...1 Population Non-stationary Stationary
## Level...2 GNI_per_capita Non-stationary Stationary
## Level...3 Producer_Price_Index Non-stationary Stationary
## Level...4 Producer_Price_USD Non-stationary Stationary
## Level...5 Avg_Max_Temp Non-stationary Stationary
## Level...6 Annual_Precip Stationary Stationary
## Level...7 Cropland_area Non-stationary Stationary
## Level...8 Value_added_deflator Non-stationary Stationary
## Level...9 N2O_indirect Non-stationary Stationary
## Level...10 N2O_direct Non-stationary Stationary
## Level...11 GDP_deflator Non-stationary Stationary
## Level...12 NVWI Non-stationary Stationary
## Integration_Order
## Level...1 I(1)
## Level...2 I(1)
## Level...3 I(1)
## Level...4 I(1)
## Level...5 I(1)
## Level...6 I(0)
## Level...7 I(1)
## Level...8 I(1)
## Level...9 I(1)
## Level...10 I(1)
## Level...11 I(1)
## Level...12 I(1)
# Only annual prep is I(0)-stationary at basic level
# for ADF / PP Test: if p-value<0.05=stationary
# for KPSS Test: if p-value<0.05=non-stationary
3. Decide on the modeling approach:Check for cointegration (long-run
relationship)
Use Johansen test if you have multiple I(1) variables.
Since you have ~30 observations and 11 I(1) variables, Johansen may
fail due to rank deficiency.
Reduce variables (drop highly correlated ones like
Producer_Price_Index vs Producer_Price_USD)
Or use Engle-Granger two-step (NVWI vs each regressor
separately).
library(urca)
library(dplyr)
# Separate variables by integration order
# ---- Identify I(1) variables ----
I1_vars <- stationarity_table$Variable[stationarity_table$Integration_Order == "I(1)"]
I1_vars <- setdiff(I1_vars, "NVWI") # Remove dependent variable
I1_vars
## [1] "Population" "GNI_per_capita" "Producer_Price_Index"
## [4] "Producer_Price_USD" "Avg_Max_Temp" "Cropland_area"
## [7] "Value_added_deflator" "N2O_indirect" "N2O_direct"
## [10] "GDP_deflator"
I0_vars <- stationarity_table$Variable[stationarity_table$Integration_Order=="I(0)"]
I0_vars
## [1] "Annual_Precip"
# ---- Function to run Engle-Granger test ----
eg_test <- function(dep, indep){
df <- data[, c(dep, indep)]
df <- na.omit(df)
# Phillips-Ouliaris (Pz) test
test <- ca.po(df, type="Pz", demean="trend")
# Return as data.frame
data.frame(
Variable = indep,
Test_statistic = round(test@teststat, 3),
Critical_5pct = round(test@cval[2], 3),
Cointegrated = ifelse(test@teststat > test@cval[2], "No", "Yes"),
stringsAsFactors = FALSE
)
}
# ---- Loop through I(1) variables ----
eg_results <- lapply(I1_vars, function(v){
eg_test("NVWI", v)
}) %>% bind_rows()
# ---- View results ----
print(eg_results)
## Variable Test_statistic Critical_5pct Cointegrated
## 1 Population 3.110 81.381 Yes
## 2 GNI_per_capita 6.290 81.381 Yes
## 3 Producer_Price_Index 7.404 81.381 Yes
## 4 Producer_Price_USD 8.533 81.381 Yes
## 5 Avg_Max_Temp 4.947 81.381 Yes
## 6 Cropland_area 4.026 81.381 Yes
## 7 Value_added_deflator 6.408 81.381 Yes
## 8 N2O_indirect 4.968 81.381 Yes
## 9 N2O_direct 4.968 81.381 Yes
## 10 GDP_deflator 4.415 81.381 Yes
# since co-integration exists, use Engle-Granger Error Correction Model (ECM) to capture long run and short run dynamics
4. Engle-Granger Error Correction Model (ECM)
# a. Long-run regression: regress NVWI on all cointegrated I(1) variables.
long_run_formula <- as.formula(
paste("NVWI ~", paste(eg_results$Variable[eg_results$Cointegrated=="Yes"], collapse=" + "))
)
long_run_model <- lm(long_run_formula, data=data)
summary(long_run_model)
##
## Call:
## lm(formula = long_run_formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -873.81 -553.78 -20.35 299.84 1665.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.107e+05 5.083e+04 2.178 0.04157 *
## Population -1.216e+03 3.657e+02 -3.326 0.00337 **
## GNI_per_capita 1.500e-01 1.627e-01 0.922 0.36768
## Producer_Price_Index 1.618e+02 6.936e+01 2.333 0.03018 *
## Producer_Price_USD -8.836e+01 3.351e+01 -2.637 0.01581 *
## Avg_Max_Temp -2.252e+02 2.493e+02 -0.903 0.37705
## Cropland_area -1.617e+00 3.296e+00 -0.490 0.62916
## Value_added_deflator 1.706e+01 1.766e+01 0.966 0.34555
## N2O_indirect 7.607e+04 5.987e+06 0.013 0.98999
## N2O_direct -1.745e+04 1.347e+06 -0.013 0.98979
## GDP_deflator 8.481e+01 9.424e+01 0.900 0.37888
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 791.7 on 20 degrees of freedom
## Multiple R-squared: 0.8836, Adjusted R-squared: 0.8254
## F-statistic: 15.19 on 10 and 20 DF, p-value: 2.917e-07
# b. Compute the error correction term (residuals from long-run regression):
data$ECT <- residuals(long_run_model)
# c.Short-run model: first-difference all I(1) variables, include ECT and I(0) variables in levels:
# First differences
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI)))
# Build short-run formula
short_run_vars <- paste0("d_", eg_results$Variable[eg_results$Cointegrated=="Yes"])
short_run_formula <- as.formula(
paste("d_NVWI ~", paste(short_run_vars, collapse=" + "),
"+ ECT +", paste(I0_vars, collapse=" + "))
)
# Fit ECM
ecm_model <- lm(short_run_formula, data=data_diff)
summary(ecm_model)
##
## Call:
## lm(formula = short_run_formula, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -979.5 -292.4 111.8 302.9 775.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.452e+02 1.139e+03 0.215 0.832110
## d_Population -1.847e+03 4.745e+02 -3.892 0.001171 **
## d_GNI_per_capita 1.582e-01 1.535e-01 1.031 0.317117
## d_Producer_Price_Index -5.349e+00 6.942e+01 -0.077 0.939481
## d_Producer_Price_USD -3.369e+00 3.389e+01 -0.099 0.921974
## d_Avg_Max_Temp -2.484e+02 1.484e+02 -1.673 0.112548
## d_Cropland_area 2.653e+00 2.398e+00 1.106 0.283948
## d_Value_added_deflator -1.570e+01 1.281e+01 -1.226 0.237022
## d_N2O_indirect 9.551e+04 2.910e+06 0.033 0.974202
## d_N2O_direct -2.213e+04 6.548e+05 -0.034 0.973439
## d_GDP_deflator -1.327e+01 7.290e+01 -0.182 0.857739
## ECT 1.042e+00 2.532e-01 4.114 0.000725 ***
## Annual_Precip 1.113e-02 1.387e+00 0.008 0.993689
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 575.2 on 17 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.7304, Adjusted R-squared: 0.5402
## F-statistic: 3.839 on 12 and 17 DF, p-value: 0.005943
5. check diagnostics
—- 5. Diagnostics for Short-Run (ECM) and Long-Run Models —-
library(lmtest) # dwtest, bptest
## Ładowanie wymaganego pakietu: zoo
##
## Dołączanie pakietu: 'zoo'
## Następujące obiekty zostały zakryte z 'package:base':
##
## as.Date, as.Date.numeric
library(car) # vif
## Ładowanie wymaganego pakietu: carData
##
## Dołączanie pakietu: 'car'
## Następujący obiekt został zakryty z 'package:dplyr':
##
## recode
library(urca) # ur.df
# Function to run diagnostics
run_diagnostics <- function(model, model_name = "Model") {
# Residuals
res <- residuals(model)
# Durbin-Watson Test (Autocorrelation)
dw <- dwtest(model)
dw_stat <- round(dw$statistic, 3)
dw_p <- round(dw$p.value, 3)
# Breusch-Pagan Test (Heteroskedasticity)
bp <- bptest(model)
bp_stat <- round(bp$statistic, 3)
bp_p <- round(bp$p.value, 3)
# Multicollinearity (VIF)
vif_vals <- vif(model)
high_vif <- names(vif_vals[vif_vals > 5])
vif_flag <- if(length(high_vif) == 0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
# Residual stationarity (ADF)
adf <- ur.df(res, type = "drift", selectlags = "AIC")
resid_stationary <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"], "Stationary", "Non-stationary")
# Compile diagnostics table
diagnostics <- data.frame(
Test = c("Durbin-Watson (Autocorr)",
"Breusch-Pagan (Heteroskedasticity)",
"Residual Stationarity",
"Multicollinearity"),
Result = c(paste0(dw_stat, ", p=", dw_p),
paste0(bp_stat, ", p=", bp_p),
resid_stationary,
vif_flag),
stringsAsFactors = FALSE
)
cat("\nDiagnostics for", model_name, ":\n")
print(diagnostics)
invisible(diagnostics)
}
# ---- Run diagnostics for Long-Run Model ----
diagnostics_long <- run_diagnostics(long_run_model, "Long-Run Model")
##
## Diagnostics for Long-Run Model :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 1.893, p=0.073
## 2 11.003, p=0.357
## 3 Stationary
## 4 High VIF for: Population, GNI_per_capita, Producer_Price_Index, Producer_Price_USD, Cropland_area, Value_added_deflator, N2O_indirect, N2O_direct, GDP_deflator
# ---- Run diagnostics for Short-Run ECM ----
diagnostics_short <- run_diagnostics(ecm_model, "Short-Run ECM")
##
## Diagnostics for Short-Run ECM :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 1.876, p=0.402
## 2 5.888, p=0.922
## 3 Stationary
## 4 High VIF for: d_GNI_per_capita, d_Producer_Price_Index, d_Producer_Price_USD, d_N2O_indirect, d_N2O_direct, d_GDP_deflator
# check for variables with high VIF
# VIF diagnostics for short- and long-run models
library(car)
check_vif <- function(model, model_name="Model") {
vif_vals <- vif(model)
vif_table <- data.frame(
Variable = names(vif_vals),
VIF = round(vif_vals, 2),
HighVIF = ifelse(vif_vals > 10, "Yes", "No"),
stringsAsFactors = FALSE
)
cat("\nVIF Table for", model_name, ":\n")
print(vif_table)
invisible(vif_table)
}
# ---- Run for long-run model ----
vif_long <- check_vif(long_run_model, "Long-Run Model")
##
## VIF Table for Long-Run Model :
## Variable VIF HighVIF
## Population Population 5.07 No
## GNI_per_capita GNI_per_capita 118.35 Yes
## Producer_Price_Index Producer_Price_Index 115.07 Yes
## Producer_Price_USD Producer_Price_USD 161.39 Yes
## Avg_Max_Temp Avg_Max_Temp 2.22 No
## Cropland_area Cropland_area 5.18 No
## Value_added_deflator Value_added_deflator 8.33 No
## N2O_indirect N2O_indirect 32508562.39 Yes
## N2O_direct N2O_direct 32507464.72 Yes
## GDP_deflator GDP_deflator 109.78 Yes
# ---- Run for short-run ECM ----
vif_short <- check_vif(ecm_model, "Short-Run ECM")
##
## VIF Table for Short-Run ECM :
## Variable VIF HighVIF
## d_Population d_Population 1.72 No
## d_GNI_per_capita d_GNI_per_capita 19.17 Yes
## d_Producer_Price_Index d_Producer_Price_Index 125.17 Yes
## d_Producer_Price_USD d_Producer_Price_USD 178.81 Yes
## d_Avg_Max_Temp d_Avg_Max_Temp 1.55 No
## d_Cropland_area d_Cropland_area 1.97 No
## d_Value_added_deflator d_Value_added_deflator 3.51 No
## d_N2O_indirect d_N2O_indirect 6300143.34 Yes
## d_N2O_direct d_N2O_direct 6300976.91 Yes
## d_GDP_deflator d_GDP_deflator 27.36 Yes
## ECT ECT 2.40 No
## Annual_Precip Annual_Precip 1.72 No
6. REFINE YOUR MODEL (by removing variables)
—- Refined Long- and Short-Run Models —-
library(dplyr)
library(lmtest)
library(car)
library(urca)
# ---- 1. Refine Long-Run Model ----
# Keep only significant and low-VIF variables
long_vars_refined <- c("Avg_Max_Temp", "Cropland_area","Value_added_deflator","N2O_indirect")
long_run_formula_refined <- as.formula(paste("NVWI ~", paste(long_vars_refined, collapse = " + ")))
long_run_model_refined <- lm(long_run_formula_refined, data = data)
summary(long_run_model_refined)
##
## Call:
## lm(formula = long_run_formula_refined, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1654.3 -557.0 -180.3 422.5 2092.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91001.515 30281.460 3.005 0.005812 **
## Avg_Max_Temp 86.970 257.668 0.338 0.738428
## Cropland_area -8.424 2.467 -3.415 0.002102 **
## Value_added_deflator 44.410 9.739 4.560 0.000107 ***
## N2O_indirect 7126.639 1884.752 3.781 0.000825 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1050 on 26 degrees of freedom
## Multiple R-squared: 0.7337, Adjusted R-squared: 0.6927
## F-statistic: 17.91 on 4 and 26 DF, p-value: 3.567e-07
long_vars_refined2 <- c("Population", "Annual_Precip", "GNI_per_capita", "Producer_Price_Index")
long_run_formula_refined2 <- as.formula(paste("NVWI ~", paste(long_vars_refined2, collapse = " + ")))
long_run_model_refined2 <- lm(long_run_formula_refined2, data = data)
summary(long_run_model_refined2)
##
## Call:
## lm(formula = long_run_formula_refined2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1948.86 -587.64 -47.73 582.29 1975.04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2525.8512 17506.3387 0.144 0.886
## Population -64.4408 213.4726 -0.302 0.765
## Annual_Precip -1.9587 1.6747 -1.170 0.253
## GNI_per_capita 0.1636 0.0280 5.845 3.68e-06 ***
## Producer_Price_Index 4.8743 11.5192 0.423 0.676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 933.5 on 26 degrees of freedom
## Multiple R-squared: 0.7897, Adjusted R-squared: 0.7573
## F-statistic: 24.4 on 4 and 26 DF, p-value: 1.776e-08
long_vars_refined3 <- c("N2O_direct", "GDP_deflator")
long_run_formula_refined3 <- as.formula(paste("NVWI ~", paste(long_vars_refined3, collapse = " + ")))
long_run_model_refined3 <- lm(long_run_formula_refined3, data = data)
summary(long_run_model_refined3)
##
## Call:
## lm(formula = long_run_formula_refined3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2427.25 -747.87 -17.64 745.76 2337.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8281.13 1587.95 -5.215 1.54e-05 ***
## N2O_direct 724.76 419.90 1.726 0.0954 .
## GDP_deflator 76.46 15.99 4.783 5.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1171 on 28 degrees of freedom
## Multiple R-squared: 0.6436, Adjusted R-squared: 0.6181
## F-statistic: 25.28 on 2 and 28 DF, p-value: 5.344e-07
# ---- 2. Compute ECT ----
data$ECT_refined <- residuals(long_run_model_refined)
data$ECT_refined2 <- residuals(long_run_model_refined2)
data$ECT_refined3 <- residuals(long_run_model_refined3)
# ---- 3. Prepare differenced data for Short-Run ECM ----
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined = data$ECT_refined) # add ECT to differenced data
data_diff2 <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined2 = data$ECT_refined2) # add ECT to differenced data
data_diff3 <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined3 = data$ECT_refined3) # add ECT to differenced data
# Keep differenced variables with low VIF and significant
short_vars_refined <- c("d_Avg_Max_Temp", "d_Cropland_area","d_Value_added_deflator","d_N2O_indirect")
short_run_formula_refined <- as.formula(paste("d_NVWI ~", paste(short_vars_refined, collapse = " + "), "+ ECT_refined"))
short_vars_refined2 <- c("d_Population", "Annual_Precip", "d_GNI_per_capita", "d_Producer_Price_Index")
short_run_formula_refined2 <- as.formula(paste("d_NVWI ~", paste(short_vars_refined2, collapse = " + "), "+ ECT_refined2"))
short_vars_refined3 <- c("d_N2O_indirect", "d_GDP_deflator")
short_run_formula_refined3 <- as.formula(paste("d_NVWI ~", paste(short_vars_refined3, collapse = " + "), "+ ECT_refined3"))
# Fit Short-Run ECM
ecm_model_refined <- lm(short_run_formula_refined, data = data_diff)
summary(ecm_model_refined)
##
## Call:
## lm(formula = short_run_formula_refined, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1919.73 -280.81 -41.76 510.54 1106.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 113.5179 133.7251 0.849 0.40433
## d_Avg_Max_Temp -57.4317 156.6600 -0.367 0.71713
## d_Cropland_area 2.6280 2.6707 0.984 0.33493
## d_Value_added_deflator -0.4490 9.3545 -0.048 0.96212
## d_N2O_indirect 307.2606 2093.4125 0.147 0.88454
## ECT_refined 0.5103 0.1761 2.897 0.00791 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 722.9 on 24 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.3989, Adjusted R-squared: 0.2737
## F-statistic: 3.186 on 5 and 24 DF, p-value: 0.02404
ecm_model_refined2 <- lm(short_run_formula_refined2, data = data_diff2)
summary(ecm_model_refined2)
##
## Call:
## lm(formula = short_run_formula_refined2, data = data_diff2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1390.46 -394.11 27.93 445.06 1435.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.477e+03 1.067e+03 2.321 0.02907 *
## d_Population -1.629e+03 5.256e+02 -3.098 0.00491 **
## Annual_Precip -2.841e+00 1.300e+00 -2.185 0.03887 *
## d_GNI_per_capita 1.335e-01 5.262e-02 2.537 0.01809 *
## d_Producer_Price_Index -1.622e+01 8.767e+00 -1.850 0.07667 .
## ECT_refined2 6.709e-01 1.902e-01 3.527 0.00173 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 687.4 on 24 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.4565, Adjusted R-squared: 0.3432
## F-statistic: 4.031 on 5 and 24 DF, p-value: 0.008498
ecm_model_refined3 <- lm(short_run_formula_refined3, data = data_diff3)
summary(ecm_model_refined3)
##
## Call:
## lm(formula = short_run_formula_refined3, data = data_diff3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1806.72 -533.81 84.04 337.64 1859.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82.0986 160.5733 0.511 0.613
## d_N2O_indirect -746.5113 1988.9985 -0.375 0.710
## d_GDP_deflator 24.5903 22.1707 1.109 0.278
## ECT_refined3 0.1433 0.1719 0.834 0.412
##
## Residual standard error: 859.9 on 26 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.07864, Adjusted R-squared: -0.02767
## F-statistic: 0.7397 on 3 and 26 DF, p-value: 0.538
# ---- 4. Optional: Quick Diagnostics Function ----
run_diagnostics <- function(model) {
res <- residuals(model)
dw <- dwtest(model); bp <- bptest(model)
vif_vals <- vif(model); high_vif <- names(vif_vals[vif_vals>5])
adf <- ur.df(res, type="drift", selectlags="AIC")
data.frame(
Test = c("Durbin-Watson", "Breusch-Pagan", "Residual Stationarity", "Multicollinearity"),
Result = c(
paste0(round(dw$statistic,3), ", p=", round(dw$p.value,3)),
paste0(round(bp$statistic,3), ", p=", round(bp$p.value,3)),
ifelse(adf@teststat[1]<adf@cval[2,"5pct"], "Stationary", "Non-stationary"),
if(length(high_vif)==0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
),
stringsAsFactors = FALSE
)
}
# Run diagnostics
run_diagnostics(long_run_model_refined)
run_diagnostics(long_run_model_refined2)
run_diagnostics(long_run_model_refined3)
run_diagnostics(ecm_model_refined)
run_diagnostics(ecm_model_refined2)
run_diagnostics(ecm_model_refined3)
7. Adjust for serial correlation of refine long run_diagnostics
library(dynlm)
library(sandwich)
library(lmtest)
nw_se2 <- NeweyWest(long_run_model_refined2, lag = 2, prewhite = FALSE)
coeftest(long_run_model_refined2, vcov = nw_se2)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5259e+03 1.4498e+04 0.1742 0.8630
## Population -6.4441e+01 1.7762e+02 -0.3628 0.7197
## Annual_Precip -1.9587e+00 1.2380e+00 -1.5823 0.1257
## GNI_per_capita 1.6363e-01 2.5518e-02 6.4125 8.569e-07 ***
## Producer_Price_Index 4.8743e+00 1.0677e+01 0.4565 0.6518
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
nw_se3 <- NeweyWest(long_run_model_refined3, lag = 2, prewhite = FALSE)
coeftest(long_run_model_refined3, vcov = nw_se3)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8281.132 1480.524 -5.5934 5.496e-06 ***
## N2O_direct 724.764 450.999 1.6070 0.1193
## GDP_deflator 76.464 14.460 5.2878 1.263e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1