1. Load the data
library(readxl)
data <- read_excel("Final_reg_data_for_potatoes_DE.xlsx")
data
# Rename columns for convenience
colnames(data) <- c("Year","Population","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","GNI_per_capita","Cropland_area",
"Value_added_deflator","N2O_indirect","N2O_direct","GDP_deflator","NVWI")
str(data)
## tibble [31 × 13] (S3: tbl_df/tbl/data.frame)
## $ Year : num [1:31] 1991 1992 1993 1994 1995 ...
## $ Population : num [1:31] 80.3 80.9 81.5 81.8 82 ...
## $ Producer_Price_Index: num [1:31] 87.1 75.9 29.2 69.3 123 ...
## $ Producer_Price_USD : num [1:31] 165.1 153 55.6 134.3 270 ...
## $ Avg_Max_Temp : num [1:31] 12.8 13.4 12.6 13.7 12.9 ...
## $ Annual_Precip : num [1:31] 655 814 878 856 841 ...
## $ GNI_per_capita : num [1:31] 23506 26657 25751 27255 31829 ...
## $ Cropland_area : num [1:31] 11807 11708 11911 12037 12061 ...
## $ Value_added_deflator: num [1:31] 58.7 62.7 64.5 94.3 114.8 ...
## $ N2O_indirect : num [1:31] 0.062 0.066 0.0672 0.0565 0.059 0.0719 0.0644 0.0627 0.0644 0.0699 ...
## $ N2O_direct : num [1:31] 0.276 0.293 0.298 0.251 0.262 ...
## $ GDP_deflator : num [1:31] 75.5 84.5 82.9 86.1 99.2 ...
## $ NVWI : num [1:31] 139 159.6 120.7 88.1 143.1 ...
sum(is.na(data))
## [1] 0
summary(data)
## Year Population Producer_Price_Index Producer_Price_USD
## Min. :1991 Min. :80.30 Min. : 29.23 Min. : 55.6
## 1st Qu.:1998 1st Qu.:81.20 1st Qu.: 60.56 1st Qu.:114.2
## Median :2006 Median :81.93 Median : 80.91 Median :165.1
## Mean :2006 Mean :81.91 Mean : 86.94 Mean :170.8
## 3rd Qu.:2014 3rd Qu.:82.19 3rd Qu.:104.62 3rd Qu.:212.1
## Max. :2021 Max. :83.70 Max. :176.25 Max. :328.0
## Avg_Max_Temp Annual_Precip GNI_per_capita Cropland_area
## Min. :10.97 Min. :564.2 Min. :23506 Min. :11708
## 1st Qu.:12.79 1st Qu.:743.9 1st Qu.:27183 1st Qu.:11967
## Median :13.20 Median :796.5 Median :37481 Median :12038
## Mean :13.26 Mean :797.2 Mean :37047 Mean :12011
## 3rd Qu.:13.71 3rd Qu.:866.8 3rd Qu.:46049 3rd Qu.:12074
## Max. :15.12 Max. :987.0 Max. :53272 Max. :12145
## Value_added_deflator N2O_indirect N2O_direct GDP_deflator
## Min. : 58.67 Min. :0.05070 Min. :0.2256 Min. : 67.07
## 1st Qu.: 85.71 1st Qu.:0.05675 1st Qu.:0.2524 1st Qu.: 83.69
## Median :100.00 Median :0.06100 Median :0.2709 Median : 99.23
## Mean :102.05 Mean :0.06075 Mean :0.2700 Mean : 97.53
## 3rd Qu.:123.31 3rd Qu.:0.06250 3rd Qu.:0.2779 3rd Qu.:110.75
## Max. :142.05 Max. :0.07190 Max. :0.3197 Max. :119.06
## NVWI
## Min. : -2.589
## 1st Qu.: 20.372
## Median : 31.830
## Mean : 52.180
## 3rd Qu.: 82.458
## Max. :159.640
2. Check for stationarity
library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
##
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
##
## filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Variables ----
vars <- c("Population","GNI_per_capita","Producer_Price_Index","Producer_Price_USD",
"Avg_Max_Temp","Annual_Precip","Cropland_area","Value_added_deflator",
"N2O_indirect","N2O_direct","GDP_deflator","NVWI")
# ---- Function to test stationarity ----
check_stationarity <- function(x){
x <- na.omit(x)
max_lag <- floor(4*(length(x)/100)^(1/4))
adf <- ur.df(x, type="trend", lags=max_lag, selectlags="AIC")
adf_diff <- ur.df(diff(x), type="drift", lags=max_lag, selectlags="AIC")
pp <- pp.test(x)
kpss <- kpss.test(x)
level_stat <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"] & pp$p.value < 0.05 & kpss$p.value > 0.05,
"Stationary", "Non-stationary")
diff_stat <- ifelse(adf_diff@teststat[1] < adf_diff@cval[2, "5pct"], "Stationary", "Non-stationary")
integ <- ifelse(level_stat=="Stationary", "I(0)",
ifelse(diff_stat=="Stationary","I(1)","I(?)"))
return(c(Level=level_stat, First_Diff=diff_stat, Integration=integ))
}
# ---- Build stationarity table with automatic log-transform ----
stationarity_table <- lapply(vars, function(v){
x <- data[[v]]
res <- check_stationarity(x)
# If I(?), apply log transform and retest
if(res["Integration"]=="I(?)" & all(x>0)){
x <- log(x)
res <- check_stationarity(x)
}
data.frame(Variable=v,
Level_ADF_PP_KPSS=res["Level"],
First_Difference_ADF=res["First_Diff"],
Integration_Order=res["Integration"],
stringsAsFactors = FALSE)
}) %>% bind_rows()
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value greater than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in pp.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
## Warning in kpss.test(x): p-value smaller than printed p-value
# ---- Print final table ----
print(stationarity_table)
## Variable Level_ADF_PP_KPSS First_Difference_ADF
## Level...1 Population Non-stationary Stationary
## Level...2 GNI_per_capita Non-stationary Stationary
## Level...3 Producer_Price_Index Non-stationary Stationary
## Level...4 Producer_Price_USD Non-stationary Stationary
## Level...5 Avg_Max_Temp Non-stationary Stationary
## Level...6 Annual_Precip Stationary Stationary
## Level...7 Cropland_area Non-stationary Stationary
## Level...8 Value_added_deflator Non-stationary Stationary
## Level...9 N2O_indirect Non-stationary Stationary
## Level...10 N2O_direct Non-stationary Stationary
## Level...11 GDP_deflator Non-stationary Stationary
## Level...12 NVWI Non-stationary Stationary
## Integration_Order
## Level...1 I(1)
## Level...2 I(1)
## Level...3 I(1)
## Level...4 I(1)
## Level...5 I(1)
## Level...6 I(0)
## Level...7 I(1)
## Level...8 I(1)
## Level...9 I(1)
## Level...10 I(1)
## Level...11 I(1)
## Level...12 I(1)
# Only annual prep is I(0)-stationary at basic level
# for ADF / PP Test: if p-value<0.05=stationary
# for KPSS Test: if p-value<0.05=non-stationary
3. Decide on the modeling approach:Check for cointegration (long-run
relationship)
Use Johansen test if you have multiple I(1) variables.
Since you have ~30 observations and 11 I(1) variables, Johansen may
fail due to rank deficiency.
Reduce variables (drop highly correlated ones like
Producer_Price_Index vs Producer_Price_USD)
Or use Engle-Granger two-step (NVWI vs each regressor
separately).
library(urca)
library(dplyr)
# Separate variables by integration order
# ---- Identify I(1) variables ----
I1_vars <- stationarity_table$Variable[stationarity_table$Integration_Order == "I(1)"]
I1_vars <- setdiff(I1_vars, "NVWI") # Remove dependent variable
I1_vars
## [1] "Population" "GNI_per_capita" "Producer_Price_Index"
## [4] "Producer_Price_USD" "Avg_Max_Temp" "Cropland_area"
## [7] "Value_added_deflator" "N2O_indirect" "N2O_direct"
## [10] "GDP_deflator"
I0_vars <- stationarity_table$Variable[stationarity_table$Integration_Order=="I(0)"]
I0_vars
## [1] "Annual_Precip"
# ---- Function to run Engle-Granger test ----
eg_test <- function(dep, indep){
df <- data[, c(dep, indep)]
df <- na.omit(df)
# Phillips-Ouliaris (Pz) test
test <- ca.po(df, type="Pz", demean="trend")
# Return as data.frame
data.frame(
Variable = indep,
Test_statistic = round(test@teststat, 3),
Critical_5pct = round(test@cval[2], 3),
Cointegrated = ifelse(test@teststat > test@cval[2], "No", "Yes"),
stringsAsFactors = FALSE
)
}
# ---- Loop through I(1) variables ----
eg_results <- lapply(I1_vars, function(v){
eg_test("NVWI", v)
}) %>% bind_rows()
# ---- View results ----
print(eg_results)
## Variable Test_statistic Critical_5pct Cointegrated
## 1 Population 7.206 81.381 Yes
## 2 GNI_per_capita 4.695 81.381 Yes
## 3 Producer_Price_Index 6.336 81.381 Yes
## 4 Producer_Price_USD 7.480 81.381 Yes
## 5 Avg_Max_Temp 8.126 81.381 Yes
## 6 Cropland_area 7.734 81.381 Yes
## 7 Value_added_deflator 5.179 81.381 Yes
## 8 N2O_indirect 6.379 81.381 Yes
## 9 N2O_direct 6.367 81.381 Yes
## 10 GDP_deflator 5.601 81.381 Yes
# since co-integration exists, use Engle-Granger Error Correction Model (ECM) to capture long run and short run dynamics
4. Engle-Granger Error Correction Model (ECM)
# a. Long-run regression: regress NVWI on all cointegrated I(1) variables.
long_run_formula <- as.formula(
paste("NVWI ~", paste(eg_results$Variable[eg_results$Cointegrated=="Yes"], collapse=" + "))
)
long_run_model <- lm(long_run_formula, data=data)
summary(long_run_model)
##
## Call:
## lm(formula = long_run_formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.044 -8.352 -2.215 10.809 33.748
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.522e+03 7.986e+02 5.662 1.53e-05 ***
## Population -1.285e+01 7.173e+00 -1.791 0.088370 .
## GNI_per_capita -1.528e-02 2.503e-03 -6.103 5.78e-06 ***
## Producer_Price_Index 1.989e+00 1.239e+00 1.605 0.124076
## Producer_Price_USD -9.351e-01 6.291e-01 -1.486 0.152783
## Avg_Max_Temp -7.236e+00 5.769e+00 -1.254 0.224209
## Cropland_area -3.048e-01 4.405e-02 -6.919 1.02e-06 ***
## Value_added_deflator 1.537e+00 3.826e-01 4.018 0.000674 ***
## N2O_indirect -1.145e+05 1.110e+05 -1.032 0.314444
## N2O_direct 2.595e+04 2.503e+04 1.037 0.312069
## GDP_deflator 7.039e+00 1.567e+00 4.491 0.000223 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.27 on 20 degrees of freedom
## Multiple R-squared: 0.8799, Adjusted R-squared: 0.8198
## F-statistic: 14.65 on 10 and 20 DF, p-value: 3.953e-07
# b. Compute the error correction term (residuals from long-run regression):
data$ECT <- residuals(long_run_model)
# c.Short-run model: first-difference all I(1) variables, include ECT and I(0) variables in levels:
# First differences
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI)))
# Build short-run formula
short_run_vars <- paste0("d_", eg_results$Variable[eg_results$Cointegrated=="Yes"])
short_run_formula <- as.formula(
paste("d_NVWI ~", paste(short_run_vars, collapse=" + "),
"+ ECT +", paste(I0_vars, collapse=" + "))
)
# Fit ECM
ecm_model <- lm(short_run_formula, data=data_diff)
summary(ecm_model)
##
## Call:
## lm(formula = short_run_formula, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.639 -7.086 0.270 5.803 28.390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.570e+01 3.627e+01 0.984 0.33884
## d_Population -2.452e+01 1.464e+01 -1.675 0.11225
## d_GNI_per_capita -1.213e-02 4.290e-03 -2.826 0.01164 *
## d_Producer_Price_Index 7.941e-01 9.267e-01 0.857 0.40343
## d_Producer_Price_USD -3.334e-01 4.871e-01 -0.684 0.50300
## d_Avg_Max_Temp -9.030e+00 4.527e+00 -1.995 0.06238 .
## d_Cropland_area -2.449e-01 7.129e-02 -3.435 0.00316 **
## d_Value_added_deflator 1.282e+00 4.619e-01 2.774 0.01299 *
## d_N2O_indirect -5.258e+04 8.368e+04 -0.628 0.53809
## d_N2O_direct 1.205e+04 1.891e+04 0.637 0.53255
## d_GDP_deflator 5.419e+00 1.670e+00 3.245 0.00476 **
## ECT 9.987e-01 3.070e-01 3.253 0.00469 **
## Annual_Precip -4.350e-02 4.417e-02 -0.985 0.33845
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.87 on 17 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.7447, Adjusted R-squared: 0.5645
## F-statistic: 4.132 on 12 and 17 DF, p-value: 0.004064
5. check diagnostics
—- 5. Diagnostics for Short-Run (ECM) and Long-Run Models —-
library(lmtest) # dwtest, bptest
## Ładowanie wymaganego pakietu: zoo
##
## Dołączanie pakietu: 'zoo'
## Następujące obiekty zostały zakryte z 'package:base':
##
## as.Date, as.Date.numeric
library(car) # vif
## Ładowanie wymaganego pakietu: carData
##
## Dołączanie pakietu: 'car'
## Następujący obiekt został zakryty z 'package:dplyr':
##
## recode
library(urca) # ur.df
# Function to run diagnostics
run_diagnostics <- function(model, model_name = "Model") {
# Residuals
res <- residuals(model)
# Durbin-Watson Test (Autocorrelation)
dw <- dwtest(model)
dw_stat <- round(dw$statistic, 3)
dw_p <- round(dw$p.value, 3)
# Breusch-Pagan Test (Heteroskedasticity)
bp <- bptest(model)
bp_stat <- round(bp$statistic, 3)
bp_p <- round(bp$p.value, 3)
# Multicollinearity (VIF)
vif_vals <- vif(model)
high_vif <- names(vif_vals[vif_vals > 5])
vif_flag <- if(length(high_vif) == 0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
# Residual stationarity (ADF)
adf <- ur.df(res, type = "drift", selectlags = "AIC")
resid_stationary <- ifelse(adf@teststat[1] < adf@cval[2, "5pct"], "Stationary", "Non-stationary")
# Compile diagnostics table
diagnostics <- data.frame(
Test = c("Durbin-Watson (Autocorr)",
"Breusch-Pagan (Heteroskedasticity)",
"Residual Stationarity",
"Multicollinearity"),
Result = c(paste0(dw_stat, ", p=", dw_p),
paste0(bp_stat, ", p=", bp_p),
resid_stationary,
vif_flag),
stringsAsFactors = FALSE
)
cat("\nDiagnostics for", model_name, ":\n")
print(diagnostics)
invisible(diagnostics)
}
# ---- Run diagnostics for Long-Run Model ----
diagnostics_long <- run_diagnostics(long_run_model, "Long-Run Model")
##
## Diagnostics for Long-Run Model :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 2.069, p=0.236
## 2 8.846, p=0.547
## 3 Stationary
## 4 High VIF for: GNI_per_capita, Producer_Price_Index, Producer_Price_USD, Value_added_deflator, N2O_indirect, N2O_direct, GDP_deflator
# ---- Run diagnostics for Short-Run ECM ----
diagnostics_short <- run_diagnostics(ecm_model, "Short-Run ECM")
##
## Diagnostics for Short-Run ECM :
## Test
## 1 Durbin-Watson (Autocorr)
## 2 Breusch-Pagan (Heteroskedasticity)
## 3 Residual Stationarity
## 4 Multicollinearity
## Result
## 1 1.918, p=0.53
## 2 7.061, p=0.854
## 3 Stationary
## 4 High VIF for: d_GNI_per_capita, d_Producer_Price_Index, d_Producer_Price_USD, d_N2O_indirect, d_N2O_direct, d_GDP_deflator
#####################################
# 5. check for variables with high VIF
# VIF diagnostics for short- and long-run models
library(car)
check_vif <- function(model, model_name="Model") {
vif_vals <- vif(model)
vif_table <- data.frame(
Variable = names(vif_vals),
VIF = round(vif_vals, 2),
HighVIF = ifelse(vif_vals > 10, "Yes", "No"),
stringsAsFactors = FALSE
)
cat("\nVIF Table for", model_name, ":\n")
print(vif_table)
invisible(vif_table)
}
# ---- Run for long-run model ----
vif_long <- check_vif(long_run_model, "Long-Run Model")
##
## VIF Table for Long-Run Model :
## Variable VIF HighVIF
## Population Population 3.29 No
## GNI_per_capita GNI_per_capita 47.26 Yes
## Producer_Price_Index Producer_Price_Index 161.62 Yes
## Producer_Price_USD Producer_Price_USD 191.30 Yes
## Avg_Max_Temp Avg_Max_Temp 2.01 No
## Cropland_area Cropland_area 1.56 No
## Value_added_deflator Value_added_deflator 6.60 No
## N2O_indirect N2O_indirect 23859.83 Yes
## N2O_direct N2O_direct 23880.88 Yes
## GDP_deflator GDP_deflator 51.23 Yes
# ---- Run for short-run ECM ----
vif_short <- check_vif(ecm_model, "Short-Run ECM")
##
## VIF Table for Short-Run ECM :
## Variable VIF HighVIF
## d_Population d_Population 1.70 No
## d_GNI_per_capita d_GNI_per_capita 15.52 Yes
## d_Producer_Price_Index d_Producer_Price_Index 141.30 Yes
## d_Producer_Price_USD d_Producer_Price_USD 163.90 Yes
## d_Avg_Max_Temp d_Avg_Max_Temp 1.49 No
## d_Cropland_area d_Cropland_area 1.80 No
## d_Value_added_deflator d_Value_added_deflator 4.73 No
## d_N2O_indirect d_N2O_indirect 24662.46 Yes
## d_N2O_direct d_N2O_direct 24754.18 Yes
## d_GDP_deflator d_GDP_deflator 14.88 Yes
## ECT ECT 2.19 No
## Annual_Precip Annual_Precip 1.81 No
6. REFINE YOUR MODEL (by removing variables)
—- Refined Long- and Short-Run Models —-
library(dplyr)
library(lmtest)
library(car)
library(urca)
# ---- 1. Refine Long-Run Model ----
# Keep only significant and low-VIF variables
long_vars_refined <- c("Population", "GNI_per_capita", "Avg_Max_Temp", "Producer_Price_Index","Cropland_area")
long_run_formula_refined <- as.formula(paste("NVWI ~", paste(long_vars_refined, collapse = " + ")))
long_run_model_refined <- lm(long_run_formula_refined, data = data)
summary(long_run_model_refined)
##
## Call:
## lm(formula = long_run_formula_refined, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.765 -18.129 -2.294 20.518 71.117
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.908e+03 1.095e+03 3.568 0.00149 **
## Population -1.462e+01 7.650e+00 -1.911 0.06756 .
## GNI_per_capita -1.624e-03 8.654e-04 -1.877 0.07227 .
## Avg_Max_Temp -2.358e+01 8.692e+00 -2.712 0.01191 *
## Producer_Price_Index 3.686e-01 2.278e-01 1.618 0.11821
## Cropland_area -1.930e-01 6.468e-02 -2.984 0.00628 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.72 on 25 degrees of freedom
## Multiple R-squared: 0.5672, Adjusted R-squared: 0.4806
## F-statistic: 6.553 on 5 and 25 DF, p-value: 0.0005059
# ---- 2. Compute ECT ----
data$ECT_refined <- residuals(long_run_model_refined)
# ---- 3. Prepare differenced data for Short-Run ECM ----
data_diff <- data %>%
mutate(across(all_of(I1_vars), ~ c(NA, diff(.)), .names="d_{.col}")) %>%
mutate(d_NVWI = c(NA, diff(NVWI))) %>%
mutate(ECT_refined = data$ECT_refined) # add ECT to differenced data
# Keep differenced variables with low VIF and significant
short_vars_refined <- c("d_Population","d_GNI_per_capita", "d_Avg_Max_Temp","d_Producer_Price_Index", "d_Cropland_area", "d_Value_added_deflator")
short_run_formula_refined <- as.formula(paste("d_NVWI ~", paste(short_vars_refined, collapse = " + "), "+ ECT_refined"))
# Value_added_deflator was removed for long run because of high vif
# Fit Short-Run ECM
ecm_model_refined <- lm(short_run_formula_refined, data = data_diff)
summary(ecm_model_refined)
##
## Call:
## lm(formula = short_run_formula_refined, data = data_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.911 -14.667 3.343 14.549 34.669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.788592 4.746991 -1.219 0.2356
## d_Population -9.616154 14.950542 -0.643 0.5267
## d_GNI_per_capita 0.002167 0.001923 1.127 0.2720
## d_Avg_Max_Temp -10.511194 4.785436 -2.196 0.0389 *
## d_Producer_Price_Index 0.221409 0.108949 2.032 0.0544 .
## d_Cropland_area -0.157821 0.076935 -2.051 0.0523 .
## d_Value_added_deflator 0.275889 0.400433 0.689 0.4980
## ECT_refined 0.382250 0.160455 2.382 0.0263 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.15 on 22 degrees of freedom
## (1 obserwacja została skasowana z uwagi na braki w niej zawarte)
## Multiple R-squared: 0.4924, Adjusted R-squared: 0.3309
## F-statistic: 3.049 on 7 and 22 DF, p-value: 0.0212
# ---- 4. Optional: Quick Diagnostics Function ----
run_diagnostics <- function(model) {
res <- residuals(model)
dw <- dwtest(model); bp <- bptest(model)
vif_vals <- vif(model); high_vif <- names(vif_vals[vif_vals>5])
adf <- ur.df(res, type="drift", selectlags="AIC")
data.frame(
Test = c("Durbin-Watson", "Breusch-Pagan", "Residual Stationarity", "Multicollinearity"),
Result = c(
paste0(round(dw$statistic,3), ", p=", round(dw$p.value,3)),
paste0(round(bp$statistic,3), ", p=", round(bp$p.value,3)),
ifelse(adf@teststat[1]<adf@cval[2,"5pct"], "Stationary", "Non-stationary"),
if(length(high_vif)==0) "No severe multicollinearity" else paste("High VIF for:", paste(high_vif, collapse=", "))
),
stringsAsFactors = FALSE
)
}
# Run diagnostics
run_diagnostics(long_run_model_refined)
run_diagnostics(ecm_model_refined)
################################################
# Adjust for serial correlation of refine long run_diagnostics
library(dynlm)
library(sandwich)
library(lmtest)
nw_se <- NeweyWest(long_run_model_refined, lag = 2, prewhite = FALSE)
coeftest(long_run_model_refined, vcov = nw_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9081e+03 9.6018e+02 4.0702 0.0004136 ***
## Population -1.4617e+01 6.9468e+00 -2.1042 0.0455879 *
## GNI_per_capita -1.6241e-03 1.0475e-03 -1.5504 0.1336022
## Avg_Max_Temp -2.3576e+01 8.1407e+00 -2.8961 0.0077390 **
## Producer_Price_Index 3.6862e-01 2.2424e-01 1.6439 0.1127175
## Cropland_area -1.9300e-01 4.7803e-02 -4.0373 0.0004502 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1