Introduction

This report investigates the relationship between trade openness (OT), foreign direct investment (FDI), per capita GDP (PGDP), savings (S), and globalization (GL) with industrial value added (AI).

We simulate a dataset for the period 1990–2020, perform econometric analyses, and visualize the relationships. The goal is to explore short-run and long-run dynamics, including correlation, stationarity, regression, cointegration, diagnostics, and robustness checks.


Objectives

  1. Simulate data representing key economic indicators.
  2. Perform correlation analysis and visualize variable relationships.
  3. Test for unit roots using the Augmented Dickey-Fuller (ADF) test.
  4. Estimate regression models linking AI to OT, PGDP, S, FDI, and GL.
  5. Run diagnostic tests for heteroskedasticity, autocorrelation, and structural breaks.
  6. Conduct cointegration analysis (Johansen test) to assess long-run equilibrium.
  7. Check robustness with control variables and Chow tests.
  8. Visualize results using time-series plots, correlation heatmaps, regression plots, and animated graphs.

Data Simulation

library(tidyverse)
library(tseries)
library(urca)
library(vars)
library(GGally)
library(stargazer)
library(lmtest)
library(strucchange)
library(psych)

set.seed(123)

year <- 1990:2020
n <- length(year)

OT   <- 50 + 0.5*(1:n) + rnorm(n, 0, 5)
PGDP <- 2000 + 50*(1:n) + rnorm(n, 0, 200)
S    <- 15 + rnorm(n, 0, 2)
FDI  <- runif(n, 1, 10) + 0.2*(1:n)
GL   <- seq(40, 70, length.out = n) + rnorm(n, 0, 3)

AI <- 100 + 0.3*OT + 0.002*PGDP + 0.5*FDI + rnorm(n, 0, 20)

data <- data.frame(year, AI, OT, PGDP, S, FDI, GL)
head(data)
##   year       AI       OT     PGDP        S      FDI       GL
## 1 1990 127.7265 47.69762 1990.986 14.33359 3.585295 39.30921
## 2 1991 100.8852 49.84911 2279.025 12.96285 6.481314 45.19228
## 3 1992 112.2394 59.29354 2325.627 12.85642 9.818694 47.29096
## 4 1993 120.1130 52.35254 2364.316 15.60706 9.916870 44.45680
## 5 1994 107.9039 53.14644 2387.728 15.89642 4.467500 43.20278
## 6 1995 134.6547 61.57532 2410.784 15.10601 5.093345 45.45483
cor_matrix <- cor(data[, c("OT","PGDP","S","FDI","GL","AI")])
round(cor_matrix, 2)
##        OT PGDP    S  FDI   GL   AI
## OT   1.00 0.60 0.08 0.44 0.66 0.33
## PGDP 0.60 1.00 0.30 0.34 0.90 0.28
## S    0.08 0.30 1.00 0.15 0.22 0.27
## FDI  0.44 0.34 0.15 1.00 0.40 0.16
## GL   0.66 0.90 0.22 0.40 1.00 0.18
## AI   0.33 0.28 0.27 0.16 0.18 1.00
GGally::ggpairs(data[, c("OT","PGDP","S","FDI","GL","AI")],
                title = "Scatterplot Matrix for Key Variables")
## Warning in geom_point(): All aesthetics have length 1, but the data has 36 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

adf_results <- list(
  AI   = adf.test(data$AI, k = 1),
  OT   = adf.test(data$OT, k = 1),
  PGDP = adf.test(data$PGDP, k = 1),
  S    = adf.test(data$S, k = 1),
  FDI  = adf.test(data$FDI, k = 1),
  GL   = adf.test(data$GL, k = 1)
)
## Warning in adf.test(data$OT, k = 1): p-value smaller than printed p-value
## Warning in adf.test(data$S, k = 1): p-value smaller than printed p-value
## Warning in adf.test(data$FDI, k = 1): p-value smaller than printed p-value
## Warning in adf.test(data$GL, k = 1): p-value smaller than printed p-value
lapply(adf_results, function(x) data.frame(Test=x$statistic, Pval=x$p.value))
## $AI
##                    Test       Pval
## Dickey-Fuller -3.943974 0.02413138
## 
## $OT
##                   Test Pval
## Dickey-Fuller -4.92252 0.01
## 
## $PGDP
##                    Test       Pval
## Dickey-Fuller -3.799713 0.03384465
## 
## $S
##                    Test Pval
## Dickey-Fuller -5.107193 0.01
## 
## $FDI
##                   Test Pval
## Dickey-Fuller -4.47167 0.01
## 
## $GL
##                    Test Pval
## Dickey-Fuller -4.387605 0.01
model <- lm(AI ~ OT + PGDP + S + FDI + GL, data = data)
summary(model)
## 
## Call:
## lm(formula = AI ~ OT + PGDP + S + FDI + GL, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -42.65 -11.95  -2.43  12.84  42.04 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.08693   53.76064   0.095    0.925
## OT           1.54441    0.96611   1.599    0.122
## PGDP         0.02505    0.02058   1.217    0.235
## S            2.89208    2.55583   1.132    0.269
## FDI          0.15432    1.87269   0.082    0.935
## GL          -1.55855    1.14668  -1.359    0.186
## 
## Residual standard error: 22.78 on 25 degrees of freedom
## Multiple R-squared:  0.2237, Adjusted R-squared:  0.06841 
## F-statistic: 1.441 on 5 and 25 DF,  p-value: 0.2445
stargazer(model, type = "text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                 AI             
## -----------------------------------------------
## OT                             1.544           
##                               (0.966)          
##                                                
## PGDP                           0.025           
##                               (0.021)          
##                                                
## S                              2.892           
##                               (2.556)          
##                                                
## FDI                            0.154           
##                               (1.873)          
##                                                
## GL                            -1.559           
##                               (1.147)          
##                                                
## Constant                       5.087           
##                              (53.761)          
##                                                
## -----------------------------------------------
## Observations                    31             
## R2                             0.224           
## Adjusted R2                    0.068           
## Residual Std. Error      22.783 (df = 25)      
## F Statistic             1.441 (df = 5; 25)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

A multiple linear regression model explains AI (Industrial Value Added) using OT, PGDP, S, FDI, and GL.

Key results:

None of the predictors are statistically significant at 5%.

R² = 0.22 (low explanatory power).

Interpretation: In the simulated dataset, relationships are weak, which is expected because we used random components.

lmtest::bptest(model)         # Heteroskedasticity
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 7.8807, df = 5, p-value = 0.1629
lmtest::dwtest(model)         # Autocorrelation (Durbin-Watson)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 2.4313, p-value = 0.8556
## alternative hypothesis: true autocorrelation is greater than 0
lmtest::bgtest(model, order=1)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model
## LM test = 3.1066, df = 1, p-value = 0.07798

Breusch-Pagan test → no heteroskedasticity (p = 0.16).

Durbin-Watson → no evidence of autocorrelation.

Breusch-Godfrey → weak sign of serial correlation (p ≈ 0.07).

✅ Model residuals look fairly well-behaved.

johansen_test <- ca.jo(data[,c("AI","OT","PGDP","S","FDI","GL")],
                       type="trace", ecdet="const", K=2)
summary(johansen_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 8.561622e-01 7.744786e-01 5.646242e-01 3.588287e-01 2.986216e-01
## [6] 2.217681e-01 1.221245e-15
## 
## Values of teststatistic and critical values of test:
## 
##            test 10pct   5pct   1pct
## r <= 5 |   7.27  7.52   9.24  12.97
## r <= 4 |  17.56 17.85  19.96  24.60
## r <= 3 |  30.45 32.00  34.91  41.07
## r <= 2 |  54.56 49.65  53.12  60.16
## r <= 1 |  97.75 71.86  76.07  84.45
## r = 0  | 153.99 97.18 102.14 111.01
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                  AI.l2        OT.l2      PGDP.l2          S.l2        FDI.l2
## AI.l2      1.000000000    1.0000000  1.000000000      1.000000    1.00000000
## OT.l2     -0.148659574    0.3615621 -5.050394606   1042.895471   -4.54034816
## PGDP.l2    0.006632476   -0.1322558 -0.002890405     -9.902398    0.08946461
## S.l2     -11.568660669    3.4986042  9.599767909   1321.622866   13.07789214
## FDI.l2    -5.402675095   -0.3225803 -8.540152975  -2374.577167   37.62872907
## GL.l2      0.186279587    6.2257913  1.488810156    234.196088   -6.81334447
## constant  84.537301856 -167.8259281 19.408833893 -41232.829791 -213.16538192
##                  GL.l2      constant
## AI.l2       1.00000000    1.00000000
## OT.l2      12.65562857    0.73737058
## PGDP.l2     0.09109959    0.07481978
## S.l2       13.01092055   -6.26496117
## FDI.l2      0.40365394   -9.81380633
## GL.l2     -10.47823363    7.70058750
## constant -747.26540762 -617.65811891
## 
## Weights W:
## (This is the loading matrix)
## 
##              AI.l2       OT.l2     PGDP.l2          S.l2       FDI.l2
## AI.d   -1.39125237 -0.32060932 -0.36129503  1.392100e-04  0.010609998
## OT.d   -0.02189632 -0.11328973  0.06391529 -5.131351e-05  0.007360513
## PGDP.d  2.32608915  4.85603489 -2.69867587  1.290151e-02  0.351818557
## S.d     0.04805255 -0.02476208 -0.04851825 -2.949498e-05 -0.006748419
## FDI.d  -0.03898077 -0.02502523  0.03176905  1.755338e-04 -0.010192811
## GL.d    0.06910920 -0.09383745  0.01832506  1.060221e-04  0.011471450
##                GL.l2      constant
## AI.d   -0.0633530916  2.819205e-15
## OT.d   -0.0279638327  5.903653e-16
## PGDP.d -0.7704138611 -4.032766e-14
## S.d    -0.0007571062 -1.532588e-16
## FDI.d  -0.0000980359  7.778162e-17
## GL.d    0.0084302998  4.247617e-16

Johansen test evaluates long-run equilibrium relationships.

Output: Trace statistics exceed critical values at multiple ranks → suggests at least one cointegration relationship among variables.

Meaning: despite short-run noise, variables move together in the long run.

data %>% 
  pivot_longer(-year, names_to="Variable", values_to="Value") %>%
  ggplot(aes(year, Value, color=Variable)) +
  geom_line(size=1.2) +
  facet_wrap(~Variable, scales="free_y") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
cor_melt <- melt(round(cor(data[, -1]), 2))
ggplot(cor_melt, aes(Var1, Var2, fill=value)) +
  geom_tile(color="white") +
  geom_text(aes(label=value)) +
  scale_fill_gradient2(low="blue", mid="white", high="red", midpoint=0)

library(broom)
## Warning: package 'broom' was built under R version 4.2.3
tidy_model <- tidy(model, conf.int=TRUE)
ggplot(tidy_model, aes(estimate, term)) +
  geom_point(size=3, color="steelblue") +
  geom_errorbarh(aes(xmin=conf.low, xmax=conf.high), height=0.2) +
  theme_minimal() +
  labs(title="Regression Coefficients with 95% CI")

ggpairs(data[, c("OT","PGDP","S","FDI","GL","AI")])
## Warning in geom_point(): All aesthetics have length 1, but the data has 36 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.