Preliminary

Author

Crustom Jake D. Roncal

Load the Data

# Read the raw dataset from the project's DATA folder into a data frame
thesis <- read.csv(file = "DATA/data.csv")

Data Diagnostics

# Load necessary libraries
library(dplyr)
library(skimr)

# Summary of the dataset
summary(thesis)
   Country               Year           RDef              GDPpc      
 Length:95          Min.   :2002   Min.   :-2.24593   Min.   : 3107  
 Class :character   1st Qu.:2006   1st Qu.:-0.54182   1st Qu.: 5852  
 Mode  :character   Median :2011   Median : 0.01196   Median : 9216  
                    Mean   :2011   Mean   :-0.12229   Mean   :11090  
                    3rd Qu.:2016   3rd Qu.: 0.38421   3rd Qu.:14839  
                    Max.   :2020   Max.   : 1.38761   Max.   :29750  
     GDPpc2               PopD            AgriLU          RENEW      
 Min.   :  9650852   Min.   : 73.36   Min.   :21.42   Min.   : 2.00  
 1st Qu.: 34241516   1st Qu.:125.20   1st Qu.:26.09   1st Qu.:20.15  
 Median : 84942840   Median :139.29   Median :34.37   Median :26.90  
 Mean   :165129154   Mean   :193.39   Mean   :34.09   Mean   :25.26  
 3rd Qu.:220187646   3rd Qu.:289.56   3rd Qu.:42.06   3rd Qu.:34.00  
 Max.   :885078446   Max.   :375.90   Max.   :45.55   Max.   :52.20  
     Trade             CCor              RegQ                RLaw         
 Min.   : 32.97   Min.   :-1.1373   Min.   :-0.866097   Min.   :-0.91026  
 1st Qu.: 60.84   1st Qu.:-0.6329   1st Qu.:-0.373876   1st Qu.:-0.54605  
 Median :120.84   Median :-0.4829   Median : 0.007617   Median :-0.33899  
 Mean   :108.71   Mean   :-0.4202   Mean   :-0.041053   Mean   :-0.21699  
 3rd Qu.:138.08   3rd Qu.:-0.3186   3rd Qu.: 0.206029   3rd Qu.: 0.06385  
 Max.   :210.37   Max.   : 0.3965   Max.   : 0.799231   Max.   : 0.57312  
# Count the NA entries column by column (named numeric vector, one per column)
missing_values <- vapply(
  thesis,
  function(column) sum(is.na(column)),
  numeric(1)
)
missing_values
Country    Year    RDef   GDPpc  GDPpc2    PopD  AgriLU   RENEW   Trade    CCor 
      0       0       0       0       0       0       0       0       0       0 
   RegQ    RLaw 
      0       0 
# Additional diagnostics using skimr
skim(thesis)
Data summary
Name thesis
Number of rows 95
Number of columns 12
_______________________
Column type frequency:
character 1
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Country 0 1 7 11 0 5 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Year 0 1 2011.00 5.51 2002.00 2006.00 2011.00 2016.00 2020.00 ▇▇▆▇▇
RDef 0 1 -0.12 0.69 -2.25 -0.54 0.01 0.38 1.39 ▁▃▆▇▂
GDPpc 0 1 11090.08 6525.91 3106.58 5851.54 9216.44 14838.57 29750.27 ▇▆▂▂▁
GDPpc2 0 1 165129153.84 194332821.37 9650851.72 34241515.67 84942840.01 220187646.00 885078446.10 ▇▂▁▁▁
PopD 0 1 193.39 96.32 73.36 125.20 139.29 289.56 375.90 ▇▅▁▆▂
AgriLU 0 1 34.09 8.15 21.42 26.09 34.37 42.06 45.55 ▇▃▃▅▇
RENEW 0 1 25.26 13.14 2.00 20.15 26.90 34.00 52.20 ▅▃▇▆▂
Trade 0 1 108.71 46.25 32.97 60.84 120.84 138.08 210.37 ▇▂▇▃▂
CCor 0 1 -0.42 0.35 -1.14 -0.63 -0.48 -0.32 0.40 ▂▇▇▂▃
RegQ 0 1 -0.04 0.42 -0.87 -0.37 0.01 0.21 0.80 ▃▃▇▃▃
RLaw 0 1 -0.22 0.40 -0.91 -0.55 -0.34 0.06 0.57 ▂▇▃▂▃
str(thesis)
'data.frame':   95 obs. of  12 variables:
 $ Country: chr  "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
 $ Year   : int  2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 ...
 $ RDef   : num  0.16 0.161 0.161 0.161 0.162 ...
 $ GDPpc  : num  4509 4750 5051 5428 5821 ...
 $ GDPpc2 : num  20329061 22561009 25511722 29467841 33879978 ...
 $ PopD   : num  118 120 121 123 125 ...
 $ AgriLU : num  24.5 24.8 24.8 24.9 25.5 ...
 $ RENEW  : num  44.7 43 41.5 41.6 40.1 40 41.1 38.2 36 32.4 ...
 $ Trade  : num  59.1 53.6 59.8 64 56.7 ...
 $ CCor   : num  -1.137 -0.98 -0.977 -0.906 -0.864 ...
 $ RegQ   : num  -0.742 -0.866 -0.736 -0.647 -0.396 ...
 $ RLaw   : num  -0.91 -0.859 -0.733 -0.798 -0.694 ...

Correlation and VIF check

# Pairwise correlations between the outcome (RDef) and every candidate regressor
library(dplyr)
library(ggcorrplot)

# Keep only the numeric model variables (Country and Year are left out)
numerical_vars <- dplyr::select(
  thesis,
  RDef, GDPpc, GDPpc2, PopD, AgriLU, RENEW, Trade, CCor, RegQ, RLaw
)

# "complete.obs" drops any row that contains an NA before correlating
cor_matrix <- cor(numerical_vars, use = "complete.obs")
print(cor_matrix)
              RDef      GDPpc     GDPpc2       PopD     AgriLU      RENEW
RDef    1.00000000  0.2088263  0.1878000 -0.3921006 -0.2642595 -0.2813786
GDPpc   0.20882631  1.0000000  0.9687735 -0.6112429 -0.2151636 -0.8319080
GDPpc2  0.18780004  0.9687735  1.0000000 -0.5435059 -0.2611145 -0.7459108
PopD   -0.39210060 -0.6112429 -0.5435059  1.0000000  0.5165452  0.5577752
AgriLU -0.26425946 -0.2151636 -0.2611145  0.5165452  1.0000000  0.2766443
RENEW  -0.28137862 -0.8319080 -0.7459108  0.5577752  0.2766443  1.0000000
Trade  -0.32905849  0.4082120  0.3504682 -0.2603324 -0.1241431 -0.5395182
CCor    0.05021369  0.7767199  0.7297790 -0.5055260 -0.3433025 -0.8919754
RegQ    0.33256327  0.7701194  0.7042928 -0.4940333 -0.1290193 -0.8966702
RLaw    0.08677284  0.8262877  0.7707358 -0.5438441 -0.2229963 -0.9153047
            Trade        CCor       RegQ        RLaw
RDef   -0.3290585  0.05021369  0.3325633  0.08677284
GDPpc   0.4082120  0.77671994  0.7701194  0.82628768
GDPpc2  0.3504682  0.72977900  0.7042928  0.77073577
PopD   -0.2603324 -0.50552602 -0.4940333 -0.54384411
AgriLU -0.1241431 -0.34330252 -0.1290193 -0.22299631
RENEW  -0.5395182 -0.89197535 -0.8966702 -0.91530471
Trade   1.0000000  0.61561795  0.3168260  0.68265866
CCor    0.6156180  1.00000000  0.8322854  0.89616950
RegQ    0.3168260  0.83228544  1.0000000  0.80674692
RLaw    0.6826587  0.89616950  0.8067469  1.00000000
# Upper-triangle correlogram with the coefficient printed in each cell
ggcorrplot(
  corr = cor_matrix,
  method = "circle",
  type = "upper",
  lab = TRUE,
  lab_size = 3,
  colors = c("red", "white", "blue")
) +
  ggtitle(
    label = "Correlation Matrix of Numerical Variables",
    subtitle = "Panel Dataset 2002-2020"
  ) +
  theme_minimal()

library(car)

# Auxiliary OLS fit on all candidate regressors; it is only used to obtain
# the variance inflation factors
VIFcheck <- lm(
  formula = RDef ~ GDPpc + GDPpc2 + PopD + AgriLU + RENEW + Trade + CCor +
    RegQ + RLaw,
  data = thesis
)
VIFvalues <- vif(VIFcheck)

print(VIFvalues)
    GDPpc    GDPpc2      PopD    AgriLU     RENEW     Trade      CCor      RegQ 
52.912871 35.613794  3.277782  3.196470 20.174101  3.603101  9.537877 10.869833 
     RLaw 
13.457959 
# Same OLS fit as the VIF model but without RENEW; report its adjusted R-squared
model2 <- lm(
  formula = RDef ~ GDPpc + GDPpc2 + PopD + AgriLU + Trade + RegQ + RLaw + CCor,
  data = thesis
)
summary(model2)[["adj.r.squared"]]
[1] 0.4813273

Transforming Variables

library(psych)

# The three governance indicators that will be collapsed into one index
governance <- thesis[c("CCor", "RegQ", "RLaw")]

# Centre and scale each indicator before the PCA
governance_scaled <- scale(governance)

# Principal component analysis keeping a single, unrotated component
pca_result <- principal(governance_scaled, nfactors = 1, rotate = "none")

# The scores on that component become the governance index
thesis[["Gov_Index"]] <- pca_result$scores[, 1]

# Remove the original governance indicators, together with GDPpc2 and RENEW
thesis <- thesis[, setdiff(
  names(thesis),
  c("GDPpc2", "RENEW", "CCor", "RegQ", "RLaw")
)]

Rechecking of VIF and Correlation

library(ggplot2)
library(corrplot)
library(ggcorrplot)

# Regressors that remain after the governance PCA and after GDPpc2 and RENEW
# were dropped from `thesis`
independent_vars <- thesis %>% select(GDPpc, PopD, AgriLU, Trade, Gov_Index)

# Re-run the VIF check on the reduced regressor set. This section is titled
# "Rechecking of VIF and Correlation", but previously only the correlation
# matrix was recomputed and `independent_vars` was never used.
vif_recheck <- lm(
  reformulate(names(independent_vars), response = "RDef"),
  data = thesis
)
print(car::vif(vif_recheck))

# Correlations among all remaining numeric columns (outcome included)
cor_matrix <- cor(thesis %>% select(-Country, -Year), use = "complete.obs")
print(cor_matrix) # correlation matrix

ggcorrplot(cor_matrix, method = "circle", type = "upper", lab = TRUE, lab_size = 3, colors = c("green", "white", "red")) +
  labs(title = "Correlation Matrix of Variables",
       subtitle = "Panel Dataset from 2002-2020") +
  theme_minimal()

Converting Data into a Panel Data using plm function

library(plm)
library(psych)

# Natural log of GDP per capita, stored as a new column
thesis[["log_GDPpc"]] <- log(thesis[["GDPpc"]])

# How many RDef observations are zero or negative?
sum(thesis[["RDef"]] <= 0)
[1] 44
# Signed log transform, sign(x) * log(|x| + 1): RDef contains non-positive
# values, for which a plain log() is not defined
thesis[["log_RDef"]] <- with(thesis, sign(RDef) * log(abs(RDef) + 1))

head(thesis)
    Country Year      RDef    GDPpc     PopD   AgriLU    Trade  Gov_Index
1 Indonesia 2002 0.1602881 4508.776 118.2883 24.52226 59.07946 -1.9266834
2 Indonesia 2003 0.1605454 4749.843 119.8646 24.78856 53.61649 -1.8211118
3 Indonesia 2004 0.1608036 5050.913 121.3978 24.84183 59.76129 -1.5992592
4 Indonesia 2005 0.1610626 5428.429 122.9663 24.94835 63.98794 -1.5110343
5 Indonesia 2006 0.1615831 5820.651 124.6068 25.53423 56.65713 -1.1706477
6 Indonesia 2007 0.1618446 6268.284 126.2636 26.06685 54.82925 -0.9278005
  log_GDPpc  log_RDef
1  8.413781 0.1486683
2  8.465867 0.1488901
3  8.527324 0.1491125
4  8.599405 0.1493356
5  8.669167 0.1497838
6  8.743258 0.1500089
# Build the outcome (Y) and regressor (X) matrices straight from `thesis`.
# attach() is deliberately not used: it places a snapshot of the columns on
# the search path, so later edits to `thesis` are not reflected in the attached
# names and those names can mask or be masked by other objects.
# with() evaluates cbind() inside the data frame, so the column names of the
# resulting matrices are the same as before.
Y <- with(thesis, cbind(log_RDef))
X <- with(thesis, cbind(log_GDPpc, PopD, AgriLU, Trade, Gov_Index))

summary(Y)
    log_RDef       
 Min.   :-1.17740  
 1st Qu.:-0.43297  
 Median : 0.01189  
 Mean   :-0.07268  
 3rd Qu.: 0.32513  
 Max.   : 0.87029  
summary(X)
   log_GDPpc           PopD            AgriLU          Trade       
 Min.   : 8.041   Min.   : 73.36   Min.   :21.42   Min.   : 32.97  
 1st Qu.: 8.674   1st Qu.:125.20   1st Qu.:26.09   1st Qu.: 60.84  
 Median : 9.129   Median :139.29   Median :34.37   Median :120.84  
 Mean   : 9.150   Mean   :193.39   Mean   :34.09   Mean   :108.71  
 3rd Qu.: 9.605   3rd Qu.:289.56   3rd Qu.:42.06   3rd Qu.:138.08  
 Max.   :10.301   Max.   :375.90   Max.   :45.55   Max.   :210.37  
   Gov_Index      
 Min.   :-1.9267  
 1st Qu.:-0.7935  
 Median :-0.1649  
 Mean   : 0.0000  
 3rd Qu.: 0.3354  
 Max.   : 2.1858  
# Declare the panel structure: Country indexes the units, Year indexes time
paneldata <- pdata.frame(x = thesis, index = c("Country", "Year"))
# Report the panel dimensions (number of units, periods and observations)
pdim(paneldata)
Balanced Panel: n = 5, T = 19, N = 95
summary(paneldata)
        Country        Year         RDef              GDPpc      
 Indonesia  :19   2002   : 5   Min.   :-2.24593   Min.   : 3107  
 Malaysia   :19   2003   : 5   1st Qu.:-0.54182   1st Qu.: 5852  
 Philippines:19   2004   : 5   Median : 0.01196   Median : 9216  
 Thailand   :19   2005   : 5   Mean   :-0.12229   Mean   :11090  
 Vietnam    :19   2006   : 5   3rd Qu.: 0.38421   3rd Qu.:14839  
                  2007   : 5   Max.   : 1.38761   Max.   :29750  
                  (Other):65                                     
      PopD            AgriLU          Trade          Gov_Index      
 Min.   : 73.36   Min.   :21.42   Min.   : 32.97   Min.   :-1.9267  
 1st Qu.:125.20   1st Qu.:26.09   1st Qu.: 60.84   1st Qu.:-0.7935  
 Median :139.29   Median :34.37   Median :120.84   Median :-0.1649  
 Mean   :193.39   Mean   :34.09   Mean   :108.71   Mean   : 0.0000  
 3rd Qu.:289.56   3rd Qu.:42.06   3rd Qu.:138.08   3rd Qu.: 0.3354  
 Max.   :375.90   Max.   :45.55   Max.   :210.37   Max.   : 2.1858  
                                                                    
   log_GDPpc         log_RDef       
 Min.   : 8.041   Min.   :-1.17740  
 1st Qu.: 8.674   1st Qu.:-0.43297  
 Median : 9.129   Median : 0.01189  
 Mean   : 9.150   Mean   :-0.07268  
 3rd Qu.: 9.605   3rd Qu.: 0.32513  
 Max.   :10.301   Max.   : 0.87029  
                                    
describe(paneldata) 
          vars  n     mean      sd  median  trimmed     mad     min      max
Country*     1 95     3.00    1.42    3.00     3.00    1.48    1.00     5.00
Year*        2 95    10.00    5.51   10.00    10.00    7.41    1.00    19.00
RDef         3 95    -0.12    0.69    0.01    -0.10    0.81   -2.25     1.39
GDPpc        4 95 11090.08 6525.91 9216.44 10278.53 5633.77 3106.58 29750.27
PopD         5 95   193.39   96.32  139.29   187.46   74.91   73.36   375.90
AgriLU       6 95    34.09    8.15   34.37    34.27   11.75   21.42    45.55
Trade        7 95   108.71   46.25  120.84   107.09   54.85   32.97   210.37
Gov_Index    8 95     0.00    1.00   -0.16    -0.06    0.90   -1.93     2.19
log_GDPpc    9 95     9.15    0.58    9.13     9.14    0.69    8.04    10.30
log_RDef    10 95    -0.07    0.49    0.01    -0.06    0.65   -1.18     0.87
             range  skew kurtosis     se
Country*      4.00  0.00    -1.34   0.15
Year*        18.00  0.00    -1.24   0.56
RDef          3.63 -0.34    -0.36   0.07
GDPpc     26643.69  1.01     0.28 669.54
PopD        302.54  0.46    -1.43   9.88
AgriLU       24.13 -0.08    -1.55   0.84
Trade       177.40  0.11    -0.98   4.75
Gov_Index     4.11  0.56    -0.62   0.10
log_GDPpc     2.26  0.09    -0.95   0.06
log_RDef      2.05 -0.11    -1.19   0.05

Stationary/Unit Root Test

library(plm)
library(tseries)
library(urca)

# Augmented Dickey-Fuller test on GDPpc; the alternative hypothesis is that the
# series is stationary, so a small p-value is evidence of stationarity.
# NOTE(review): paneldata$GDPpc is the whole stacked column (N = 95 over the 5
# countries), so this and the following ADF calls treat the panel as a single
# continuous series. Confirm this is intended; a panel unit-root test such as
# plm::purtest() may be more appropriate.
ADFTest_GDPpc <- adf.test(paneldata$GDPpc, alternative = "stationary")
print(ADFTest_GDPpc)

    Augmented Dickey-Fuller Test

data:  paneldata$GDPpc
Dickey-Fuller = -2.7269, Lag order = 4, p-value = 0.2762
alternative hypothesis: stationary
ADFTest_RDef <- adf.test(paneldata$RDef, alternative = "stationary")
print(ADFTest_RDef)

    Augmented Dickey-Fuller Test

data:  paneldata$RDef
Dickey-Fuller = -3.4961, Lag order = 4, p-value = 0.04645
alternative hypothesis: stationary
ADFTest_PopD <- adf.test(paneldata$PopD, alternative = "stationary")
print(ADFTest_PopD)

    Augmented Dickey-Fuller Test

data:  paneldata$PopD
Dickey-Fuller = -2.1902, Lag order = 4, p-value = 0.4978
alternative hypothesis: stationary
ADFTest_Trade <- adf.test(paneldata$Trade, alternative = "stationary")
print(ADFTest_Trade)

    Augmented Dickey-Fuller Test

data:  paneldata$Trade
Dickey-Fuller = -2.6876, Lag order = 4, p-value = 0.2924
alternative hypothesis: stationary
ADFTest__AgriLU <- adf.test(paneldata$AgriLU, alternative = "stationary")
print(ADFTest__AgriLU)

    Augmented Dickey-Fuller Test

data:  paneldata$AgriLU
Dickey-Fuller = -1.6466, Lag order = 4, p-value = 0.7224
alternative hypothesis: stationary
ADFTest_Gov <- adf.test(paneldata$Gov_Index, alternative = "stationary")
print(ADFTest_Gov)

    Augmented Dickey-Fuller Test

data:  paneldata$Gov_Index
Dickey-Fuller = -2.5806, Lag order = 4, p-value = 0.3366
alternative hypothesis: stationary
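# Reading of the ADF results above (H0: unit root; alternative: stationary):
# only RDef rejects the unit-root null at the 5% level (p = 0.046).
# GDPpc, PopD, Trade, AgriLU and Gov_Index all have p > 0.05, so the tests do
# NOT support stationarity for those series.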

Exploratory Descriptive Analysis (selected)

library(ggplot2)
library(dplyr)
library(scales)

# Mean-impute missing RDef / GDPpc values BEFORE the plotting columns are
# derived, so that any imputed value is carried into the transformed series.
# (Previously this ran after the plotting data had already been built.)
thesis$RDef[is.na(thesis$RDef)] <- mean(thesis$RDef, na.rm = TRUE)
thesis$GDPpc[is.na(thesis$GDPpc)] <- mean(thesis$GDPpc, na.rm = TRUE)

# Plotting data with both transformed series.
# log_RDef uses the signed log, sign(x) * log(|x| + 1), because RDef takes
# non-positive values. It used to be set to the raw RDef, so every plot
# labelled "log(RDef)" was showing untransformed data.
# NOTE: the object keeps its original name `log`, which shares its name with
# base::log(); function calls such as log(GDPpc) still resolve to the function.
log <- thesis %>%
  mutate(
    log_RDef = sign(RDef) * log(abs(RDef) + 1),
    log_GDPpc = log(GDPpc)
  )

# Log GDP per capita over time, one line per country
ggplot(log, aes(x = Year, y = log_GDPpc, group = Country, color = Country)) +
  geom_line() +
  theme_minimal() +
  ggtitle("GDP per Capita Over Time")

# Signed-log deforestation measure over time, one line per country
ggplot(log, aes(x = Year, y = log_RDef, group = Country, color = Country)) +
  geom_line() +
  theme_minimal() +
  ggtitle("Rate of Deforestation per Capita Over Time")

# Deforestation against income, one panel per country
ggplot(log, aes(x = log_GDPpc, y = log_RDef)) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) + # smooth line to show the shape
  facet_wrap(~ Country, scales = "free") + # both axes are free in each panel
  labs(
    title = "Scatter Plot of log(RDef) vs. log(GDPpc) by Country",
    x = "log(GDP per capita)",
    y = "log(RDef)"
  ) +
  theme_minimal()

Pooled OLS

# Pooled OLS benchmark: regress the signed-log deforestation measure on log GDP
# per capita and the controls, with model = "pooling" (one common intercept and
# one set of slopes for all countries and years).
# NOTE(review): `paneldata` is already a pdata.frame built with the same index,
# so the `index` argument here is presumably redundant — confirm against the
# plm documentation.
Pooled <- plm(log_RDef ~ log_GDPpc + PopD + Trade + AgriLU + Gov_Index, 
                   data = paneldata, index = c("Country", "Year"), 
                   model = "pooling")
# Print coefficients, standard errors and fit statistics
summary(Pooled) 
Pooling Model

Call:
plm(formula = log_RDef ~ log_GDPpc + PopD + Trade + AgriLU + 
    Gov_Index, data = paneldata, model = "pooling", index = c("Country", 
    "Year"))

Balanced Panel: n = 5, T = 19, N = 95

Residuals:
     Min.   1st Qu.    Median   3rd Qu.      Max. 
-0.696203 -0.289698 -0.069755  0.309126  0.738431 

Coefficients:
               Estimate  Std. Error t-value  Pr(>|t|)    
(Intercept)  1.74771823  1.35136972  1.2933  0.199256    
log_GDPpc   -0.06414715  0.14577322 -0.4400  0.660970    
PopD        -0.00186609  0.00064608 -2.8883  0.004862 ** 
Trade       -0.00587586  0.00107477 -5.4671 4.137e-07 ***
AgriLU      -0.00685906  0.00618153 -1.1096  0.270158    
Gov_Index    0.14617244  0.08395553  1.7411  0.085127 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    22.172
Residual Sum of Squares: 13.64
R-Squared:      0.3848
Adj. R-Squared: 0.35024
F-statistic: 11.1336 on 5 and 89 DF, p-value: 2.3919e-08

Random Effects

ing ani ang mugawas inig run sa random effects Sir

Fixed Effects

# Fixed-effects (within) estimator on the SAME specification as the pooled
# model: log_RDef on log_GDPpc plus the controls. The earlier version used the
# untransformed RDef and GDPpc and listed Trade twice, so it was not comparable
# with `Pooled`; an F-test of fixed effects against pooled OLS needs both models
# to share the same dependent variable and regressors.
# NOTE: any summary printed below this chunk in the rendered document was
# produced by the earlier level specification; re-run the chunk to refresh it.
Fixed <- plm(log_RDef ~ log_GDPpc + PopD + Trade + AgriLU + Gov_Index,
             data = paneldata, index = c("Country", "Year"),
             model = "within")
summary(Fixed)
Oneway (individual) effect Within Model

Call:
plm(formula = RDef ~ GDPpc + PopD + Trade + AgriLU + Trade + 
    Gov_Index, data = paneldata, model = "within", index = c("Country", 
    "Year"))

Balanced Panel: n = 5, T = 19, N = 95

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-1.2911389 -0.2109381  0.0023661  0.2566497  0.7971983 

Coefficients:
             Estimate  Std. Error t-value  Pr(>|t|)    
GDPpc      1.0304e-04  2.2029e-05  4.6774 1.085e-05 ***
PopD      -1.9400e-02  3.8422e-03 -5.0492 2.497e-06 ***
Trade      1.4265e-02  2.7312e-03  5.2229 1.234e-06 ***
AgriLU     3.6044e-02  4.8990e-02  0.7357    0.4639    
Gov_Index  6.8211e-02  1.2992e-01  0.5250    0.6009    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    18.607
Residual Sum of Squares: 10.864
R-Squared:      0.41615
Adj. R-Squared: 0.35433
F-statistic: 12.1172 on 5 and 85 DF, p-value: 7.1654e-09

F-test

Since no estimates are shown for the random-effects model, I cannot use the Hausman test. Instead, I used an F-test to determine which model is best to use. However, the results are shown this way:

SIRRR unsaon ni nakoooo? hahahahah