thesis <- read.csv("DATA/data.csv")
Preliminary
Load the Data
Data Diagnostics
# Load necessary libraries
library(dplyr)
library(skimr)
# Summary of the dataset
summary(thesis) Country Year RDef GDPpc
Length:95 Min. :2002 Min. :-2.24593 Min. : 3107
Class :character 1st Qu.:2006 1st Qu.:-0.54182 1st Qu.: 5852
Mode :character Median :2011 Median : 0.01196 Median : 9216
Mean :2011 Mean :-0.12229 Mean :11090
3rd Qu.:2016 3rd Qu.: 0.38421 3rd Qu.:14839
Max. :2020 Max. : 1.38761 Max. :29750
GDPpc2 PopD AgriLU RENEW
Min. : 9650852 Min. : 73.36 Min. :21.42 Min. : 2.00
1st Qu.: 34241516 1st Qu.:125.20 1st Qu.:26.09 1st Qu.:20.15
Median : 84942840 Median :139.29 Median :34.37 Median :26.90
Mean :165129154 Mean :193.39 Mean :34.09 Mean :25.26
3rd Qu.:220187646 3rd Qu.:289.56 3rd Qu.:42.06 3rd Qu.:34.00
Max. :885078446 Max. :375.90 Max. :45.55 Max. :52.20
Trade CCor RegQ RLaw
Min. : 32.97 Min. :-1.1373 Min. :-0.866097 Min. :-0.91026
1st Qu.: 60.84 1st Qu.:-0.6329 1st Qu.:-0.373876 1st Qu.:-0.54605
Median :120.84 Median :-0.4829 Median : 0.007617 Median :-0.33899
Mean :108.71 Mean :-0.4202 Mean :-0.041053 Mean :-0.21699
3rd Qu.:138.08 3rd Qu.:-0.3186 3rd Qu.: 0.206029 3rd Qu.: 0.06385
Max. :210.37 Max. : 0.3965 Max. : 0.799231 Max. : 0.57312
# Count the NA entries in every column of thesis (one named count per column).
missing_values <- vapply(thesis, function(column) sum(is.na(column)), numeric(1))
missing_valuesCountry Year RDef GDPpc GDPpc2 PopD AgriLU RENEW Trade CCor
0 0 0 0 0 0 0 0 0 0
RegQ RLaw
0 0
# Additional diagnostics using skimr
skim(thesis)| Name | thesis |
| Number of rows | 95 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 11 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Country | 0 | 1 | 7 | 11 | 0 | 5 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Year | 0 | 1 | 2011.00 | 5.51 | 2002.00 | 2006.00 | 2011.00 | 2016.00 | 2020.00 | ▇▇▆▇▇ |
| RDef | 0 | 1 | -0.12 | 0.69 | -2.25 | -0.54 | 0.01 | 0.38 | 1.39 | ▁▃▆▇▂ |
| GDPpc | 0 | 1 | 11090.08 | 6525.91 | 3106.58 | 5851.54 | 9216.44 | 14838.57 | 29750.27 | ▇▆▂▂▁ |
| GDPpc2 | 0 | 1 | 165129153.84 | 194332821.37 | 9650851.72 | 34241515.67 | 84942840.01 | 220187646.00 | 885078446.10 | ▇▂▁▁▁ |
| PopD | 0 | 1 | 193.39 | 96.32 | 73.36 | 125.20 | 139.29 | 289.56 | 375.90 | ▇▅▁▆▂ |
| AgriLU | 0 | 1 | 34.09 | 8.15 | 21.42 | 26.09 | 34.37 | 42.06 | 45.55 | ▇▃▃▅▇ |
| RENEW | 0 | 1 | 25.26 | 13.14 | 2.00 | 20.15 | 26.90 | 34.00 | 52.20 | ▅▃▇▆▂ |
| Trade | 0 | 1 | 108.71 | 46.25 | 32.97 | 60.84 | 120.84 | 138.08 | 210.37 | ▇▂▇▃▂ |
| CCor | 0 | 1 | -0.42 | 0.35 | -1.14 | -0.63 | -0.48 | -0.32 | 0.40 | ▂▇▇▂▃ |
| RegQ | 0 | 1 | -0.04 | 0.42 | -0.87 | -0.37 | 0.01 | 0.21 | 0.80 | ▃▃▇▃▃ |
| RLaw | 0 | 1 | -0.22 | 0.40 | -0.91 | -0.55 | -0.34 | 0.06 | 0.57 | ▂▇▃▂▃ |
str(thesis)'data.frame': 95 obs. of 12 variables:
$ Country: chr "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
$ Year : int 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 ...
$ RDef : num 0.16 0.161 0.161 0.161 0.162 ...
$ GDPpc : num 4509 4750 5051 5428 5821 ...
$ GDPpc2 : num 20329061 22561009 25511722 29467841 33879978 ...
$ PopD : num 118 120 121 123 125 ...
$ AgriLU : num 24.5 24.8 24.8 24.9 25.5 ...
$ RENEW : num 44.7 43 41.5 41.6 40.1 40 41.1 38.2 36 32.4 ...
$ Trade : num 59.1 53.6 59.8 64 56.7 ...
$ CCor : num -1.137 -0.98 -0.977 -0.906 -0.864 ...
$ RegQ : num -0.742 -0.866 -0.736 -0.647 -0.396 ...
$ RLaw : num -0.91 -0.859 -0.733 -0.798 -0.694 ...
Correlation and VIF check
# Correlation and VIF check ----
library(dplyr)
library(ggcorrplot)

# Columns that enter the correlation matrix: the outcome (RDef) plus every
# candidate regressor. Country and Year are left out.
corr_columns <- c(
  "RDef", "GDPpc", "GDPpc2", "PopD", "AgriLU",
  "RENEW", "Trade", "CCor", "RegQ", "RLaw"
)
numerical_vars <- thesis[corr_columns]

# Pairwise correlations computed on rows with no missing values.
cor_matrix <- cor(numerical_vars, use = "complete.obs")
print(cor_matrix) RDef GDPpc GDPpc2 PopD AgriLU RENEW
RDef 1.00000000 0.2088263 0.1878000 -0.3921006 -0.2642595 -0.2813786
GDPpc 0.20882631 1.0000000 0.9687735 -0.6112429 -0.2151636 -0.8319080
GDPpc2 0.18780004 0.9687735 1.0000000 -0.5435059 -0.2611145 -0.7459108
PopD -0.39210060 -0.6112429 -0.5435059 1.0000000 0.5165452 0.5577752
AgriLU -0.26425946 -0.2151636 -0.2611145 0.5165452 1.0000000 0.2766443
RENEW -0.28137862 -0.8319080 -0.7459108 0.5577752 0.2766443 1.0000000
Trade -0.32905849 0.4082120 0.3504682 -0.2603324 -0.1241431 -0.5395182
CCor 0.05021369 0.7767199 0.7297790 -0.5055260 -0.3433025 -0.8919754
RegQ 0.33256327 0.7701194 0.7042928 -0.4940333 -0.1290193 -0.8966702
RLaw 0.08677284 0.8262877 0.7707358 -0.5438441 -0.2229963 -0.9153047
Trade CCor RegQ RLaw
RDef -0.3290585 0.05021369 0.3325633 0.08677284
GDPpc 0.4082120 0.77671994 0.7701194 0.82628768
GDPpc2 0.3504682 0.72977900 0.7042928 0.77073577
PopD -0.2603324 -0.50552602 -0.4940333 -0.54384411
AgriLU -0.1241431 -0.34330252 -0.1290193 -0.22299631
RENEW -0.5395182 -0.89197535 -0.8966702 -0.91530471
Trade 1.0000000 0.61561795 0.3168260 0.68265866
CCor 0.6156180 1.00000000 0.8322854 0.89616950
RegQ 0.3168260 0.83228544 1.0000000 0.80674692
RLaw 0.6826587 0.89616950 0.8067469 1.00000000
# Plot the upper triangle of the correlation matrix with coefficient labels.
ggcorrplot(cor_matrix, method = "circle", type = "upper", lab = TRUE,
           lab_size = 3, colors = c("red", "white", "blue")) +
  labs(title = "Correlation Matrix of Numerical Variables",
       subtitle = "Panel Dataset 2002-2020") +
  theme_minimal()

# car provides vif(). This call used to be fused onto the end of the plot
# statement, which is a syntax error when the chunk is run as written.
library(car)

# Auxiliary OLS containing every candidate regressor, fitted only so that
# vif() can report a variance inflation factor for each term.
VIFcheck <- lm(
  RDef ~ GDPpc + GDPpc2 + PopD + AgriLU + RENEW + Trade + CCor + RegQ + RLaw,
  data = thesis
)
VIFvalues <- vif(VIFcheck)
print(VIFvalues) GDPpc GDPpc2 PopD AgriLU RENEW Trade CCor RegQ
52.912871 35.613794 3.277782 3.196470 20.174101 3.603101 9.537877 10.869833
RLaw
13.457959
model2 <- lm(RDef ~ GDPpc + GDPpc2 + PopD + AgriLU + Trade + RegQ + RLaw + CCor, data = thesis)
summary(model2)$adj.r.squared[1] 0.4813273
Transforming Variables
library(psych)
# Select the three governance indicators that will be combined into one index.
governance <- thesis[, c("CCor", "RegQ", "RLaw")]
# Standardize (centre and scale) each indicator before PCA.
governance_scaled <- scale(governance)
# Perform PCA, extracting a single unrotated component.
pca_result <- principal(governance_scaled, nfactors = 1, rotate = "none")
# Use the component scores as the governance index.
thesis$Gov_Index <- pca_result$scores[, 1]
# Drop the original governance variables together with GDPpc2 and RENEW,
# so only the index and the retained regressors remain in thesis.
thesis <- thesis[, !(names(thesis) %in% c("GDPpc2", "RENEW", "CCor", "RegQ", "RLaw"))]
Rechecking of VIF and Correlation
library(ggplot2)
library(corrplot)
library(ggcorrplot)

# Regressors kept for the recheck; used below to build the VIF formula.
independent_vars <- thesis %>% select(GDPpc, PopD, AgriLU, Trade, Gov_Index)

# Correlation matrix of every remaining column except the panel identifiers.
cor_matrix <- cor(thesis %>% select(-Country, -Year), use = "complete.obs")
print(cor_matrix) #correlation matrix
ggcorrplot(cor_matrix, method = "circle", type = "upper", lab = TRUE,
           lab_size = 3, colors = c("green", "white", "red")) +
  labs(title = "Correlation Matrix of Variables",
       subtitle = "Panel Dataset from 2002-2020") +
  theme_minimal()

# Recheck VIF on the reduced regressor set. This section previously recomputed
# only the correlations, and independent_vars was never used.
vif_recheck <- lm(
  reformulate(names(independent_vars), response = "RDef"),
  data = thesis
)
print(car::vif(vif_recheck))
Converting Data into a Panel Data using plm function
library(plm)
library(psych)
# Create a log-transformed variable for GDPpc
thesis$log_GDPpc <- log(thesis$GDPpc)
# Check for zeros or negative values in RDef
sum(thesis$RDef <= 0)[1] 44
# RDef contains zero/negative values (counted by the check above), so a plain
# log() is undefined for them. Use a sign-preserving log instead:
# sign(x) * log(|x| + 1), which maps 0 to 0 and keeps the sign of RDef.
thesis$log_RDef <- sign(thesis$RDef) * log(abs(thesis$RDef) + 1)
head(thesis) Country Year RDef GDPpc PopD AgriLU Trade Gov_Index
1 Indonesia 2002 0.1602881 4508.776 118.2883 24.52226 59.07946 -1.9266834
2 Indonesia 2003 0.1605454 4749.843 119.8646 24.78856 53.61649 -1.8211118
3 Indonesia 2004 0.1608036 5050.913 121.3978 24.84183 59.76129 -1.5992592
4 Indonesia 2005 0.1610626 5428.429 122.9663 24.94835 63.98794 -1.5110343
5 Indonesia 2006 0.1615831 5820.651 124.6068 25.53423 56.65713 -1.1706477
6 Indonesia 2007 0.1618446 6268.284 126.2636 26.06685 54.82925 -0.9278005
log_GDPpc log_RDef
1 8.413781 0.1486683
2 8.465867 0.1488901
3 8.527324 0.1491125
4 8.599405 0.1493356
5 8.669167 0.1497838
6 8.743258 0.1500089
# Build the outcome and regressor matrices without attach(). with() evaluates
# the column names inside thesis for this expression only, so nothing is left
# on the search path to mask (or be masked by) other objects later on.
# Column names of Y and X are still taken from the variable names.
Y <- with(thesis, cbind(log_RDef))
X <- with(thesis, cbind(log_GDPpc, PopD, AgriLU, Trade, Gov_Index))
summary(Y) log_RDef
Min. :-1.17740
1st Qu.:-0.43297
Median : 0.01189
Mean :-0.07268
3rd Qu.: 0.32513
Max. : 0.87029
summary(X) log_GDPpc PopD AgriLU Trade
Min. : 8.041 Min. : 73.36 Min. :21.42 Min. : 32.97
1st Qu.: 8.674 1st Qu.:125.20 1st Qu.:26.09 1st Qu.: 60.84
Median : 9.129 Median :139.29 Median :34.37 Median :120.84
Mean : 9.150 Mean :193.39 Mean :34.09 Mean :108.71
3rd Qu.: 9.605 3rd Qu.:289.56 3rd Qu.:42.06 3rd Qu.:138.08
Max. :10.301 Max. :375.90 Max. :45.55 Max. :210.37
Gov_Index
Min. :-1.9267
1st Qu.:-0.7935
Median :-0.1649
Mean : 0.0000
3rd Qu.: 0.3354
Max. : 2.1858
paneldata <- pdata.frame(thesis, index = c("Country", "Year"))
pdim(paneldata)Balanced Panel: n = 5, T = 19, N = 95
summary(paneldata) Country Year RDef GDPpc
Indonesia :19 2002 : 5 Min. :-2.24593 Min. : 3107
Malaysia :19 2003 : 5 1st Qu.:-0.54182 1st Qu.: 5852
Philippines:19 2004 : 5 Median : 0.01196 Median : 9216
Thailand :19 2005 : 5 Mean :-0.12229 Mean :11090
Vietnam :19 2006 : 5 3rd Qu.: 0.38421 3rd Qu.:14839
2007 : 5 Max. : 1.38761 Max. :29750
(Other):65
PopD AgriLU Trade Gov_Index
Min. : 73.36 Min. :21.42 Min. : 32.97 Min. :-1.9267
1st Qu.:125.20 1st Qu.:26.09 1st Qu.: 60.84 1st Qu.:-0.7935
Median :139.29 Median :34.37 Median :120.84 Median :-0.1649
Mean :193.39 Mean :34.09 Mean :108.71 Mean : 0.0000
3rd Qu.:289.56 3rd Qu.:42.06 3rd Qu.:138.08 3rd Qu.: 0.3354
Max. :375.90 Max. :45.55 Max. :210.37 Max. : 2.1858
log_GDPpc log_RDef
Min. : 8.041 Min. :-1.17740
1st Qu.: 8.674 1st Qu.:-0.43297
Median : 9.129 Median : 0.01189
Mean : 9.150 Mean :-0.07268
3rd Qu.: 9.605 3rd Qu.: 0.32513
Max. :10.301 Max. : 0.87029
describe(paneldata) vars n mean sd median trimmed mad min max
Country* 1 95 3.00 1.42 3.00 3.00 1.48 1.00 5.00
Year* 2 95 10.00 5.51 10.00 10.00 7.41 1.00 19.00
RDef 3 95 -0.12 0.69 0.01 -0.10 0.81 -2.25 1.39
GDPpc 4 95 11090.08 6525.91 9216.44 10278.53 5633.77 3106.58 29750.27
PopD 5 95 193.39 96.32 139.29 187.46 74.91 73.36 375.90
AgriLU 6 95 34.09 8.15 34.37 34.27 11.75 21.42 45.55
Trade 7 95 108.71 46.25 120.84 107.09 54.85 32.97 210.37
Gov_Index 8 95 0.00 1.00 -0.16 -0.06 0.90 -1.93 2.19
log_GDPpc 9 95 9.15 0.58 9.13 9.14 0.69 8.04 10.30
log_RDef 10 95 -0.07 0.49 0.01 -0.06 0.65 -1.18 0.87
range skew kurtosis se
Country* 4.00 0.00 -1.34 0.15
Year* 18.00 0.00 -1.24 0.56
RDef 3.63 -0.34 -0.36 0.07
GDPpc 26643.69 1.01 0.28 669.54
PopD 302.54 0.46 -1.43 9.88
AgriLU 24.13 -0.08 -1.55 0.84
Trade 177.40 0.11 -0.98 4.75
Gov_Index 4.11 0.56 -0.62 0.10
log_GDPpc 2.26 0.09 -0.95 0.06
log_RDef 2.05 -0.11 -1.19 0.05
Stationary/Unit Root Test
library(plm)
library(tseries)
library(urca)
ADFTest_GDPpc <- adf.test(paneldata$GDPpc, alternative = "stationary")
print(ADFTest_GDPpc)
Augmented Dickey-Fuller Test
data: paneldata$GDPpc
Dickey-Fuller = -2.7269, Lag order = 4, p-value = 0.2762
alternative hypothesis: stationary
ADFTest_RDef <- adf.test(paneldata$RDef, alternative = "stationary")
print(ADFTest_RDef)
Augmented Dickey-Fuller Test
data: paneldata$RDef
Dickey-Fuller = -3.4961, Lag order = 4, p-value = 0.04645
alternative hypothesis: stationary
ADFTest_PopD <- adf.test(paneldata$PopD, alternative = "stationary")
print(ADFTest_PopD)
Augmented Dickey-Fuller Test
data: paneldata$PopD
Dickey-Fuller = -2.1902, Lag order = 4, p-value = 0.4978
alternative hypothesis: stationary
ADFTest_Trade <- adf.test(paneldata$Trade, alternative = "stationary")
print(ADFTest_Trade)
Augmented Dickey-Fuller Test
data: paneldata$Trade
Dickey-Fuller = -2.6876, Lag order = 4, p-value = 0.2924
alternative hypothesis: stationary
ADFTest__AgriLU <- adf.test(paneldata$AgriLU, alternative = "stationary")
print(ADFTest__AgriLU)
Augmented Dickey-Fuller Test
data: paneldata$AgriLU
Dickey-Fuller = -1.6466, Lag order = 4, p-value = 0.7224
alternative hypothesis: stationary
ADFTest_Gov <- adf.test(paneldata$Gov_Index, alternative = "stationary")
print(ADFTest_Gov)
Augmented Dickey-Fuller Test
data: paneldata$Gov_Index
Dickey-Fuller = -2.5806, Lag order = 4, p-value = 0.3366
alternative hypothesis: stationary
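# ADF results at the 5% level: only RDef rejects the unit-root null (p = 0.046). GDPpc, PopD, Trade, AgriLU and Gov_Index do not (all p > 0.05), so they cannot be called stationary in levels.
# Caveat: adf.test() treats each column as one 95-observation series (the five countries stacked end to end); a panel unit-root test such as plm::purtest() is more appropriate here.
Exploratory Descriptive Analysis (selected)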
library(ggplot2)

# Mean-impute any missing RDef / GDPpc values BEFORE deriving the plotting
# columns. This step used to run after the plotting copy was made, so it could
# never affect the plots.
thesis$RDef[is.na(thesis$RDef)] <- mean(thesis$RDef, na.rm = TRUE)
thesis$GDPpc[is.na(thesis$GDPpc)] <- mean(thesis$GDPpc, na.rm = TRUE)

# Plotting copy with the transformed variables.
# - log_RDef uses the same sign-preserving log as the model variable
#   (sign(x) * log(|x| + 1)); it was previously set to raw RDef even though the
#   axes are labelled log(RDef).
# - The object is named plot_data rather than `log`, which masked the name of
#   the base log() function.
plot_data <- thesis %>%
  mutate(
    log_RDef = sign(RDef) * log(abs(RDef) + 1),
    log_GDPpc = log(GDPpc)
  )

# Log GDP per capita over time, one line per country.
ggplot(plot_data, aes(x = Year, y = log_GDPpc, group = Country, color = Country)) +
  geom_line() +
  theme_minimal() +
  ggtitle("GDP per Capita Over Time")

# Signed-log deforestation rate over time, one line per country.
ggplot(plot_data, aes(x = Year, y = log_RDef, group = Country, color = Country)) +
  geom_line() +
  theme_minimal() +
  ggtitle("Rate of Deforestation per Capita Over Time")

library(ggplot2)
library(dplyr)
library(scales)

# Scatter of log(RDef) against log(GDPpc), one panel per country.
ggplot(plot_data, aes(x = log_GDPpc, y = log_RDef)) +
  geom_point() +
  # Loess smoother (no confidence band) to visualise the relationship.
  geom_smooth(method = "loess", se = FALSE) +
  # scales = "free" lets both the x and y axes vary between panels.
  facet_wrap(~ Country, scales = "free") +
  labs(title = "Scatter Plot of log(RDef) vs. log(GDPpc) by Country",
       x = "log(GDP per capita)",
       y = "log(RDef)") +
  theme_minimal()
Pooled OLS
# Pooled OLS via plm (model = "pooling"): regresses log_RDef on log_GDPpc,
# PopD, Trade, AgriLU and Gov_Index using all rows of paneldata, with
# Country and Year given as the panel index.
Pooled <- plm(log_RDef ~ log_GDPpc + PopD + Trade + AgriLU + Gov_Index,
data = paneldata, index = c("Country", "Year"),
model = "pooling")
summary(Pooled) Pooling Model
Call:
plm(formula = log_RDef ~ log_GDPpc + PopD + Trade + AgriLU +
Gov_Index, data = paneldata, model = "pooling", index = c("Country",
"Year"))
Balanced Panel: n = 5, T = 19, N = 95
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-0.696203 -0.289698 -0.069755 0.309126 0.738431
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
(Intercept) 1.74771823 1.35136972 1.2933 0.199256
log_GDPpc -0.06414715 0.14577322 -0.4400 0.660970
PopD -0.00186609 0.00064608 -2.8883 0.004862 **
Trade -0.00587586 0.00107477 -5.4671 4.137e-07 ***
AgriLU -0.00685906 0.00618153 -1.1096 0.270158
Gov_Index 0.14617244 0.08395553 1.7411 0.085127 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 22.172
Residual Sum of Squares: 13.64
R-Squared: 0.3848
Adj. R-Squared: 0.35024
F-statistic: 11.1336 on 5 and 89 DF, p-value: 2.3919e-08
Random Effects
ing ani ang mugawas inig run sa random effects Sir
Fixed Effects
# Fixed-effects (within) estimator, using the SAME specification as the pooled
# model: log_RDef on log_GDPpc, PopD, Trade, AgriLU and Gov_Index.
# It was previously fitted on RDef and GDPpc in levels, with Trade listed
# twice, so the pooled and within fits had different responses and regressors
# and could not be compared in an F-test (pFtest) or a Hausman test.
# NOTE: the printed summary that follows in this document came from the old
# specification and must be regenerated.
Fixed <- plm(log_RDef ~ log_GDPpc + PopD + Trade + AgriLU + Gov_Index,
             data = paneldata, index = c("Country", "Year"),
             model = "within")
summary(Fixed)Oneway (individual) effect Within Model
Call:
plm(formula = RDef ~ GDPpc + PopD + Trade + AgriLU + Trade +
Gov_Index, data = paneldata, model = "within", index = c("Country",
"Year"))
Balanced Panel: n = 5, T = 19, N = 95
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-1.2911389 -0.2109381 0.0023661 0.2566497 0.7971983
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
GDPpc 1.0304e-04 2.2029e-05 4.6774 1.085e-05 ***
PopD -1.9400e-02 3.8422e-03 -5.0492 2.497e-06 ***
Trade 1.4265e-02 2.7312e-03 5.2229 1.234e-06 ***
AgriLU 3.6044e-02 4.8990e-02 0.7357 0.4639
Gov_Index 6.8211e-02 1.2992e-01 0.5250 0.6009
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 18.607
Residual Sum of Squares: 10.864
R-Squared: 0.41615
Adj. R-Squared: 0.35433
F-statistic: 12.1172 on 5 and 85 DF, p-value: 7.1654e-09
F-test
Since no estimates are shown for the random effects model, I cannot use the Hausman test. Instead, I used an F-test to determine which model is best to use. However, the results are shown this way:
SIRRR unsaon ni nakoooo? hahahahah