1. Enter data into RStudio and perform descriptive statistics on all variables after encoding dummy variables and categorical variables.
setwd("D:/dataR")
drug3 = read.csv("drug3.csv")
drug3$urbanize <- as.numeric(drug3$urbanize == "urban", 1)
drug3$countryborder <- as.numeric(drug3$countryborder == "border", 1)
drug3$livelihood <- as.numeric(drug3$livelihood == "industry","selfempl", 1)
View(drug3)
library("psych")
describe(drug3)
##               vars    n    mean     sd  median trimmed    mad     min     max
## commune          1 2832  177.50 102.21  177.50  177.50 131.21    1.00  354.00
## year             2 2832 2018.50   2.29 2018.50 2018.50   2.97 2015.00 2022.00
## drugusers        3 2832   13.38   6.72   12.00   12.48   5.93    2.00   54.00
## landprice        4 2832   20.58  10.35   18.95   19.83  11.07    0.70   50.67
## unempl           5 2832    2.55   1.27    2.41    2.48   1.20   -0.52    7.45
## pcgdp            6 2832   69.27  13.79   68.32   68.78  12.76   32.80  123.08
## urbanize         7 2832    0.26   0.44    0.00    0.19   0.00    0.00    1.00
## countryborder    8 2832    0.13   0.34    0.00    0.04   0.00    0.00    1.00
## livelihood       9 2832    0.24   0.43    0.00    0.18   0.00    0.00    1.00
##                range skew kurtosis   se
## commune       353.00 0.00    -1.20 1.92
## year            7.00 0.00    -1.24 0.04
## drugusers      52.00 1.75     4.77 0.13
## landprice      49.97 0.58    -0.33 0.19
## unempl          7.97 0.58     0.40 0.02
## pcgdp          90.28 0.40     0.31 0.26
## urbanize        1.00 1.12    -0.74 0.01
## countryborder   1.00 2.16     2.68 0.01
## livelihood      1.00 1.21    -0.53 0.01

2.Fit a regression model with the specified function form using the “lm” command (Pooled OLS).

pols = lm(log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize + countryborder + livelihood, data = drug3)
summary(pols)
## 
## Call:
## lm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     urbanize + countryborder + livelihood, data = drug3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66446 -0.28262 -0.01939  0.26849  1.66999 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.7460597  0.1776187   9.830  < 2e-16 ***
## landprice      0.0010548  0.0008128   1.298  0.19450    
## unempl         0.0852732  0.0066608  12.802  < 2e-16 ***
## log(pcgdp)     0.1077646  0.0420887   2.560  0.01051 *  
## urbanize       0.0619526  0.0194601   3.184  0.00147 ** 
## countryborder -0.0488710  0.0248627  -1.966  0.04944 *  
## livelihood     0.1488582  0.0197039   7.555 5.63e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4461 on 2825 degrees of freedom
## Multiple R-squared:  0.07936,    Adjusted R-squared:  0.0774 
## F-statistic: 40.58 on 6 and 2825 DF,  p-value: < 2.2e-16

3.Fit the specified function form regression with (a) robust standard error estimation and (b) clustered standard error estimation (aggregated by commune/ward).

library(sandwich)
library(lmtest)
coeftest(pols, vcov = vcovHC(pols, type = "HC1")) #Robust Standart Errors
## 
## t test of coefficients:
## 
##                  Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)    1.74605966  0.17531289  9.9597 < 2.2e-16 ***
## landprice      0.00105477  0.00080417  1.3116  0.189753    
## unempl         0.08527316  0.00676468 12.6056 < 2.2e-16 ***
## log(pcgdp)     0.10776462  0.04223779  2.5514  0.010782 *  
## urbanize       0.06195259  0.01915145  3.2349  0.001231 ** 
## countryborder -0.04887104  0.02687399 -1.8185  0.069090 .  
## livelihood     0.14885820  0.01886350  7.8913 4.239e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(clubSandwich)
coef_test(pols, vcov = "CR2", cluster = drug3$commune ) #Cluster Standard Errors
##          Coef. Estimate      SE t-stat d.f. (Satt) p-val (Satt) Sig.
##    (Intercept)  1.74606 0.45223  3.861       109.3      < 0.001  ***
##      landprice  0.00105 0.00207  0.509       132.7      0.61149     
##         unempl  0.08527 0.01720  4.959       107.4      < 0.001  ***
##     log(pcgdp)  0.10776 0.10937  0.985       110.1      0.32664     
##       urbanize  0.06195 0.04876  1.271       156.2      0.20576     
##  countryborder -0.04887 0.07107 -0.688        61.7      0.49425     
##     livelihood  0.14886 0.04827  3.084       143.7      0.00245   **
  1. Fit the specified model using the “plm” command (Pooled OLS)
library(plm)
pols2 = plm(log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize + countryborder + livelihood, data = drug3)
summary(pols2)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     urbanize + countryborder + livelihood, data = drug3)
## 
## Balanced Panel: n = 354, T = 8, N = 2832
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -0.77232694 -0.12388020  0.00059713  0.12677222  0.58150898 
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## landprice   0.0019347  0.0034052  0.5682   0.56997    
## unempl      0.0896807  0.0157380  5.6983 1.353e-08 ***
## log(pcgdp)  0.3639506  0.2126983  1.7111   0.08719 .  
## urbanize   -0.0315378  0.0498924 -0.6321   0.52737    
## livelihood  0.0371870  0.0532107  0.6989   0.48470    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    101.38
## Residual Sum of Squares: 99.811
## R-Squared:      0.015503
## Adj. R-Squared: -0.12702
## F-statistic: 7.78845 on 5 and 2473 DF, p-value: 2.7771e-07

5.Fit the specified model using fixed effects (FE) with within-group estimator.

library(plm)
fe = plm(log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize + countryborder + livelihood, data = drug3, index = c("commune", "year"),model = "within")
summary(fe)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     urbanize + countryborder + livelihood, data = drug3, model = "within", 
##     index = c("commune", "year"))
## 
## Balanced Panel: n = 354, T = 8, N = 2832
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -0.77232694 -0.12388020  0.00059713  0.12677222  0.58150898 
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## landprice   0.0019347  0.0034052  0.5682   0.56997    
## unempl      0.0896807  0.0157380  5.6983 1.353e-08 ***
## log(pcgdp)  0.3639506  0.2126983  1.7111   0.08719 .  
## urbanize   -0.0315378  0.0498924 -0.6321   0.52737    
## livelihood  0.0371870  0.0532107  0.6989   0.48470    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    101.38
## Residual Sum of Squares: 99.811
## R-Squared:      0.015503
## Adj. R-Squared: -0.12702
## F-statistic: 7.78845 on 5 and 2473 DF, p-value: 2.7771e-07
  1. Present the regression results of the model in no 5 with (a) robust standard error estimation and (b) clustered standard error estimation (aggregated by commune/ward).
coeftest(fe, vcov = vcovHC(fe, type = "HC1")) #Robust Standard Errors
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)    
## landprice   0.0019347  0.0033844  0.5716   0.5676    
## unempl      0.0896807  0.0150871  5.9442 3.17e-09 ***
## log(pcgdp)  0.3639506  0.2114228  1.7214   0.0853 .  
## urbanize   -0.0315378  0.0475882 -0.6627   0.5076    
## livelihood  0.0371870  0.0499557  0.7444   0.4567    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef_test(fe, vcov = "CR2", cluster = drug3$commune) #Cluster Standard Errors
##       Coef. Estimate     SE t-stat d.f. (Satt) p-val (Satt) Sig.
##   landprice  0.00193 0.0034  0.569       170.4       0.5702     
##      unempl  0.08968 0.0152  5.919       169.6       <0.001  ***
##  log(pcgdp)  0.36395 0.2120  1.716       251.6       0.0873    .
##    urbanize -0.03154 0.0491 -0.642        22.1       0.5274     
##  livelihood  0.03719 0.0525  0.709        10.1       0.4945

7.Fit the specified model using one-way least square dummy estimator. Test the regression results with (a) robust standard error estimation and (b) clustered standard error estimation. Only present the coefficient estimates of the independent variables, not the estimates for the dummy variables.

fel = lm(log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize + countryborder + livelihood, data = drug3)

summary(fel)
## 
## Call:
## lm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     urbanize + countryborder + livelihood, data = drug3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66446 -0.28262 -0.01939  0.26849  1.66999 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.7460597  0.1776187   9.830  < 2e-16 ***
## landprice      0.0010548  0.0008128   1.298  0.19450    
## unempl         0.0852732  0.0066608  12.802  < 2e-16 ***
## log(pcgdp)     0.1077646  0.0420887   2.560  0.01051 *  
## urbanize       0.0619526  0.0194601   3.184  0.00147 ** 
## countryborder -0.0488710  0.0248627  -1.966  0.04944 *  
## livelihood     0.1488582  0.0197039   7.555 5.63e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4461 on 2825 degrees of freedom
## Multiple R-squared:  0.07936,    Adjusted R-squared:  0.0774 
## F-statistic: 40.58 on 6 and 2825 DF,  p-value: < 2.2e-16
coeftest(fel, vcov. = vcovHC, type = "HC1", ) #Robust Standard Errors
## 
## t test of coefficients:
## 
##                  Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)    1.74605966  0.17531289  9.9597 < 2.2e-16 ***
## landprice      0.00105477  0.00080417  1.3116  0.189753    
## unempl         0.08527316  0.00676468 12.6056 < 2.2e-16 ***
## log(pcgdp)     0.10776462  0.04223779  2.5514  0.010782 *  
## urbanize       0.06195259  0.01915145  3.2349  0.001231 ** 
## countryborder -0.04887104  0.02687399 -1.8185  0.069090 .  
## livelihood     0.14885820  0.01886350  7.8913 4.239e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef_test(fel, vcov = "CR2", cluster = drug3$commune) #Cluster Standard Errors
##          Coef. Estimate      SE t-stat d.f. (Satt) p-val (Satt) Sig.
##    (Intercept)  1.74606 0.45223  3.861       109.3      < 0.001  ***
##      landprice  0.00105 0.00207  0.509       132.7      0.61149     
##         unempl  0.08527 0.01720  4.959       107.4      < 0.001  ***
##     log(pcgdp)  0.10776 0.10937  0.985       110.1      0.32664     
##       urbanize  0.06195 0.04876  1.271       156.2      0.20576     
##  countryborder -0.04887 0.07107 -0.688        61.7      0.49425     
##     livelihood  0.14886 0.04827  3.084       143.7      0.00245   **
  1. Fit the specified model using two-way FE with the group variable estimator (LSDV).
fe2 = lm(log(drugusers) ~ landprice + unempl + log(pcgdp) + factor(urbanize) + factor(countryborder) + factor(livelihood), data = drug3)
summary(fe2)
## 
## Call:
## lm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     factor(urbanize) + factor(countryborder) + factor(livelihood), 
##     data = drug3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66446 -0.28262 -0.01939  0.26849  1.66999 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.7460597  0.1776187   9.830  < 2e-16 ***
## landprice               0.0010548  0.0008128   1.298  0.19450    
## unempl                  0.0852732  0.0066608  12.802  < 2e-16 ***
## log(pcgdp)              0.1077646  0.0420887   2.560  0.01051 *  
## factor(urbanize)1       0.0619526  0.0194601   3.184  0.00147 ** 
## factor(countryborder)1 -0.0488710  0.0248627  -1.966  0.04944 *  
## factor(livelihood)1     0.1488582  0.0197039   7.555 5.63e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4461 on 2825 degrees of freedom
## Multiple R-squared:  0.07936,    Adjusted R-squared:  0.0774 
## F-statistic: 40.58 on 6 and 2825 DF,  p-value: < 2.2e-16
  1. Perform regression analysis using random effects (RE) with (a) robust standard error estimation and (b) clustered standard error estimation. Test the regression results of the RE model.
library(plm)
re = plm(log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize + countryborder + livelihood, data = drug3, index = c("commune", "year"),model = "random")
summary(re)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = log(drugusers) ~ landprice + unempl + log(pcgdp) + 
##     urbanize + countryborder + livelihood, data = drug3, model = "random", 
##     index = c("commune", "year"))
## 
## Balanced Panel: n = 354, T = 8, N = 2832
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.04036 0.20090   0.2
## individual    0.16140 0.40174   0.8
## theta: 0.8259
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.8207152 -0.1316932 -0.0014731  0.1359764  0.6392281 
## 
## Coefficients:
##                 Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept)    1.5808586  0.4068432  3.8857  0.000102 ***
## landprice      0.0011919  0.0017929  0.6648  0.506186    
## unempl         0.0877354  0.0116866  7.5074 6.033e-14 ***
## log(pcgdp)     0.1501467  0.0969664  1.5484  0.121516    
## urbanize       0.0222815  0.0344345  0.6471  0.517588    
## countryborder -0.0539014  0.0641089 -0.8408  0.400472    
## livelihood     0.0996070  0.0369283  2.6973  0.006990 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    116.82
## Residual Sum of Squares: 114
## R-Squared:      0.024134
## Adj. R-Squared: 0.022061
## Chisq: 69.8638 on 6 DF, p-value: 4.3605e-13
library(lmtest)
coeftest(re, vcov. = vcovHC, type = "HC1") #Robust Standard Errors
## 
## t test of coefficients:
## 
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)    1.5808586  0.3946021  4.0062 6.329e-05 ***
## landprice      0.0011919  0.0017650  0.6753  0.499544    
## unempl         0.0877354  0.0116114  7.5559 5.583e-14 ***
## log(pcgdp)     0.1501467  0.0953985  1.5739  0.115625    
## urbanize       0.0222815  0.0330478  0.6742  0.500227    
## countryborder -0.0539014  0.0694475 -0.7761  0.437728    
## livelihood     0.0996070  0.0347584  2.8657  0.004192 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Haumans Test
phtest(fe, re)
## 
##  Hausman Test
## 
## data:  log(drugusers) ~ landprice + unempl + log(pcgdp) + urbanize +  ...
## chisq = 4.5399, df = 5, p-value = 0.4746
## alternative hypothesis: one model is inconsistent