패널 데이터

install.packages(“foreign”)

library(foreign)

grdp <- read.dta("F:\\cloud\\2015년 여름방학\\panel\\panel_grdp.dta")

head(grdp, 0)
##  [1] year        id          name        sudo        ln_grdp    
##  [6] ln_edu      ln_rnd      _Iid_1      _Iid_2      _Iid_3     
## [11] _Iid_4      _Iid_5      _Iid_6      _Iid_7      _Iid_8     
## [16] _Iid_9      _Iid_10     _Iid_11     _Iid_12     _Iid_13    
## [21] _Iid_14     _Iid_15     _Iid_16     _Iyear_2000 _Iyear_2001
## [26] _Iyear_2002 _Iyear_2003 _Iyear_2004 _Iyear_2005 _Iyear_2006
## [31] _Iyear_2007 _Iyear_2008 _Iyear_2009
## <0 rows> (or 0-length row.names)

ggplot2에서 여러 창에 그림을 그리게 하는 함수 multiplot

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

그림을 그려보자

library(ggplot2)

ggplot(grdp, aes(x=year, y=ln_grdp)) + geom_point() + stat_smooth(method=lm, se=FALSE)

g.temp<-list()

for(i in 2000:2009)
{
  g.temp[[i-1999]]<-ggplot(grdp[grdp$year==i,], aes(x=ln_edu, y=ln_grdp)) + geom_point()
}

multiplot(plotlist=g.temp,cols=4)

pool 모델

getwd() setwd(“F:\cloud\2015년 여름방학\panel”) write.csv(grdp, “F:\cloud\2015년 여름방학\panel\grdp.csv”)

grdp <- read.csv("F:\\cloud\\2015년 여름방학\\panel\\grdp.csv")

head(grdp,0)
##  [1] X         year      id        name      sudo      ln_grdp   ln_edu   
##  [8] ln_rnd    id_1      id_2      id_3      id_4      id_5      id_6     
## [15] id_7      id_8      id_9      id_10     id_11     id_12     id_13    
## [22] id_14     id_15     id_16     year_2000 year_2001 year_2002 year_2003
## [29] year_2004 year_2005 year_2006 year_2007 year_2008 year_2009
## <0 rows> (or 0-length row.names)
pooled <- lm(ln_grdp ~ ln_edu + ln_rnd + 
               id_2 + id_3 + id_4 + id_5 + id_6 + id_7 + id_8 + id_9 + 
               id_10 + id_11 + id_12 + id_13 + id_14 + id_15 + id_16 + 
               year_2001 + year_2002 + year_2003 + year_2004 + year_2005 +
               year_2006 + year_2007 + year_2008 + year_2009, data=grdp)

summary(pooled)
## 
## Call:
## lm(formula = ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + 
##     id_5 + id_6 + id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + 
##     id_13 + id_14 + id_15 + id_16 + year_2001 + year_2002 + year_2003 + 
##     year_2004 + year_2005 + year_2006 + year_2007 + year_2008 + 
##     year_2009, data = grdp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.144491 -0.025230 -0.000773  0.023463  0.178066 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.07438    0.57217   1.878  0.06261 .  
## ln_edu       0.39889    0.07889   5.056 1.39e-06 ***
## ln_rnd       0.08510    0.01966   4.328 2.94e-05 ***
## id_2        -0.70329    0.11730  -5.996 1.80e-08 ***
## id_3        -1.03225    0.14398  -7.170 4.69e-11 ***
## id_4        -0.70341    0.15627  -4.501 1.46e-05 ***
## id_5        -1.35117    0.16686  -8.098 3.15e-13 ***
## id_6        -1.52020    0.16014  -9.493  < 2e-16 ***
## id_7        -0.26712    0.22632  -1.180  0.23999    
## id_8        -0.23265    0.03155  -7.374 1.58e-11 ***
## id_9        -0.87922    0.20380  -4.314 3.10e-05 ***
## id_10       -0.82940    0.20244  -4.097 7.23e-05 ***
## id_11       -0.41723    0.19054  -2.190  0.03029 *  
## id_12       -0.95445    0.18205  -5.243 6.06e-07 ***
## id_13       -0.26515    0.20717  -1.280  0.20282    
## id_14       -0.26575    0.17382  -1.529  0.12867    
## id_15       -0.43941    0.14060  -3.125  0.00218 ** 
## id_16       -1.51363    0.27092  -5.587 1.25e-07 ***
## year_2001    0.01264    0.01725   0.733  0.46494    
## year_2002    0.06126    0.01832   3.343  0.00108 ** 
## year_2003    0.03097    0.02655   1.166  0.24551    
## year_2004    0.03356    0.02967   1.131  0.26011    
## year_2005    0.03994    0.03269   1.221  0.22406    
## year_2006    0.05228    0.03610   1.448  0.14995    
## year_2007    0.07751    0.03927   1.974  0.05049 .  
## year_2008    0.06280    0.04349   1.444  0.15108    
## year_2009    0.05934    0.04482   1.324  0.18780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04793 on 133 degrees of freedom
## Multiple R-squared:  0.9969, Adjusted R-squared:  0.9963 
## F-statistic:  1637 on 26 and 133 DF,  p-value: < 2.2e-16
summary(aov(pooled))
##              Df Sum Sq Mean Sq   F value   Pr(>F)    
## ln_edu        1  68.06   68.06 29629.078  < 2e-16 ***
## ln_rnd        1   3.50    3.50  1522.103  < 2e-16 ***
## id_2          1   0.00    0.00     0.646 0.422900    
## id_3          1   0.95    0.95   415.065  < 2e-16 ***
## id_4          1   0.03    0.03    11.328 0.000998 ***
## id_5          1   4.42    4.42  1925.015  < 2e-16 ***
## id_6          1  12.70   12.70  5529.645  < 2e-16 ***
## id_7          1   1.39    1.39   604.848  < 2e-16 ***
## id_8          1   0.88    0.88   383.737  < 2e-16 ***
## id_9          1   0.01    0.01     6.468 0.012125 *  
## id_10         1   0.41    0.41   179.786  < 2e-16 ***
## id_11         1   0.01    0.01     2.354 0.127355    
## id_12         1   0.69    0.69   301.526  < 2e-16 ***
## id_13         1   3.02    3.02  1313.697  < 2e-16 ***
## id_14         1   0.57    0.57   247.748  < 2e-16 ***
## id_15         1   0.42    0.42   182.892  < 2e-16 ***
## id_16         1   0.66    0.66   286.157  < 2e-16 ***
## year_2001     1   0.00    0.00     0.845 0.359729    
## year_2002     1   0.02    0.02    10.306 0.001663 ** 
## year_2003     1   0.00    0.00     0.019 0.889450    
## year_2004     1   0.00    0.00     0.278 0.599216    
## year_2005     1   0.00    0.00     0.315 0.575613    
## year_2006     1   0.00    0.00     0.011 0.917631    
## year_2007     1   0.01    0.01     3.364 0.068881 .  
## year_2008     1   0.00    0.00     0.334 0.564447    
## year_2009     1   0.00    0.00     1.753 0.187797    
## Residuals   133   0.31    0.00                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

chow test

full.model <- lm(ln_grdp ~ ln_edu + ln_rnd + 
               id_2 + id_3 + id_4 + id_5 + id_6 + id_7 + id_8 + id_9 + 
               id_10 + id_11 + id_12 + id_13 + id_14 + id_15 + id_16 + 
               year_2001 + year_2002 + year_2003 + year_2004 + year_2005 +
               year_2006 + year_2007 + year_2008 + year_2009, data=grdp)

reduced.model.id <- lm(ln_grdp ~ ln_edu + ln_rnd + 
               year_2001 + year_2002 + year_2003 + year_2004 + year_2005 +
               year_2006 + year_2007 + year_2008 + year_2009, data=grdp)

reduced.model.time <- lm(ln_grdp ~ ln_edu + ln_rnd + 
               id_2 + id_3 + id_4 + id_5 + id_6 + id_7 + id_8 + id_9 + 
               id_10 + id_11 + id_12 + id_13 + id_14 + id_15 + id_16, data=grdp)

reduced.model <- lm(ln_grdp ~ ln_edu + ln_rnd, data=grdp)


anova(reduced.model.id, full.model) #개체효과에 관한 chow test
## Analysis of Variance Table
## 
## Model 1: ln_grdp ~ ln_edu + ln_rnd + year_2001 + year_2002 + year_2003 + 
##     year_2004 + year_2005 + year_2006 + year_2007 + year_2008 + 
##     year_2009
## Model 2: ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + id_5 + id_6 + 
##     id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + id_13 + id_14 + 
##     id_15 + id_16 + year_2001 + year_2002 + year_2003 + year_2004 + 
##     year_2005 + year_2006 + year_2007 + year_2008 + year_2009
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    148 26.3292                                  
## 2    133  0.3055 15    26.024 755.31 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(reduced.model.time, full.model) #시간효과에 관한 chow test
## Analysis of Variance Table
## 
## Model 1: ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + id_5 + id_6 + 
##     id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + id_13 + id_14 + 
##     id_15 + id_16
## Model 2: ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + id_5 + id_6 + 
##     id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + id_13 + id_14 + 
##     id_15 + id_16 + year_2001 + year_2002 + year_2003 + year_2004 + 
##     year_2005 + year_2006 + year_2007 + year_2008 + year_2009
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)  
## 1    142 0.34506                             
## 2    133 0.30550  9  0.039561 1.9137 0.0551 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(reduced.model, full.model) #개체효과와 시간효과에 관한 chow test
## Analysis of Variance Table
## 
## Model 1: ln_grdp ~ ln_edu + ln_rnd
## Model 2: ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + id_5 + id_6 + 
##     id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + id_13 + id_14 + 
##     id_15 + id_16 + year_2001 + year_2002 + year_2003 + year_2004 + 
##     year_2005 + year_2006 + year_2007 + year_2008 + year_2009
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    157 26.5095                                  
## 2    133  0.3055 24    26.204 475.34 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

패키지 plm

install.packages(“plm”)

library(plm)
## Loading required package: Formula
head(grdp, 0)
##  [1] X         year      id        name      sudo      ln_grdp   ln_edu   
##  [8] ln_rnd    id_1      id_2      id_3      id_4      id_5      id_6     
## [15] id_7      id_8      id_9      id_10     id_11     id_12     id_13    
## [22] id_14     id_15     id_16     year_2000 year_2001 year_2002 year_2003
## [29] year_2004 year_2005 year_2006 year_2007 year_2008 year_2009
## <0 rows> (or 0-length row.names)
p.model.f <- plm(ln_grdp ~ ln_edu + ln_rnd, data=grdp, index=c("name", "year"))

summary(p.model.f)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = ln_grdp ~ ln_edu + ln_rnd, data = grdp, index = c("name", 
##     "year"))
## 
## Balanced Panel: n=16, T=10, N=160
## 
## Residuals :
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -0.11300 -0.02660 -0.00505  0.02590  0.15900 
## 
## Coefficients :
##        Estimate Std. Error t-value  Pr(>|t|)    
## ln_edu 0.460621   0.032130 14.3360 < 2.2e-16 ***
## ln_rnd 0.102450   0.018043  5.6781 7.409e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.5338
## Residual Sum of Squares: 0.34506
## R-Squared      :  0.86382 
##       Adj. R-Squared :  0.76664 
## F-statistic: 450.366 on 2 and 142 DF, p-value: < 2.22e-16
p.model.f <- plm(ln_grdp ~ ln_edu + ln_rnd, data=grdp, effect="individual", model="random", 
                 index=c("name", "year"))

summary(p.model.f)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = ln_grdp ~ ln_edu + ln_rnd, data = grdp, effect = "individual", 
##     model = "random", index = c("name", "year"))
## 
## Balanced Panel: n=16, T=10, N=160
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.00243 0.04929 0.012
## individual    0.19921 0.44633 0.988
## theta:  0.9651  
## 
## Residuals :
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -0.10900 -0.03580 -0.00344  0.02600  0.16800 
## 
## Coefficients :
##              Estimate Std. Error t-value  Pr(>|t|)    
## (Intercept) -0.175591   0.188877 -0.9297     0.354    
## ln_edu       0.463167   0.031772 14.5778 < 2.2e-16 ***
## ln_rnd       0.103384   0.017910  5.7725 4.054e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.6502
## Residual Sum of Squares: 0.38122
## R-Squared      :  0.85615 
##       Adj. R-Squared :  0.8401 
## F-statistic: 467.225 on 2 and 157 DF, p-value: < 2.22e-16
p.model.mixed <- plm(ln_grdp ~ ln_edu + ln_rnd +
                   id_2 + id_3 + id_4 + id_5 + id_6 + id_7 + id_8 + id_9 + 
                   id_10 + id_11 + id_12 + id_13 + id_14 + id_15 + id_16   
                   , data=grdp, effect="time", model="random", index=c("name", "year"))

summary(p.model.mixed)
## Oneway (time) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = ln_grdp ~ ln_edu + ln_rnd + id_2 + id_3 + id_4 + 
##     id_5 + id_6 + id_7 + id_8 + id_9 + id_10 + id_11 + id_12 + 
##     id_13 + id_14 + id_15 + id_16, data = grdp, effect = "time", 
##     model = "random", index = c("name", "year"))
## 
## Balanced Panel: n=16, T=10, N=160
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 0.0022970 0.0479266 0.948
## time          0.0001256 0.0112085 0.052
## theta:  0.2697  
## 
## Residuals :
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -0.11200 -0.02530 -0.00451  0.02580  0.16400 
## 
## Coefficients :
##              Estimate Std. Error  t-value  Pr(>|t|)    
## (Intercept)  0.462467   0.219985   2.1023  0.037295 *  
## ln_edu       0.464555   0.036282  12.8039 < 2.2e-16 ***
## ln_rnd       0.096442   0.018427   5.2338 5.850e-07 ***
## id_2        -0.579804   0.045116 -12.8513 < 2.2e-16 ***
## id_3        -0.882087   0.052970 -16.6524 < 2.2e-16 ***
## id_4        -0.552008   0.059890  -9.2170 3.753e-16 ***
## id_5        -1.180504   0.060391 -19.5476 < 2.2e-16 ***
## id_6        -1.380118   0.070528 -19.5683 < 2.2e-16 ***
## id_7        -0.045945   0.083751  -0.5486  0.584145    
## id_8        -0.223873   0.027538  -8.1296 1.925e-13 ***
## id_9        -0.668923   0.072210  -9.2635 2.861e-16 ***
## id_10       -0.630659   0.074910  -8.4189 3.744e-14 ***
## id_11       -0.237110   0.074467  -3.1841  0.001785 ** 
## id_12       -0.769727   0.065657 -11.7235 < 2.2e-16 ***
## id_13       -0.055367   0.074108  -0.7471  0.456231    
## id_14       -0.102244   0.068775  -1.4866  0.139325    
## id_15       -0.304232   0.054964  -5.5351 1.456e-07 ***
## id_16       -1.233035   0.094307 -13.0747 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    97.082
## Residual Sum of Squares: 0.32816
## R-Squared      :  0.99662 
##       Adj. R-Squared :  0.8845 
## F-statistic: 2462.75 on 17 and 142 DF, p-value: < 2.22e-16
ls(p.model.mixed)
## [1] "args"         "call"         "coefficients" "df.residual" 
## [5] "ercomp"       "formula"      "model"        "residuals"   
## [9] "vcov"
lm.test <- pcdtest(ln_grdp ~ ln_edu + ln_rnd +
        id_2 + id_3 + id_4 + id_5 + id_6 + id_7 + id_8 + id_9 + 
        id_10 + id_11 + id_12 + id_13 + id_14 + id_15 + id_16, 
        data=grdp, effect="time", model="random", index=c("name", "year"),
        test="sclm")

lm.test
## 
##  Scaled LM test for cross-sectional dependence in panels
## 
## data:  formula
## z = 9.0338, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
ls(lm.test)
## [1] "alternative" "data.name"   "method"      "p.value"     "parameter"  
## [6] "statistic"

잔차도

res<-p.model.mixed$residual
pred<-predict(p.model.mixed)

plot.mixed <- data.frame(as.numeric(res), as.numeric(pred))

names(plot.mixed) <- c("res", "pred")

ggplot(plot.mixed, aes(x=pred, y=res)) + geom_point()