1. 데이터 불러오기
# Import the data set from the CSV file in the working directory.
mydata <- read.csv(file = "si_data.csv")
# Put the columns of `mydata` on the search path so they can be referenced
# by bare name.
attach(what = mydata)
# Preview the first six rows to check that the import looks sensible.
head(mydata, n = 6L)
## code sido sigungu year cor closed sum_closed pop X0_4
## 1 43720 충청북도 보은군 1992 -0.9886081 3 3 53513 2394
## 2 43720 충청북도 보은군 1993 -0.9886081 0 3 51814 2335
## 3 43720 충청북도 보은군 1994 -0.9886081 2 5 49891 2313
## 4 43720 충청북도 보은군 1995 -0.9886081 2 7 48460 2312
## 5 43720 충청북도 보은군 1996 -0.9886081 1 8 47242 2243
## 6 43720 충청북도 보은군 1997 -0.9886081 0 8 45980 2170
## X5_14 X15_19 X20_29 X30_49 X50_64 X65up
## 1 8108 6090 8984 11672 10276 5989
## 2 7381 5355 9036 11350 10178 6179
## 3 6778 4830 8643 11125 9869 6333
## 4 6149 4570 8131 11183 9647 6468
## 5 5736 4329 7686 11182 9521 6545
## 6 5328 4087 7330 10976 9329 6760
2. plm 패키지 불러오기
library(plm)
## Warning: package 'plm' was built under R version 3.3.1
## Loading required package: Formula
3. 종속변수, 독립변수 설정
# Set up the dependent variable (Y), the regressor matrix (X) and the panel
# data frame (pdata).
#
# Columns are taken explicitly from `mydata` rather than through bare names
# resolved on the search path, so this step does not depend on attach().
age_cols <- c("X0_4", "X5_14", "X15_19", "X20_29", "X30_49", "X50_64", "X65up")

# pdata.frame() orders rows by individual and then by time. Y and X are
# free-standing matrices (not columns of `pdata`), so a model formula that
# uses them matches them to the panel index by row position only. Sorting
# once here keeps Y, X and pdata in the same row order.
panel_rows <- mydata[order(mydata$code, mydata$year), ]

# Dependent variable: one-column matrix; the column name "sum_closed" is kept.
Y <- cbind(sum_closed = panel_rows$sum_closed)
# Regressors: the seven age-band columns, one matrix column per band.
X <- as.matrix(panel_rows[, age_cols], rownames.force = FALSE)

# plm.data() is deprecated in plm; pdata.frame() is its replacement.
pdata <- pdata.frame(panel_rows, index = c("code", "year"))
summary(Y)
## sum_closed
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 4.00
## Mean :10.36
## 3rd Qu.:19.00
## Max. :74.00
# Per-column summary statistics of the regressor matrix X.
summary(X)
## X0_4 X5_14 X15_19 X20_29
## Min. : 251 Min. : 542 Min. : 409 Min. : 1058
## 1st Qu.: 2531 1st Qu.: 6396 1st Qu.: 4224 1st Qu.: 8369
## Median : 6821 Median : 16196 Median : 9960 Median : 20681
## Mean :12231 Mean : 27859 Mean :15725 Mean : 34380
## 3rd Qu.:18971 3rd Qu.: 42633 3rd Qu.:23320 3rd Qu.: 53189
## Max. :81009 Max. :174981 Max. :87316 Max. :191472
## X30_49 X50_64 X65up
## Min. : 2693 Min. : 1970 Min. : 993
## 1st Qu.: 15355 1st Qu.: 11770 1st Qu.: 9168
## Median : 38629 Median : 21782 Median : 14568
## Mean : 69918 Mean : 32471 Mean : 18725
## 3rd Qu.:107900 3rd Qu.: 45451 3rd Qu.: 23708
## Max. :421624 Max. :225373 Max. :110050
4. plm을 통한 분석
# Fixed-effects ("within") panel regression of Y on the element-wise log of
# the regressor matrix X, fitted against the panel data frame `pdata`.
fixed <- plm(
  formula = Y ~ log(X),
  data = pdata,
  model = "within"
)
# Coefficient table and fit statistics for the within model.
summary(fixed)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = Y ~ log(X), data = pdata, model = "within")
##
## Balanced Panel: n=210, T=24, N=5040
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -29.6000 -1.6900 -0.0489 1.9300 18.8000
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## log(X)X0_4 5.06172 0.49355 10.2558 < 2.2e-16 ***
## log(X)X5_14 -18.11802 0.77419 -23.4024 < 2.2e-16 ***
## log(X)X15_19 -5.68266 0.58072 -9.7856 < 2.2e-16 ***
## log(X)X20_29 -5.27847 0.69688 -7.5744 4.298e-14 ***
## log(X)X30_49 27.69453 1.03592 26.7342 < 2.2e-16 ***
## log(X)X50_64 -6.76823 0.74782 -9.0506 < 2.2e-16 ***
## log(X)X65up 0.09973 0.67685 0.1473 0.8829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 195770
## Residual Sum of Squares: 75269
## R-Squared: 0.61552
## Adj. R-Squared: 0.58902
## F-statistic: 1103.02 on 7 and 4823 DF, p-value: < 2.22e-16
# Random-effects panel regression with the same specification as the
# fixed-effects fit: Y on the element-wise log of the regressor matrix X.
random <- plm(
  formula = Y ~ log(X),
  data = pdata,
  model = "random"
)
# Variance components, coefficient table and fit statistics.
summary(random)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y ~ log(X), data = pdata, model = "random")
##
## Balanced Panel: n=210, T=24, N=5040
##
## Effects:
## var std.dev share
## idiosyncratic 15.606 3.950 0.241
## individual 49.257 7.018 0.759
## theta: 0.8859
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -24.600 -2.120 -0.315 2.110 21.400
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 34.36576 3.01576 11.3954 < 2.2e-16 ***
## log(X)X0_4 5.65638 0.50520 11.1963 < 2.2e-16 ***
## log(X)X5_14 -15.17156 0.78085 -19.4295 < 2.2e-16 ***
## log(X)X15_19 -4.72130 0.59796 -7.8957 3.518e-15 ***
## log(X)X20_29 -5.83472 0.70753 -8.2466 < 2.2e-16 ***
## log(X)X30_49 20.86996 0.99697 20.9334 < 2.2e-16 ***
## log(X)X50_64 -8.08548 0.76815 -10.5259 < 2.2e-16 ***
## log(X)X65up 3.54331 0.67052 5.2844 1.314e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 204010
## Residual Sum of Squares: 84248
## R-Squared: 0.58703
## Adj. R-Squared: 0.5861
## F-statistic: 1021.86 on 7 and 5032 DF, p-value: < 2.22e-16
5. 하우스만 검정
# Hausman test comparing the random-effects and fixed-effects fits; the
# alternative hypothesis is that one of the two models is inconsistent.
phtest(random, fixed)
##
## Hausman Test
##
## data: Y ~ log(X)
## chisq = 905.75, df = 7, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent