# ---- Table 1: location variables and province dummies ----------------------
# NOTE(review): machine-specific absolute path; every relative read.dta() call
# in this script depends on this working directory.
setwd('C:\\Users\\fanning1207\\Desktop\\STATA')
library(foreign)
table1_org <- read.dta('table1_org.dta')
library(Hmisc)

# Attach descriptive variable labels (Hmisc::label).
label(table1_org$code) <- '问卷编码'
label(table1_org$prov) <- '省'
label(table1_org$county) <- '县'
label(table1_org$town) <- '乡镇'
label(table1_org$village) <- '村'

# Province as a factor; the level order printed below drives both the
# provcode mapping and the dummy-column names further down.
table1_org$prov <- factor(table1_org$prov)
levels(table1_org$prov)
## [1] "湖北" "湖南" "江西" "四川"

# provcode is a factor whose labels follow the level order above:
# 湖北 -> 1, 湖南 -> 2, 江西 -> 4, 四川 -> 3.
table1_org$provcode <- factor(
  table1_org$prov,
  levels = levels(table1_org$prov),
  labels = c(1, 2, 4, 3)
)

# One 0/1 indicator column per province (no intercept column).
add_table1 <- model.matrix(~ factor(prov) - 1, table1_org)

# Bind the indicators column-wise. merge() must not be used here: the two
# frames share no column names, so merge() would return their Cartesian
# product (n * n rows) instead of appending columns row by row.
table1_org <- cbind(table1_org, as.data.frame(add_table1))
names(table1_org)[7:10]
## [1] "factor(prov)湖北" "factor(prov)湖南" "factor(prov)江西"
## [4] "factor(prov)四川"
names(table1_org)[7:10] <- c('hubei', 'hunan', 'jiangxi', 'sichuan')

# Drop columns 3 to 5 (presumably county, town and village -- confirm against
# the column order of table1_org.dta).
table1_org <- table1_org[-(3:5)]
# ---- Table 2: household head characteristics -------------------------------
table2_org <- read.dta('table2_org.dta')
names(table2_org)[2:6] <- c('familysize', 'gender', 'birthyear', 'educ', 'training')

# gender is coded "1"/"2"; derive `female` with the first level mapped to
# '男' and the second to '女'.
table2_org$gender <- factor(table2_org$gender)
levels(table2_org$gender)
## [1] "1" "2"
table2_org$female <- factor(
  table2_org$gender,
  levels = levels(table2_org$gender),
  labels = c(0, 1)
)
table2_org$female <- factor(table2_org$female, levels = c(0, 1), labels = c('男', '女'))

# Some birth years were entered as Chinese numerals.
table2_org$birthyear
## [1] "1964" "1950" "1947" "1950" "一九六二" "1957"
## [7] "一九七二" "1972" "1959" "一九五八" "1984" "1950"
## [13] "1964" "1953" "1971" "1951" "1972" "1940"
## [19] "1965" "1980"

# Replace them by value rather than by row position, so the fix stays correct
# if the rows are ever reordered or the file is re-exported.
cn_years <- c('一九六二', '一九七二', '一九五八')
ar_years <- c('1962', '1972', '1958')
year_idx <- match(table2_org$birthyear, cn_years)
year_hit <- !is.na(year_idx)
table2_org$birthyear[year_hit] <- ar_years[year_idx[year_hit]]
table2_org$birthyear <- as.integer(table2_org$birthyear)

# Age relative to 2013 (presumably the survey year -- confirm).
table2_org$age <- 2013 - table2_org$birthyear

# Manual corrections to educ: the value 1.5 is recoded to 5, and household
# 1220103 is set to 6.
table2_org$educ[table2_org$educ == 1.5] <- 5
table2_org$educ[table2_org$code == 1220103] <- 6
# ---- Table 3: plot-level production data -----------------------------------
table3_org <- read.dta('table3_org.dta')

# All corrections are written straight into the data frame. With attach(),
# assignments such as `d14[...] = ...` only create modified global copies that
# shadow the attached columns, which is fragile and leaves stray globals.

# Manual corrections to d14 for two households.
table3_org$d14[table3_org$code == 1220103] <- 19200
table3_org$d14[table3_org$code == 1310102] <- 10000

# Derived variables. NOTE(review): the /2 and /15 factors look like unit
# conversions (jin -> kg, mu -> hectare) -- confirm against the questionnaire.
table3_org$avprod <- with(table3_org, (d14 / 2) / (d3 / 15))
table3_org$labor <- with(table3_org, 15 * (d21 + d25 + d29 + d38 + d46) / 8)

# d58 holds one free-text entry ('八十斤'); normalise it before converting.
table3_org$d58[table3_org$d58 == '八十斤'] <- '80'
table3_org$d58 <- as.integer(table3_org$d58)

# d58 scaled by d55 / d56 / d57 (presumably N / P / K percentages -- confirm),
# divided by the same area term in each case.
table3_org$puren <- with(table3_org, (d58 * d55) / (100 * d3 * 2 / 15))
table3_org$purep <- with(table3_org, (d58 * d56) / (100 * d3 * 2 / 15))
table3_org$purek <- with(table3_org, (d58 * d57) / (100 * d3 * 2 / 15))

names(table3_org)[names(table3_org) == 'd19'] <- 'npest'
names(table3_org)[names(table3_org) == 'd35'] <- 'nirri'

# Keep only the '中稻' rows; which() drops rows where d2 is NA instead of
# turning them into all-NA rows.
table3_org <- table3_org[which(table3_org$d2 == '中稻'), ]
table3_org <- table3_org[c('code', 'npest', 'nirri', 'avprod', 'labor', 'puren', 'purep', 'purek')]
# ---- Table 4: asset values, summed per household ---------------------------
table4_org <- read.dta('table4_org.dta')

# s7 is read as text because two entries use Chinese numerals.
table4_org[, 's7']
## [1] "70000" "三千" "10000" "260000" "100000" "5000" "5000"
## [8] "200000" "六万" "50000" "25000" "150000" "50000" "1000"
## [15] "100000" "150000" "60000" "70000" "90000" "100000" "180000"
## [22] "20000"

# Swap the two text entries for their numeric spellings, then convert.
s7_text <- c('三千', '六万')
s7_digits <- c('3000', '60000')
s7_idx <- match(table4_org$s7, s7_text)
s7_hit <- !is.na(s7_idx)
table4_org$s7[s7_hit] <- s7_digits[s7_idx[s7_hit]]
table4_org$s7 <- as.integer(table4_org$s7)

# One row per household code, with `asset` holding the sum of its s7 values.
library(plyr)
table4_org <- ddply(table4_org, .(code), summarize, asset = sum(s7))
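# [题目文字,开头被截断] ….dta 这四个数据合并。(10分)
# (Task text, truncated at the start: merge these four .dta data sets. 10 points.)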
# Combine the four cleaned tables into one analysis frame, matching rows on
# `code`. merge() keeps only codes present on both sides of each join.
# NOTE(review): table1_cln .. table4_cln are not created in the visible part
# of this script (the cleaning steps above update table*_org) -- confirm
# where the *_cln objects come from.
temp1 <- merge(x = table1_cln, y = table2_cln, by = 'code')
temp2 <- merge(x = table3_cln, y = table4_cln, by = 'code')
table_all <- merge(x = temp1, y = temp2, by = 'code')
# | 产量(公斤/公顷) | 参加过培训 | 未参加过培训 |
# |---|---|---|
# | 湖北 | | |
# | 湖南 | | |
# | 四川 | | |
# | 江西 | | |
# Average yield (avprod) for every province x training cell.
# xtabs(avprod ~ prov + training) is not suitable here: with a left-hand side
# it SUMS avprod within each cell, so cells holding several households show
# totals of per-hectare yields rather than the mean the table asks for.
# Cells with no observations are reported as NA.
with(table_all, tapply(avprod, list(prov = prov, training = training), mean))
# Output of the earlier xtabs() call (cell sums, not means), kept for
# reference only -- re-run the line above to obtain the means:
## training
## prov 0 1
## 湖北 21875.000 23250.000
## 湖南 12000.000 35375.000
## 江西 22136.363 14250.000
## 四川 6321.429 18000.000
# Ordinary least squares fit of avprod on training, household-head
# characteristics (gender, age, educ), asset, the three fertiliser variables
# (puren, purep, purek), npest, nirri and three province indicators, using the
# merged frame table_all. The fitted model is stored in lm_dat.
# NOTE(review): the formula has no Hubei indicator, so Hubei is presumably the
# reference province -- confirm.
lm_dat=lm(formula = avprod ~ training + gender + age + educ
+ asset + puren + purep + purek + npest + nirri
+ Hunan + Sichuan + Jiangxi, data = table_all)
# Print the coefficient table and fit statistics. The `##` lines below are
# console output pasted from an earlier run, not live results.
summary(lm_dat)
##
## Call:
## lm(formula = avprod ~ training + gender + age + educ + asset +
## puren + purep + purek + npest + nirri + Hunan + Sichuan +
## Jiangxi, data = table_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2136.4 -676.5 -361.0 578.1 3078.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.319e+04 5.320e+03 2.479 0.0479 *
## training 2.172e+03 1.198e+03 1.813 0.1197
## gender 2.204e+03 2.528e+03 0.872 0.4168
## age -1.560e+02 6.035e+01 -2.585 0.0415 *
## educ -2.685e+02 2.848e+02 -0.943 0.3821
## asset -1.537e-02 1.165e-02 -1.320 0.2351
## puren 1.887e+01 9.747e+00 1.936 0.1010
## purep 3.355e+01 2.434e+01 1.378 0.2173
## purek -3.641e+01 2.127e+01 -1.712 0.1378
## npest 8.329e+02 8.638e+02 0.964 0.3721
## nirri -1.337e+02 4.179e+02 -0.320 0.7598
## Hunan 7.096e+01 2.314e+03 0.031 0.9765
## Sichuan -6.990e+03 1.945e+03 -3.594 0.0114 *
## Jiangxi -3.592e+03 4.127e+03 -0.871 0.4175
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2181 on 6 degrees of freedom
## Multiple R-squared: 0.8629, Adjusted R-squared: 0.566
## F-statistic: 2.906 on 13 and 6 DF, p-value: 0.09901
# Draw the diagnostic plots for lm_dat on a 2 x 2 grid. par() returns the
# previous settings, which are restored afterwards so that later plots are not
# silently forced into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(lm_dat)
par(old_par)