Kun Wang (86756137) Feb13-2004
summary(df) ## show some summary statistics of df
## firm any.foreign.own year firm.age
## Min. : 1001 Min. :0.00 Min. :1991 Min. : 1.0
## 1st Qu.: 1548 1st Qu.:0.00 1st Qu.:1994 1st Qu.: 9.0
## Median : 3156 Median :0.00 Median :1997 Median : 17.0
## Mean : 86038 Mean :0.18 Mean :1996 Mean : 18.8
## 3rd Qu.: 50418 3rd Qu.:0.00 3rd Qu.:1999 3rd Qu.: 25.0
## Max. :1009999 Max. :1.00 Max. :2003 Max. :102.0
## NA's :257 NA's :1691
## exports exportna exporta rmawagus
## Min. :0 Min. :0 Min. :0 Min. : -729
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.: 22
## Median :0 Median :0 Median :0 Median : 51
## Mean :0 Mean :0 Mean :0 Mean : 158
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.: 110
## Max. :1 Max. :1 Max. :1 Max. :117383
## NA's :5178 NA's :6106 NA's :6103 NA's :5357
## eduwgt agewgt tenwgt ernus
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 7 1st Qu.:26 1st Qu.: 3 1st Qu.: 38
## Median : 9 Median :32 Median : 6 Median : 61
## Mean : 9 Mean :31 Mean : 7 Mean : 73
## 3rd Qu.:10 3rd Qu.:36 3rd Qu.: 9 3rd Qu.: 91
## Max. :21 Max. :65 Max. :30 Max. :1310
## NA's :6190 NA's :6190 NA's :6190 NA's :6522
## country prateus kus_routus entex
## Ghana :3390 Min. :-1578 Min. : 0 Min. :0.0000
## Kenya :3240 1st Qu.: 0 1st Qu.: 0 1st Qu.:0.0000
## Nigeria : 700 Median : 0 Median : 0 Median :0.0000
## South Africa: 404 Mean : 1 Mean : 2 Mean :0.0088
## Tanzania :2625 3rd Qu.: 1 3rd Qu.: 1 3rd Qu.:0.0000
## Max. : 633 Max. :191 Max. :1.0000
## NA's :5580 NA's :5274
## exitex industry labor
## Min. :0.0000 food and baking :2265 Min. : 1
## 1st Qu.:0.0000 furniture :1820 1st Qu.: 8
## Median :0.0000 garment :1741 Median : 25
## Mean :0.0079 metals and machinery:3084 Mean : 125
## 3rd Qu.:0.0000 textile : 648 3rd Qu.: 81
## Max. :1.0000 wood : 781 Max. :13000
## NA's : 20 NA's :4888
## output capital materials
## Min. :8.70e+01 Min. :2.30e+01 Min. :3.00e+00
## 1st Qu.:1.87e+04 1st Qu.:3.74e+03 1st Qu.:8.71e+03
## Median :1.15e+05 Median :9.74e+04 Median :5.29e+04
## Mean :4.09e+06 Mean :3.24e+06 Mean :2.38e+06
## 3rd Qu.:1.02e+06 3rd Qu.:8.06e+05 3rd Qu.:4.78e+05
## Max. :5.15e+08 Max. :5.70e+08 Max. :3.84e+08
## NA's :5149 NA's :5120 NA's :5277
## indirect.costs
## Min. : 8
## 1st Qu.: 1280
## Median : 10527
## Mean : 458566
## 3rd Qu.: 98569
## Max. :68251868
## NA's :5226
hist(log(df$output)) ## plot the histogram of the logoutput##
plot(x = log(df$labor), y = log(df$output)) ## scatter plot of log(output) and log(labor)
plot(x = log(df$capital), y = log(df$output)) ## scatter plot of log(output) and log(capital)
plot(x = log(df$materials), y = log(df$output)) ## scatter plot of log(output) and log(materials)
output.histogram <- ggplot(df, aes(x = log(output), fill = country)) + geom_histogram() +
facet_grid(country ~ industry) + theme_minimal() + scale_fill_brewer(type = "qual",
guide = FALSE)
output.histogram ## the histogram by country and indsutry
(1) Are there any strange patterns or other obvious problems with the data?
incomplete (no observation for textile and wood industries). Relationship between logoutput and logcapital,logoutput and loglabor are NOT that linear for Ghana and Nigeria.(2) Does it appear that Cobb-Douglas will be a good approximation for these firm's production functions ? why or why not?
scatter.capital <- ggplot(df, aes(x = log(capital), y = log(output), color = country)) +
geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual",
guide = FALSE)
scatter.capital
scatter.labor <- ggplot(df, aes(x = log(labor), y = log(output), color = country)) +
geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual",
guide = FALSE)
scatter.labor
scatter.materials <- ggplot(df, aes(x = log(materials), y = log(output), color = country)) +
geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual",
guide = FALSE)
scatter.materials
scatter.indirect.costs <- ggplot(df, aes(x = log(indirect.costs), y = log(output),
color = country)) + geom_point() + facet_grid(country ~ industry) + theme_bw() +
scale_color_brewer(type = "qual", guide = FALSE)
scatter.indirect.costs
** The 3-d plot confirms the general positive linear relationship between “log labor” and “log output”, “log capital” and “log output” **
This 3-d plot also demonstrates obvious linear relation between “log output” and the “log inputs”.
rgl.spheres(log(df$labor), log(df$capital), log(df$output), color = "grey",
radius = 0.05, specular = "white")
axes3d(c("x", "y", "z"))
title3d("", "", "log labor", "log capital", "log output")
grid3d(c("x", "y", "z"))
From the OLS, whe have following resutls
(we installed the ClusterFunctions.R to calculate the heterosckedasticity robust s.d.)
prod.ols <- plm(log(output) ~ log(capital) + log(labor) + log(materials), data = df,
model = "pooling", index = c("firm", "year"))
cl.plm(df, prod.ols, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.84128 0.04931 37.3 <2e-16 ***
## log(capital) 0.08649 0.00576 15.0 <2e-16 ***
## log(labor) 0.21919 0.01159 18.9 <2e-16 ***
## log(materials) 0.75205 0.01007 74.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The return to scale for the production function is 1.06. This shows almost **constant* return of scale.*
We also observe that there is a clear increasing trend of output (positively significant year dummies); and the industries dummies are not statistically significant, excpet for the negative “textile industry dummy”.“
prod.indyear <- plm(log(output) ~ log(capital) + log(labor) + log(materials) +
industry + as.factor(year), data = df, model = "pooling", index = c("firm",
"year")) ## regression including industry and year dummies
cl.plm(df, prod.indyear, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.80807 0.07159 25.25 < 2e-16 ***
## log(capital) 0.08669 0.00572 15.16 < 2e-16 ***
## log(labor) 0.22834 0.01343 17.00 < 2e-16 ***
## log(materials) 0.74506 0.01125 66.26 < 2e-16 ***
## industryfurniture -0.03341 0.02778 -1.20 0.22917
## industrygarment 0.00559 0.02717 0.21 0.83710
## industrymetals and machinery 0.01836 0.02066 0.89 0.37426
## industrytextile -0.13178 0.03402 -3.87 0.00011 ***
## industrywood 0.02545 0.03642 0.70 0.48467
## as.factor(year)1992 0.02694 0.03871 0.70 0.48643
## as.factor(year)1993 0.01228 0.04029 0.30 0.76049
## as.factor(year)1994 0.11514 0.04495 2.56 0.01046 *
## as.factor(year)1995 0.05481 0.03921 1.40 0.16227
## as.factor(year)1996 0.01640 0.04194 0.39 0.69569
## as.factor(year)1997 0.09882 0.03721 2.66 0.00793 **
## as.factor(year)1998 0.12022 0.03516 3.42 0.00063 ***
## as.factor(year)1999 0.06475 0.03569 1.81 0.06975 .
## as.factor(year)2000 0.12641 0.03962 3.19 0.00143 **
## as.factor(year)2001 0.17281 0.04712 3.67 0.00025 ***
## as.factor(year)2002 0.15764 0.04301 3.67 0.00025 ***
## as.factor(year)2003 0.16231 0.06052 2.68 0.00735 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prod.industry <- plm(log(output) ~ (log(capital) + log(labor) + log(materials)) *
industry + as.factor(year), data = df, model = "pooling", index = c("firm",
"year"))
cl.plm(df, prod.industry, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value
## (Intercept) 2.04844 0.19778 10.36
## log(capital) 0.08101 0.01054 7.69
## log(labor) 0.30347 0.04555 6.66
## log(materials) 0.70894 0.02840 24.96
## industryfurniture -0.67985 0.21258 -3.20
## industrygarment 0.25828 0.25428 1.02
## industrymetals and machinery -0.45267 0.21063 -2.15
## industrytextile 0.04843 0.39404 0.12
## industrywood 0.00372 0.25500 0.01
## as.factor(year)1992 0.02692 0.03761 0.72
## as.factor(year)1993 0.02431 0.03940 0.62
## as.factor(year)1994 0.12678 0.04330 2.93
## as.factor(year)1995 0.06689 0.03826 1.75
## as.factor(year)1996 0.02113 0.04091 0.52
## as.factor(year)1997 0.08191 0.03622 2.26
## as.factor(year)1998 0.11471 0.03414 3.36
## as.factor(year)1999 0.07384 0.03474 2.13
## as.factor(year)2000 0.12925 0.03755 3.44
## as.factor(year)2001 0.17004 0.04506 3.77
## as.factor(year)2002 0.15283 0.04180 3.66
## as.factor(year)2003 0.14251 0.05852 2.44
## log(capital):industryfurniture 0.00243 0.01488 0.16
## log(capital):industrygarment 0.02749 0.02007 1.37
## log(capital):industrymetals and machinery -0.00578 0.01326 -0.44
## log(capital):industrytextile 0.02598 0.04007 0.65
## log(capital):industrywood 0.00119 0.01975 0.06
## log(labor):industryfurniture -0.11669 0.05278 -2.21
## log(labor):industrygarment -0.12899 0.05096 -2.53
## log(labor):industrymetals and machinery -0.10381 0.04933 -2.10
## log(labor):industrytextile -0.03428 0.07574 -0.45
## log(labor):industrywood -0.02645 0.05937 -0.45
## log(materials):industryfurniture 0.09160 0.03086 2.97
## log(materials):industrygarment -0.02544 0.04326 -0.59
## log(materials):industrymetals and machinery 0.07557 0.03210 2.35
## log(materials):industrytextile -0.03375 0.08335 -0.40
## log(materials):industrywood 0.00360 0.03579 0.10
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## log(capital) 1.8e-14 ***
## log(labor) 3.0e-11 ***
## log(materials) < 2e-16 ***
## industryfurniture 0.00139 **
## industrygarment 0.30982
## industrymetals and machinery 0.03167 *
## industrytextile 0.90219
## industrywood 0.98837
## as.factor(year)1992 0.47415
## as.factor(year)1993 0.53729
## as.factor(year)1994 0.00343 **
## as.factor(year)1995 0.08050 .
## as.factor(year)1996 0.60550
## as.factor(year)1997 0.02376 *
## as.factor(year)1998 0.00079 ***
## as.factor(year)1999 0.03361 *
## as.factor(year)2000 0.00058 ***
## as.factor(year)2001 0.00016 ***
## as.factor(year)2002 0.00026 ***
## as.factor(year)2003 0.01492 *
## log(capital):industryfurniture 0.87026
## log(capital):industrygarment 0.17078
## log(capital):industrymetals and machinery 0.66319
## log(capital):industrytextile 0.51682
## log(capital):industrywood 0.95189
## log(labor):industryfurniture 0.02709 *
## log(labor):industrygarment 0.01140 *
## log(labor):industrymetals and machinery 0.03540 *
## log(labor):industrytextile 0.65089
## log(labor):industrywood 0.65598
## log(materials):industryfurniture 0.00301 **
## log(materials):industrygarment 0.55656
## log(materials):industrymetals and machinery 0.01858 *
## log(materials):industrytextile 0.68555
## log(materials):industrywood 0.91997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
By including the country dummies, it shows that Nigeria, South Africa have higher output level, and Tanzania has lower output level (the base is Ghana), while the estimated return to scale is consistent with previous results.
prod.country1 <- plm(log(output) ~ log(capital) + log(labor) + log(materials) +
industry + country + as.factor(year), data = df, model = "pooling", index = c("firm",
"year")) ## regression including country dummy
cl.plm(df, prod.country1, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.01810 0.08460 23.85 < 2e-16 ***
## log(capital) 0.08633 0.00552 15.63 < 2e-16 ***
## log(labor) 0.23461 0.01416 16.57 < 2e-16 ***
## log(materials) 0.72636 0.01212 59.91 < 2e-16 ***
## industryfurniture -0.07251 0.02929 -2.48 0.01333 *
## industrygarment -0.05417 0.02958 -1.83 0.06708 .
## industrymetals and machinery -0.04321 0.02322 -1.86 0.06282 .
## industrytextile -0.12688 0.03526 -3.60 0.00032 ***
## industrywood 0.01884 0.03648 0.52 0.60557
## countryKenya -0.04032 0.02313 -1.74 0.08133 .
## countryNigeria 0.08735 0.02883 3.03 0.00246 **
## countrySouth Africa 0.36701 0.03274 11.21 < 2e-16 ***
## countryTanzania -0.10227 0.01758 -5.82 6.3e-09 ***
## as.factor(year)1992 0.08112 0.04029 2.01 0.04411 *
## as.factor(year)1993 0.06789 0.04043 1.68 0.09317 .
## as.factor(year)1994 0.15277 0.04420 3.46 0.00055 ***
## as.factor(year)1995 0.09884 0.03969 2.49 0.01280 *
## as.factor(year)1996 0.03003 0.04227 0.71 0.47744
## as.factor(year)1997 0.03639 0.03731 0.98 0.32953
## as.factor(year)1998 0.08615 0.03638 2.37 0.01794 *
## as.factor(year)1999 0.10205 0.03736 2.73 0.00633 **
## as.factor(year)2000 0.15002 0.04038 3.72 0.00021 ***
## as.factor(year)2001 0.15207 0.04737 3.21 0.00133 **
## as.factor(year)2002 0.13605 0.04471 3.04 0.00235 **
## as.factor(year)2003 0.11353 0.06264 1.81 0.06998 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
If we allow the coefficients to be different among countries, the results shows that the return to scale does NOT vary significantly among different industries. Using Ghana as benchmark base, Nigeria and Tanazania has lower return to capital; South Africa and Tanazaniahas lower return to materials.
prod.country2 <- plm(log(output) ~ (log(capital) + log(labor) + log(materials)) *
country + industry + as.factor(year), data = df, model = "pooling", index = c("firm",
"year")) ## regression allowing coefficients varying with country
cl.plm(df, prod.country2, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.81362 0.11321 16.02 < 2e-16
## log(capital) 0.10454 0.00814 12.84 < 2e-16
## log(labor) 0.24201 0.01895 12.77 < 2e-16
## log(materials) 0.72733 0.01756 41.43 < 2e-16
## countryKenya 0.32776 0.18368 1.78 0.07442
## countryNigeria 0.60194 0.19257 3.13 0.00178
## countrySouth Africa 1.60332 0.29213 5.49 4.3e-08
## countryTanzania 0.03695 0.11500 0.32 0.74798
## industryfurniture -0.07657 0.02815 -2.72 0.00654
## industrygarment -0.05191 0.02802 -1.85 0.06398
## industrymetals and machinery -0.04309 0.02199 -1.96 0.05012
## industrytextile -0.10769 0.03470 -3.10 0.00192
## industrywood -0.00334 0.03498 -0.10 0.92389
## as.factor(year)1992 0.07616 0.04049 1.88 0.06003
## as.factor(year)1993 0.06196 0.04104 1.51 0.13116
## as.factor(year)1994 0.14075 0.04476 3.14 0.00167
## as.factor(year)1995 0.09720 0.03979 2.44 0.01460
## as.factor(year)1996 0.01236 0.04184 0.30 0.76771
## as.factor(year)1997 0.02528 0.03771 0.67 0.50265
## as.factor(year)1998 0.06953 0.03700 1.88 0.06031
## as.factor(year)1999 0.07940 0.03783 2.10 0.03589
## as.factor(year)2000 0.12403 0.04039 3.07 0.00215
## as.factor(year)2001 0.12793 0.04704 2.72 0.00656
## as.factor(year)2002 0.11418 0.04459 2.56 0.01047
## as.factor(year)2003 0.08302 0.06239 1.33 0.18335
## log(capital):countryKenya -0.02219 0.02269 -0.98 0.32824
## log(capital):countryNigeria -0.04490 0.01350 -3.33 0.00089
## log(capital):countrySouth Africa -0.02276 0.01685 -1.35 0.17670
## log(capital):countryTanzania -0.05144 0.01127 -4.56 5.1e-06
## log(labor):countryKenya 0.00452 0.03250 0.14 0.88951
## log(labor):countryNigeria 0.01642 0.04630 0.35 0.72280
## log(labor):countrySouth Africa 0.06353 0.04564 1.39 0.16398
## log(labor):countryTanzania -0.04614 0.02333 -1.98 0.04807
## log(materials):countryKenya -0.01343 0.03715 -0.36 0.71768
## log(materials):countryNigeria -0.00569 0.03511 -0.16 0.87121
## log(materials):countrySouth Africa -0.08886 0.03178 -2.80 0.00519
## log(materials):countryTanzania 0.05136 0.01945 2.64 0.00829
##
## (Intercept) ***
## log(capital) ***
## log(labor) ***
## log(materials) ***
## countryKenya .
## countryNigeria **
## countrySouth Africa ***
## countryTanzania
## industryfurniture **
## industrygarment .
## industrymetals and machinery .
## industrytextile **
## industrywood
## as.factor(year)1992 .
## as.factor(year)1993
## as.factor(year)1994 **
## as.factor(year)1995 *
## as.factor(year)1996
## as.factor(year)1997
## as.factor(year)1998 .
## as.factor(year)1999 *
## as.factor(year)2000 **
## as.factor(year)2001 **
## as.factor(year)2002 *
## as.factor(year)2003
## log(capital):countryKenya
## log(capital):countryNigeria ***
## log(capital):countrySouth Africa
## log(capital):countryTanzania ***
## log(labor):countryKenya
## log(labor):countryNigeria
## log(labor):countrySouth Africa
## log(labor):countryTanzania *
## log(materials):countryKenya
## log(materials):countryNigeria
## log(materials):countrySouth Africa **
## log(materials):countryTanzania **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The time-invariant fixed effects (firm, industry and country) are controlled by runing following "fixed-effect” regression. The “within model” was used.
The estimated return to scale has some changes in magnitude:
The return to scale of the production function is 0.8579.
This shows a decreasing return to scale.
prod.fe <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + industry +
as.factor(year), data = df, model = "within", index = c("firm", "year"))
cl.plm(df, prod.fe, df$firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## log(capital) 0.0302 0.0133 2.27 0.02338 *
## log(labor) 0.1870 0.0191 9.79 < 2e-16 ***
## log(materials) 0.6407 0.0196 32.76 < 2e-16 ***
## as.factor(year)1992 0.1469 0.0394 3.73 0.00019 ***
## as.factor(year)1993 0.1526 0.0400 3.82 0.00014 ***
## as.factor(year)1994 0.2132 0.0418 5.10 3.6e-07 ***
## as.factor(year)1995 0.1601 0.0377 4.25 2.2e-05 ***
## as.factor(year)1996 0.0763 0.0401 1.90 0.05727 .
## as.factor(year)1997 0.0613 0.0372 1.65 0.09980 .
## as.factor(year)1998 0.0984 0.0373 2.64 0.00840 **
## as.factor(year)1999 0.1257 0.0380 3.30 0.00096 ***
## as.factor(year)2000 0.1415 0.0382 3.71 0.00021 ***
## as.factor(year)2001 0.1273 0.0414 3.08 0.00210 **
## as.factor(year)2002 0.1112 0.0410 2.72 0.00664 **
## as.factor(year)2003 0.0792 0.0487 1.63 0.10418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In a summary, we have following results regarding OLS and fixed effect regressions
there is a general positive effect of year on output level, while the industry effect is NOT that significant.
There is no significant difference among return to capital among industries. But industries “foruniture”,“garment” and “metals and machinery” has marginally lower return to labor when compared with “food and baking”. There is a significant country effect on output.
“South Africa and Nigiria” have higher output level than other countries under same condition. Using Ghana as benchmark base, “Nigeria” and “Tanazania” has lower return to capital; “South Africa” and “Tanazania” has lower return to materials.
df <- df[order(df$firm, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm[2:nrow(df)] != df$firm[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest) ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm, df.nm$year), ]
Conduct the first step regression (OLS) to obtain consistent estimation for betaL and betaM
The estimated betaL is “0.197” (return to labor);
the estimated betaM is “0.750” (return to materials)
op1 <- lm(log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital),
invest), degree = 4), data = df.nm)
summary(op1)
##
## Call:
## lm(formula = log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital),
## invest), degree = 4), data = df.nm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.315 -0.255 -0.039 0.207 5.254
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 2.94e+00 9.20e-02
## log(labor) 1.97e-01 9.86e-03
## log(materials) 7.50e-01 5.62e-03
## poly(cbind(log(capital), invest), degree = 4)1.0 1.48e+01 4.24e+00
## poly(cbind(log(capital), invest), degree = 4)2.0 3.59e+00 1.78e+00
## poly(cbind(log(capital), invest), degree = 4)3.0 -2.05e+00 6.29e-01
## poly(cbind(log(capital), invest), degree = 4)4.0 6.47e-01 5.09e-01
## poly(cbind(log(capital), invest), degree = 4)0.1 7.47e+01 9.17e+01
## poly(cbind(log(capital), invest), degree = 4)1.1 -4.40e+03 5.06e+03
## poly(cbind(log(capital), invest), degree = 4)2.1 1.97e+03 1.94e+03
## poly(cbind(log(capital), invest), degree = 4)3.1 -5.16e+02 3.99e+02
## poly(cbind(log(capital), invest), degree = 4)0.2 -1.63e+01 4.35e+01
## poly(cbind(log(capital), invest), degree = 4)1.2 9.36e+02 1.96e+03
## poly(cbind(log(capital), invest), degree = 4)2.2 -2.74e+02 4.66e+02
## poly(cbind(log(capital), invest), degree = 4)0.3 1.71e+01 1.02e+01
## poly(cbind(log(capital), invest), degree = 4)1.3 -4.59e+02 2.76e+02
## poly(cbind(log(capital), invest), degree = 4)0.4 2.28e+00 1.08e+00
## t value Pr(>|t|)
## (Intercept) 31.98 < 2e-16 ***
## log(labor) 19.99 < 2e-16 ***
## log(materials) 133.53 < 2e-16 ***
## poly(cbind(log(capital), invest), degree = 4)1.0 3.50 0.00048 ***
## poly(cbind(log(capital), invest), degree = 4)2.0 2.02 0.04353 *
## poly(cbind(log(capital), invest), degree = 4)3.0 -3.26 0.00112 **
## poly(cbind(log(capital), invest), degree = 4)4.0 1.27 0.20369
## poly(cbind(log(capital), invest), degree = 4)0.1 0.81 0.41540
## poly(cbind(log(capital), invest), degree = 4)1.1 -0.87 0.38486
## poly(cbind(log(capital), invest), degree = 4)2.1 1.01 0.31091
## poly(cbind(log(capital), invest), degree = 4)3.1 -1.29 0.19560
## poly(cbind(log(capital), invest), degree = 4)0.2 -0.38 0.70694
## poly(cbind(log(capital), invest), degree = 4)1.2 0.48 0.63235
## poly(cbind(log(capital), invest), degree = 4)2.2 -0.59 0.55760
## poly(cbind(log(capital), invest), degree = 4)0.3 1.68 0.09369 .
## poly(cbind(log(capital), invest), degree = 4)1.3 -1.66 0.09645 .
## poly(cbind(log(capital), invest), degree = 4)0.4 2.12 0.03433 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.445 on 3439 degrees of freedom
## (248 observations deleted due to missingness)
## Multiple R-squared: 0.97, Adjusted R-squared: 0.97
## F-statistic: 6.93e+03 on 16 and 3439 DF, p-value: <2e-16
The OP estimates is smaller than OLS. This is consistent with expectation. According to Aguirregabiria (2012), the OLS creates upward bias to betaM and betaL. Second, the OP estimates is larger than fixed effect estimation. This is also consistent with expectation. This is because the fixed effect largely reduces the variation of variables and it introduces significant measurement error problem, thus creating smaller estimates of betaL and betaM. .
b1 <- op1$coefficients[c("log(labor)", "log(materials)")]
xb1 <- log(as.matrix(df.nm[, c("labor", "materials")])) %*% b1
fhat <- predict(op1, df.nm) - xb1
## construction of a lag function later used to calculate lag capital and
## fhat
lag <- function(x, i = df.nm$firm, t = df.nm$year) {
if (length(i) != length(x) || length(i) != length(t)) {
stop("Inputs not same length")
}
x.lag <- x[1:(length(x) - 1)]
x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
return(c(NA, x.lag))
}
## Create data frame for step 2 regression
df.step2 <- data.frame(lhs = log(df.nm$output) - xb1, year = df.nm$year, firm = df.nm$firm,
k = log(df.nm$capital), fhat = fhat, k.lag = log(lag(df.nm$capital)), f.lag = lag(fhat))
## droping missing data
df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))
objective <- function(betaK, degree = 4) {
op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
return(sum(residuals(op2)^2))
}
fig.df <- data.frame(bk = seq(from = -0.1, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)
load.fun(ggplot2)
ggplot(data = fig.df, aes(x = bk, y = obj)) + geom_point()
opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent",
lower = -1, upper = 1)
betaK <- opt.out$par
betaK
## [1] 0.1031
Our estimated betaK=0.1031
Our estimated betaL=0.197 and betaM=0.750.
The return to scale is “0.1031+0.197+0.750=1.0501, this number is very close to our OLS estimation (1.057). This shows an almost consistent return to scale.
The OP estimates is a little larger than OLS, because of OLS estimation is biased. Second, the OP estimates is larger than fixed effect estimation. This is consistent with expectation. This is because the fixed effect largely reduces the variation of variables and it introduces significant measurement error problem, thus creating smaller estimates of betaK. .
Conduct the first step regression to obtain the consistent estimation of betaL
betaL=0.209
nomi <- !is.na(df$capital) & !is.na(df$materials) ##remove the missing values
df.mater <- subset(df, nomi)
df.mater <- df.mater[order(df.mater$firm, df.mater$year), ]
op3 <- lm(log(output) ~ log(labor) + poly(cbind(log(capital), log(materials)),
degree = 4), data = df.mater)
op3
##
## Call:
## lm(formula = log(output) ~ log(labor) + poly(cbind(log(capital),
## log(materials)), degree = 4), data = df.mater)
##
## Coefficients:
## (Intercept)
## 11.519
## log(labor)
## 0.209
## poly(cbind(log(capital), log(materials)), degree = 4)1.0
## 39.524
## poly(cbind(log(capital), log(materials)), degree = 4)2.0
## 17.642
## poly(cbind(log(capital), log(materials)), degree = 4)3.0
## 9.268
## poly(cbind(log(capital), log(materials)), degree = 4)4.0
## 1.046
## poly(cbind(log(capital), log(materials)), degree = 4)0.1
## 131.393
## poly(cbind(log(capital), log(materials)), degree = 4)1.1
## -2044.675
## poly(cbind(log(capital), log(materials)), degree = 4)2.1
## -1285.827
## poly(cbind(log(capital), log(materials)), degree = 4)3.1
## -175.867
## poly(cbind(log(capital), log(materials)), degree = 4)0.2
## 24.518
## poly(cbind(log(capital), log(materials)), degree = 4)1.2
## 994.363
## poly(cbind(log(capital), log(materials)), degree = 4)2.2
## 603.850
## poly(cbind(log(capital), log(materials)), degree = 4)0.3
## -8.984
## poly(cbind(log(capital), log(materials)), degree = 4)1.3
## -735.279
## poly(cbind(log(capital), log(materials)), degree = 4)0.4
## 3.957
bl <- op3$coefficients[c("log(labor)")]
xbl <- log(as.matrix(df.mater[, c("labor")])) %*% bl
fbar <- predict(op3, df.mater) - xbl
## create another lag function to get lag capital, materials and fbar
lag1 <- function(x, i = df.mater$firm, t = df.mater$year) {
if (length(i) != length(x) || length(i) != length(t)) {
stop("Inputs not same length")
}
x.lag1 <- x[1:(length(x) - 1)]
x.lag1[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
x.lag1[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
return(c(NA, x.lag1))
}
## create the dataframe for second stage regression to retrieve betaK and
## betaM
df.mat2 <- data.frame(lhs = log(df.mater$output) - xbl, k = log(df.mater$capital),
M = log(df.mater$materials), fbar = fbar, k.lag = log(lag1(df.mater$capital)),
f.lag = lag1(fbar), M.lag = log(lag1(df.mater$materials)))
df.mat2 <- subset(df.mat2, !apply(df.mat2, 1, function(x) any(is.na(x))))
. .
objective2 <- function(x, degree = 4) {
betaK = x[1]
betaM = x[2]
op2 <- lm(I(lhs - betaK * k - betaM * M) ~ poly(I(f.lag - betaK * k.lag -
betaM * M.lag), degree), data = df.mat2)
return(sum(residuals(op2)^2))
}
# I randomly draw thetaK and thetaM from uniform distribution [-1,1] and
# calculate the objective.
n = 1000
x = runif(n, min = -1, max = 1)
y = runif(n, min = -1, max = 1)
var = cbind(x, y)
z = rep(NA, n)
for (i in 1:n) {
z[i] = objective2(var[i, ])
}
plot3d(x, y, z, type = "p", col = 2)
opt.out2 <- nlm(objective2, c(0.08648541, 0.7520484))
opt.out2
## $minimum
## [1] 627.8
##
## $estimate
## [1] 0.1465 0.6609
##
## $gradient
## [1] 0.0005689 -0.0001473
##
## $code
## [1] 1
##
## $iterations
## [1] 8
The estimated betaK=0.1465 and betaM=0.6609.
The return to scale is "0.1465+0.209+0.6609=1.0164”, showing almost constant return to scale.