This is an R Markdown format used for publishing markdown documents to GitHub. When you click the Knit button all R code chunks are run and a markdown file (.md) suitable for publishing to GitHub is generated.
You can include R code in the document as follows:
library(readr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(frontier)
library(plm)
library(cattonum)
cl.plm <- function(dat,fm, cluster)
{
require(sandwich, quietly = TRUE)
require(lmtest, quietly = TRUE)
ef <- estfun.plm(fm)
if (length(cluster)!=nrow(ef)) {
cluster <- cluster[as.numeric(rownames(ef))]
}
M <- length(unique(cluster))
N <- length(cluster)
K <- ncol(ef)
dfc <- (M/(M-1))*((N-1)/(N-K))
uj <- apply(ef,2, function(x) tapply(x, cluster, sum))
bread <- bread.plm(fm)
meat <- crossprod(uj)
vcovCL <- dfc*bread %*% meat %*% bread
coeftest(fm, vcovCL)
}
estfun.plm <- function(x, ...) {
model <- x$args$model
formula <- formula(x)
if (length(formula)[2]>1) { ## 2SLS
data <- model.frame(x)
effect <- x$args$effect
X <- model.matrix(formula, data, rhs = 1, model = model,
effect = effect, theta = theta)
if (length(formula)[2] == 2)
W <- model.matrix(formula, data, rhs = 2, model = model,
effect = effect, theta = theta)
else
W <- model.matrix(formula, data, rhs = c(2,3), model = model,
effect = effect, theta = theta)
Xhat <- lm(X ? W)$fit
return(x$residuals*Xhat)
} else { ## OLS
X <- model.matrix(x, model = model)
return( x$residuals * X)
}
}
bread.plm <- function(x, ...) {
model <- x$args$model
formula <- formula(x)
if (length(formula)[2]>1) { ## 2SLS
data <- model.frame(x)
effect <- x$args$effect
X <- model.matrix(formula, data, rhs = 1, model = model,
effect = effect, theta = theta)
if (length(formula)[2] == 2)
W <- model.matrix(formula, data, rhs = 2, model = model,
effect = effect, theta = theta)
else
W <- model.matrix(formula, data, rhs = c(2,3), model = model,
effect = effect, theta = theta)
Xhat <- lm(X ? W)$fit
return( solve(crossprod(Xhat)) )
} else { ## OLS
X <- model.matrix(x, model = model)
return( solve(crossprod(X)) )
}
}
data <- read_csv(file="./raw_data.csv", locale = locale(encoding = "UTF-8"))
data
data$ind <- as.factor(data$業種)
data$year <- as.factor(format(data$会計期間終了日, format = "%Y"))
df <- data[,c("EDINETコード", "year", "ind", "従業員数", "売上高", "資本金", "有形固定資産")]
df
names(df) <- c("firm_id", "year", "ind", "employee", "sales", "capital", "fixed_assets")
df[df==0] <- NA
df <- df[complete.cases(df),]
df
prod.ols <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee),
data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ols, df$firm_id)
df <- df %>% catto_label(ind)
df$ind <- as.factor(df$ind)
prod.ind <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)
prod.ind <- plm(log(sales) ~ (log(capital) + log(fixed_assets) + log(employee)) * ind + year,
data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)
prod.fe <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
data = df, model = "within", index = index("firm_id", "year"))
cl.plm(df, prod.fe, df$firm_id)
df <- df[order(df$firm_id, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm_id[2:nrow(df)] != df$firm_id[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest) ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm_id, df.nm$year), ]
op1 <- lm(log(sales) ~ poly(cbind(log(capital), invest), degree = 4)+log(fixed_assets)+log(employee), data = df.nm)
summary(op1)
op1$coefficients
b1 <- op1$coefficients[c("log(employee)", "log(fixed_assets)")]
xb1 <- log(as.matrix(df.nm[, c("employee", "fixed_assets")])) %*% b1
fhat <- predict(op1, df.nm) - xb1
## construction of a lag function later used to calculate lag capital and
## fhat
lag <- function(x, i = df.nm$firm_id, t = df.nm$year) {
if (length(i) != length(x) || length(i) != length(t)) {
stop("Inputs not same length")
}
x.lag <- x[1:(length(x) - 1)]
x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
return(c(NA, x.lag))
}
## Create data frame for step 2 regression
df.step2 <- data.frame(lhs = log(df.nm$sales) - xb1,
year = df.nm$year, firm_id = df.nm$firm_id,
k = log(df.nm$capital), fhat = fhat,
k.lag = log(lag(df.nm$capital)),
f.lag = lag(fhat))
## droping missing data
df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))
objective <- function(betaK, degree = 4) {
op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
return(sum(residuals(op2)^2))
}
fig.df <- data.frame(bk = seq(from = -0.1, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)
opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent",
lower = -1, upper = 1)
betaK <- opt.out$par
betaK