productivity

GitHub Documents

This is an R Markdown format used for publishing markdown documents to GitHub. When you click the Knit button all R code chunks are run and a markdown file (.md) suitable for publishing to GitHub is generated.

Including Code

You can include R code in the document as follows:

library(readr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(frontier)
library(plm)
library(cattonum)

cl.plm   <- function(dat,fm, cluster)
{
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  ef <- estfun.plm(fm)
  if (length(cluster)!=nrow(ef)) {
    cluster <- cluster[as.numeric(rownames(ef))]
  }
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- ncol(ef)
  dfc <- (M/(M-1))*((N-1)/(N-K))

  uj  <- apply(ef,2, function(x) tapply(x, cluster, sum))
  bread <- bread.plm(fm)
  meat <- crossprod(uj)
  vcovCL <- dfc*bread %*% meat %*% bread
  coeftest(fm, vcovCL)
}

estfun.plm <- function(x, ...) {
  model <- x$args$model
  formula <- formula(x)
  if (length(formula)[2]>1) { ## 2SLS
    data <- model.frame(x)
    effect <- x$args$effect
    X <- model.matrix(formula, data, rhs = 1, model = model,
                      effect = effect, theta = theta)
    if (length(formula)[2] == 2)
      W <- model.matrix(formula, data, rhs = 2, model = model,
                        effect = effect, theta = theta)
    else
      W <- model.matrix(formula, data, rhs = c(2,3), model = model,
                        effect = effect, theta = theta)
    Xhat <- lm(X ? W)$fit
    return(x$residuals*Xhat)
  } else { ## OLS
    X <- model.matrix(x, model = model)
    return( x$residuals * X)
  }
}

bread.plm <- function(x, ...) {
  model <- x$args$model
  formula <- formula(x)
  if (length(formula)[2]>1) { ## 2SLS
    data <- model.frame(x)
    effect <- x$args$effect
    X <- model.matrix(formula, data, rhs = 1, model = model,
                      effect = effect, theta = theta)
    if (length(formula)[2] == 2)
      W <- model.matrix(formula, data, rhs = 2, model = model,
                        effect = effect, theta = theta)
    else
      W <- model.matrix(formula, data, rhs = c(2,3), model = model,
                        effect = effect, theta = theta)
    Xhat <- lm(X ? W)$fit
    return( solve(crossprod(Xhat)) )
  } else { ## OLS
    X <- model.matrix(x, model = model)
    return( solve(crossprod(X)) )
  }
}

data <- read_csv(file="./raw_data.csv", locale = locale(encoding = "UTF-8"))
data

data$ind <- as.factor(data$業種)
data$year <- as.factor(format(data$会計期間終了日, format = "%Y"))
df <- data[,c("EDINETコード", "year", "ind", "従業員数", "売上高", "資本金", "有形固定資産")]
df
names(df) <- c("firm_id", "year", "ind", "employee", "sales", "capital", "fixed_assets")
df[df==0] <- NA
df <- df[complete.cases(df),]
df

prod.ols <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee),
           data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ols, df$firm_id)

df <- df %>% catto_label(ind)
df$ind <- as.factor(df$ind)
prod.ind <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
                data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)

prod.ind <- plm(log(sales) ~ (log(capital) + log(fixed_assets) + log(employee)) * ind + year,
                data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)

prod.fe <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
                data = df, model = "within", index = index("firm_id", "year"))
cl.plm(df, prod.fe, df$firm_id)


df <- df[order(df$firm_id, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm_id[2:nrow(df)] != df$firm_id[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest)  ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm_id, df.nm$year), ]

op1 <- lm(log(sales) ~ poly(cbind(log(capital), invest), degree = 4)+log(fixed_assets)+log(employee), data = df.nm)
summary(op1)

op1$coefficients
b1 <- op1$coefficients[c("log(employee)", "log(fixed_assets)")]
xb1 <- log(as.matrix(df.nm[, c("employee", "fixed_assets")])) %*% b1
fhat <- predict(op1, df.nm) - xb1

## construction of a lag function later used to calculate lag capital and
## fhat
lag <- function(x, i = df.nm$firm_id, t = df.nm$year) {
  if (length(i) != length(x) || length(i) != length(t)) {
    stop("Inputs not same length")
  }
  x.lag <- x[1:(length(x) - 1)]
  x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
  x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
  return(c(NA, x.lag))
}

## Create data frame for step 2 regression
df.step2 <- data.frame(lhs = log(df.nm$sales) - xb1,
                       year = df.nm$year, firm_id = df.nm$firm_id,
                       k = log(df.nm$capital), fhat = fhat,
                       k.lag = log(lag(df.nm$capital)),
                       f.lag = lag(fhat))

## droping missing data
df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))

objective <- function(betaK, degree = 4) {
  op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
  return(sum(residuals(op2)^2))
}

fig.df <- data.frame(bk = seq(from = -0.1, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)

opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent",
                 lower = -1, upper = 1)
betaK <- opt.out$par

betaK