setwd("/Users/koyo/Dropbox/000078_CSKAKEN/01_CSNC")
library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
csnc.all <- read_excel("sho_csnc.xlsx")
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
#### 統廃合のない学校のみを対象 複式設置校を除外
csnc.taisho_ <- dplyr::filter(csnc.all, taisho.g1 == 1 &
togo == 0 & nonrt == 0 & fuku == 0)
csnc.taisho <- select(csnc.taisho_,(c("taisho", "sid.new",
"nc.g1", "csmean.g1",
"nc.g2", "csmean.g2",
"nc.g3", "csmean.g3",
"nc.g4", "csmean.g4",
"nc.g5", "csmean.g5",
"nc.g6", "csmean.g6"
)))
csnc.taisho$taisho <- as.numeric(csnc.taisho$taisho)
csnc.taisho$sid.new <- as.numeric(csnc.taisho$sid.new)
csnc.taisho$nc.g1 <- as.numeric(csnc.taisho$nc.g1)
csnc.taisho$csmean.g1 <- as.numeric(csnc.taisho$csmean.g1)
csnc.taisho$nc.g2 <- as.numeric(csnc.taisho$nc.g2)
csnc.taisho$csmean.g2 <- as.numeric(csnc.taisho$csmean.g2)
csnc.taisho$nc.g3 <- as.numeric(csnc.taisho$nc.g3)
csnc.taisho$csmean.g3 <- as.numeric(csnc.taisho$csmean.g3)
csnc.taisho$nc.g4 <- as.numeric(csnc.taisho$nc.g4)
csnc.taisho$csmean.g4 <- as.numeric(csnc.taisho$csmean.g4)
csnc.taisho$nc.g5 <- as.numeric(csnc.taisho$nc.g5)
csnc.taisho$csmean.g5 <- as.numeric(csnc.taisho$csmean.g5)
csnc.taisho$nc.g6 <- as.numeric(csnc.taisho$nc.g6)
csnc.taisho$csmean.g6 <- as.numeric(csnc.taisho$csmean.g6)
csnc.nona <- na.omit(csnc.taisho)
colnames(csnc.nona) <- c("taisho", "sid",
"nc.g1", "cs.g1",
"nc.g2", "cs.g2",
"nc.g3", "cs.g3",
"nc.g4", "cs.g4",
"nc.g5", "cs.g5",
"nc.g6", "cs.g6"
)
csnc <- csnc.nona[,2:14]
## 学級規模を中心化する
### 各学年での平均
cs.m.g1 <- mean(csnc$cs.g1)
cs.m.g2 <- mean(csnc$cs.g2)
cs.m.g3 <- mean(csnc$cs.g3)
cs.m.g4 <- mean(csnc$cs.g4)
cs.m.g5 <- mean(csnc$cs.g5)
cs.m.g6 <- mean(csnc$cs.g6)
### 各学年の平均の平均
csm <- matrix(c(cs.m.g1, cs.m.g2, cs.m.g3, cs.m.g4, cs.m.g5, cs.m.g6), nrow=6, ncol=1)
csnc$cs.c.g1 <- csnc$cs.g1 - mean(csm)
csnc$cs.c.g2 <- csnc$cs.g2 - mean(csm)
csnc$cs.c.g3 <- csnc$cs.g3 - mean(csm)
csnc$cs.c.g4 <- csnc$cs.g4 - mean(csm)
csnc$cs.c.g5 <- csnc$cs.g5 - mean(csm)
csnc$cs.c.g6 <- csnc$cs.g6 - mean(csm)
## 学級規模変動差分データ列作成
csnc$cs.d12 <- csnc$cs.g2 - csnc$cs.g1
csnc$cs.d23 <- csnc$cs.g3 - csnc$cs.g2
csnc$cs.d34 <- csnc$cs.g4 - csnc$cs.g3
csnc$cs.d45 <- csnc$cs.g5 - csnc$cs.g4
csnc$cs.d56 <- csnc$cs.g6 - csnc$cs.g5
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/04_NRT/SY2018")
nrt.all <- read.csv("nrt.csv", fileEncoding = "Shift_JIS")
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
nrt.taisho <- nrt.all %>%
dplyr::select(c("renban", "sho.sid",
"koku.ss.1213", #1-2年生追加
"koku.ss.1314",
"koku.ss.1415",
"koku.ss.1516",
"koku.ss.1617",
"koku.ss.1718" #6-7年生追加
))
colnames(nrt.taisho) <- c("renban", "sid",
"g12.koku",
"g23.koku",
"g34.koku",
"g45.koku",
"g56.koku",
"g67.koku"
)
nrt <- na.omit(nrt.taisho)
nrt <- nrt%>%
mutate(id = row_number())
# 1-2年生
nrt.12_ <- nrt[c("id", "sid", "g12.koku")]
nrt.12_$year <- 0
csnc.1 <- csnc[c("sid", "nc.g1", "cs.g1", "cs.c.g1")]
csnc.1$cs.d01 <- 0 # ここは1年生時用の特殊処理
nrt.12 <- dplyr::inner_join(csnc.1, nrt.12_, by = "sid")
nrt.12 <- nrt.12[c( "id", "sid", "nc.g1" , "cs.g1", "cs.c.g1", "cs.d01", "g12.koku", "year")]
colnames(nrt.12) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
#2-3年生
nrt.23_ <- nrt[c("id", "sid", "g23.koku")]
nrt.23_$year <- 1
csnc.2 <- csnc[c("sid", "nc.g2", "cs.g2", "cs.c.g2", "cs.d12")]
nrt.23 <- dplyr::inner_join(csnc.2, nrt.23_, by = "sid")
nrt.23 <- nrt.23[c( "id", "sid", "nc.g2" , "cs.g2", "cs.c.g2", "cs.d12", "g23.koku", "year")]
colnames(nrt.23) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
#3-4年生
nrt.34_ <- nrt[c("id", "sid", "g34.koku")]
nrt.34_$year <- 2
csnc.3 <- csnc[c("sid", "nc.g3", "cs.g3", "cs.c.g3", "cs.d23")]
nrt.34 <- dplyr::inner_join(csnc.3, nrt.34_, by = "sid")
nrt.34 <- nrt.34[c( "id", "sid", "nc.g3" , "cs.g3", "cs.c.g3", "cs.d23", "g34.koku", "year")]
colnames(nrt.34) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
#4-5年生
nrt.45_ <- nrt[c("id", "sid", "g45.koku")]
nrt.45_$year <- 3
csnc.4 <- csnc[c("sid", "nc.g4", "cs.g4", "cs.c.g4", "cs.d34")]
nrt.45 <- dplyr::inner_join(csnc.4, nrt.45_, by = "sid")
nrt.45 <- nrt.45[c( "id", "sid", "nc.g4" , "cs.g4", "cs.c.g4", "cs.d34", "g45.koku", "year")]
colnames(nrt.45) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
#5-6年生
nrt.56_ <- nrt[c("id", "sid", "g56.koku")]
nrt.56_$year <- 4
csnc.5 <- csnc[c("sid", "nc.g5", "cs.g5", "cs.c.g5", "cs.d45")]
nrt.56 <- dplyr::inner_join(csnc.5, nrt.56_, by = "sid")
nrt.56 <- nrt.56[c( "id", "sid", "nc.g5" , "cs.g5", "cs.c.g5", "cs.d45", "g56.koku", "year")]
colnames(nrt.56) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
#6-7年生
nrt.67_ <- nrt[c("id", "sid", "g67.koku")]
nrt.67_$year <- 5
csnc.6 <- csnc[c("sid", "nc.g6", "cs.g6", "cs.c.g6", "cs.d56")]
nrt.67 <- dplyr::inner_join(csnc.6, nrt.67_, by = "sid")
nrt.67 <- nrt.67[c( "id", "sid", "nc.g6" , "cs.g6", "cs.c.g6", "cs.d56", "g67.koku", "year")]
colnames(nrt.67) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")
# うぃーんがっしゃん
nrt.lt <- dplyr::bind_rows(nrt.12, nrt.23, nrt.34, nrt.45, nrt.56, nrt.67)
library(brms)
## Loading required package: Rcpp
## Loading required package: ggplot2
## Loading 'brms' package (version 2.7.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
res1 <- brm(koku ~ year + cs.c + year:cs.c + year:cs.d + (1|id) + (1|sid),
data = nrt.lt,
# prior = NULL,
prior = c(set_prior("normal(0,10)", class = "b")),
chains = 4,
iter = 10000,
warmup = 2000
)
## Compiling the C++ model
## Start sampling
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.004807 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 48.07 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 2001 / 10000 [ 20%] (Sampling)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Sampling)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Sampling)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 438.134 seconds (Warm-up)
## Chain 1: 553.136 seconds (Sampling)
## Chain 1: 991.269 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.002159 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 21.59 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 2001 / 10000 [ 20%] (Sampling)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Sampling)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Sampling)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 410.994 seconds (Warm-up)
## Chain 2: 530.14 seconds (Sampling)
## Chain 2: 941.134 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.001914 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 19.14 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 2001 / 10000 [ 20%] (Sampling)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Sampling)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Sampling)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 400.478 seconds (Warm-up)
## Chain 3: 529.933 seconds (Sampling)
## Chain 3: 930.412 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.002061 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 20.61 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 2001 / 10000 [ 20%] (Sampling)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Sampling)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Sampling)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 407.933 seconds (Warm-up)
## Chain 4: 554.396 seconds (Sampling)
## Chain 4: 962.329 seconds (Total)
## Chain 4:
print(res1, digits = 3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: koku ~ year + cs.c + year:cs.c + year:cs.d + (1 | id) + (1 | sid)
## Data: nrt.lt (Number of observations: 25620)
## Samples: 4 chains, each with iter = 10000; warmup = 2000; thin = 1;
## total post-warmup samples = 32000
##
## Group-Level Effects:
## ~id (Number of levels: 4270)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 7.661 0.089 7.490 7.838 2633 1.001
##
## ~sid (Number of levels: 120)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.531 0.196 1.164 1.937 1146 1.003
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 53.702 0.201 53.305 54.097 3025 1.001
## year -0.219 0.018 -0.254 -0.183 24985 1.000
## cs.c -0.022 0.017 -0.055 0.011 10562 1.000
## year:cs.c -0.010 0.003 -0.016 -0.003 25781 1.000
## year:cs.d -0.001 0.005 -0.010 0.007 18529 1.000
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 4.553 0.022 4.510 4.597 27172 1.000
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).