setwd("/Users/koyo/Dropbox/000078_CSKAKEN/01_CSNC")
library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
csnc.all <- read_excel("sho_csnc.xlsx")
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
#### 統廃合のない学校のみを対象 複式設置校を除外
csnc.taisho_ <- dplyr::filter(csnc.all, taisho.g1 == 1 &
togo == 0 & nonrt == 0 & fuku == 0)
csnc.taisho <- select(csnc.taisho_,(c("taisho", "sid.new",
"nc.g3", "csmean.g3",
"nc.g4", "csmean.g4",
"nc.g5", "csmean.g5",
"nc.g6", "csmean.g6"
)))
csnc.taisho$taisho <- as.numeric(csnc.taisho$taisho)
csnc.taisho$sid.new <- as.numeric(csnc.taisho$sid.new)
csnc.taisho$nc.g3 <- as.numeric(csnc.taisho$nc.g3)
csnc.taisho$csmean.g3 <- as.numeric(csnc.taisho$csmean.g3)
csnc.taisho$nc.g4 <- as.numeric(csnc.taisho$nc.g4)
csnc.taisho$csmean.g4 <- as.numeric(csnc.taisho$csmean.g4)
csnc.taisho$nc.g5 <- as.numeric(csnc.taisho$nc.g5)
csnc.taisho$csmean.g5 <- as.numeric(csnc.taisho$csmean.g5)
csnc.taisho$nc.g6 <- as.numeric(csnc.taisho$nc.g6)
csnc.taisho$csmean.g6 <- as.numeric(csnc.taisho$csmean.g6)
csnc.nona <- na.omit(csnc.taisho)
colnames(csnc.nona) <- c("taisho", "sid",
"nc.g3", "cs.g3",
"nc.g4", "cs.g4",
"nc.g5", "cs.g5",
"nc.g6", "cs.g6"
)
csnc <- csnc.nona[,2:10]
## 学級規模を中心化する
### 各学年での平均
cs.m.g3 <- mean(csnc$cs.g3)
cs.m.g4 <- mean(csnc$cs.g4)
cs.m.g5 <- mean(csnc$cs.g5)
cs.m.g6 <- mean(csnc$cs.g6)
### 各学年の平均の平均
csm <- matrix(c(cs.m.g3, cs.m.g4, cs.m.g5, cs.m.g6), nrow=4, ncol=1)
csnc$cs.c.g3 <- csnc$cs.g3 - mean(csm)
csnc$cs.c.g4 <- csnc$cs.g4 - mean(csm)
csnc$cs.c.g5 <- csnc$cs.g5 - mean(csm)
csnc$cs.c.g6 <- csnc$cs.g6 - mean(csm)
## 学級規模変動差分データ列作成
csnc$cs.d34 <- csnc$cs.g4 - csnc$cs.g3
csnc$cs.d45 <- csnc$cs.g5 - csnc$cs.g4
csnc$cs.d56 <- csnc$cs.g6 - csnc$cs.g5
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/04_NRT/SY2018")
nrt.all <- read.csv("nrt.csv", fileEncoding = "Shift_JIS")
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
#3-4年生
nrt.34_ <- nrt[c("id", "sid", "g34.shak")]
nrt.34_$year <- 0
csnc.3 <- csnc[c("sid", "nc.g3", "cs.g3", "cs.c.g3")]
csnc.3$cs.d23 <- 0 # ここは3年生時用の特殊処理
nrt.34 <- dplyr::inner_join(csnc.3, nrt.34_, by = "sid")
nrt.34 <- nrt.34[c( "id", "sid", "nc.g3" , "cs.g3", "cs.c.g3", "cs.d23", "g34.shak", "year")]
colnames(nrt.34) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")
#4-5年生
nrt.45_ <- nrt[c("id", "sid", "g45.shak")]
nrt.45_$year <- 1
csnc.4 <- csnc[c("sid", "nc.g4", "cs.g4", "cs.c.g4", "cs.d34")]
nrt.45 <- dplyr::inner_join(csnc.4, nrt.45_, by = "sid")
nrt.45 <- nrt.45[c( "id", "sid", "nc.g4" , "cs.g4", "cs.c.g4", "cs.d34", "g45.shak", "year")]
colnames(nrt.45) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")
#5-6年生
nrt.56_ <- nrt[c("id", "sid", "g56.shak")]
nrt.56_$year <- 2
csnc.5 <- csnc[c("sid", "nc.g5", "cs.g5", "cs.c.g5", "cs.d45")]
nrt.56 <- dplyr::inner_join(csnc.5, nrt.56_, by = "sid")
nrt.56 <- nrt.56[c( "id", "sid", "nc.g5" , "cs.g5", "cs.c.g5", "cs.d45", "g56.shak", "year")]
colnames(nrt.56) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")
#6-7年生
nrt.67_ <- nrt[c("id", "sid", "g67.shak")]
nrt.67_$year <- 3
csnc.6 <- csnc[c("sid", "nc.g6", "cs.g6", "cs.c.g6", "cs.d56")]
nrt.67 <- dplyr::inner_join(csnc.6, nrt.67_, by = "sid")
nrt.67 <- nrt.67[c( "id", "sid", "nc.g6" , "cs.g6", "cs.c.g6", "cs.d56", "g67.shak", "year")]
colnames(nrt.67) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")
# うぃーんがっしゃん
nrt.lt <- dplyr::bind_rows(nrt.34, nrt.45, nrt.56, nrt.67)
library(brms)
## Loading required package: Rcpp
## Loading required package: ggplot2
## Loading 'brms' package (version 2.7.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
res1 <- brm(shak ~ year + cs.c + year:cs.c + year:cs.d + (1|id) + (1|sid),
data = nrt.lt,
# prior = NULL,
prior = c(set_prior("normal(0,10)", class = "b")),
chains = 2,
iter = 1000,
warmup = 200
)
## Compiling the C++ model
## Start sampling
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.001153 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.53 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1: Iteration: 201 / 1000 [ 20%] (Sampling)
## Chain 1: Iteration: 300 / 1000 [ 30%] (Sampling)
## Chain 1: Iteration: 400 / 1000 [ 40%] (Sampling)
## Chain 1: Iteration: 500 / 1000 [ 50%] (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 14.4515 seconds (Warm-up)
## Chain 1: 25.5671 seconds (Sampling)
## Chain 1: 40.0186 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000717 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 7.17 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2: Iteration: 201 / 1000 [ 20%] (Sampling)
## Chain 2: Iteration: 300 / 1000 [ 30%] (Sampling)
## Chain 2: Iteration: 400 / 1000 [ 40%] (Sampling)
## Chain 2: Iteration: 500 / 1000 [ 50%] (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 19.6449 seconds (Warm-up)
## Chain 2: 12.6973 seconds (Sampling)
## Chain 2: 32.3422 seconds (Total)
## Chain 2:
print(res1, digits = 3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: shak ~ year + cs.c + year:cs.c + year:cs.d + (1 | id) + (1 | sid)
## Data: nrt.lt (Number of observations: 6616)
## Samples: 2 chains, each with iter = 1000; warmup = 200; thin = 1;
## total post-warmup samples = 1600
##
## Group-Level Effects:
## ~id (Number of levels: 1654)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 9.027 0.174 8.689 9.367 267 1.021
##
## ~sid (Number of levels: 48)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.689 0.397 0.996 2.573 119 1.025
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 52.424 0.373 51.701 53.180 351 1.004
## year 0.057 0.060 -0.066 0.173 3523 1.000
## cs.c 0.026 0.040 -0.053 0.105 520 1.003
## year:cs.c -0.022 0.012 -0.046 0.000 2243 1.000
## year:cs.d -0.001 0.019 -0.038 0.036 1128 1.001
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma 5.387 0.055 5.281 5.496 1728 1.001
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).