データの前処理

クラスサイズデータ

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/01_CSNC")
library(data.table)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
csnc.all <- read_excel("sho_csnc.xlsx")

学校データ整形

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
#### 統廃合のない学校のみを対象 複式設置校を除外
csnc.taisho_ <- dplyr::filter(csnc.all, taisho.g1 == 1 &
                          togo == 0 & nonrt == 0 & fuku == 0)

学校データを数値型にする

csnc.taisho <- select(csnc.taisho_,(c("taisho", "sid.new", 
                                  "nc.g1", "csmean.g1",
                                  "nc.g2", "csmean.g2",
                                  "nc.g3", "csmean.g3",
                                  "nc.g4", "csmean.g4",
                                  "nc.g5", "csmean.g5",
                                  "nc.g6", "csmean.g6"
                                  )))
csnc.taisho$taisho <- as.numeric(csnc.taisho$taisho)
csnc.taisho$sid.new <- as.numeric(csnc.taisho$sid.new)

csnc.taisho$nc.g1     <- as.numeric(csnc.taisho$nc.g1)
csnc.taisho$csmean.g1 <- as.numeric(csnc.taisho$csmean.g1)
csnc.taisho$nc.g2     <- as.numeric(csnc.taisho$nc.g2)
csnc.taisho$csmean.g2 <- as.numeric(csnc.taisho$csmean.g2)
csnc.taisho$nc.g3     <- as.numeric(csnc.taisho$nc.g3)
csnc.taisho$csmean.g3 <- as.numeric(csnc.taisho$csmean.g3)
csnc.taisho$nc.g4     <- as.numeric(csnc.taisho$nc.g4)
csnc.taisho$csmean.g4 <- as.numeric(csnc.taisho$csmean.g4)
csnc.taisho$nc.g5     <- as.numeric(csnc.taisho$nc.g5)
csnc.taisho$csmean.g5 <- as.numeric(csnc.taisho$csmean.g5)
csnc.taisho$nc.g6     <- as.numeric(csnc.taisho$nc.g6)
csnc.taisho$csmean.g6 <- as.numeric(csnc.taisho$csmean.g6)

csnc.nona <- na.omit(csnc.taisho)
colnames(csnc.nona) <- c("taisho", "sid", 
                         "nc.g1", "cs.g1",
                         "nc.g2", "cs.g2",
                         "nc.g3", "cs.g3",
                         "nc.g4", "cs.g4",
                         "nc.g5", "cs.g5",
                         "nc.g6", "cs.g6"
                        )
csnc <- csnc.nona[,2:14]

## 学級規模を中心化する
### 各学年での平均
cs.m.g1 <- mean(csnc$cs.g1)
cs.m.g2 <- mean(csnc$cs.g2)
cs.m.g3 <- mean(csnc$cs.g3)
cs.m.g4 <- mean(csnc$cs.g4)
cs.m.g5 <- mean(csnc$cs.g5)
cs.m.g6 <- mean(csnc$cs.g6)

### 各学年の平均の平均
csm <- matrix(c(cs.m.g1, cs.m.g2, cs.m.g3, cs.m.g4, cs.m.g5, cs.m.g6), nrow=6, ncol=1)

csnc$cs.c.g1 <- csnc$cs.g1 -  mean(csm)
csnc$cs.c.g2 <- csnc$cs.g2 -  mean(csm)
csnc$cs.c.g3 <- csnc$cs.g3 -  mean(csm)
csnc$cs.c.g4 <- csnc$cs.g4 -  mean(csm)
csnc$cs.c.g5 <- csnc$cs.g5 -  mean(csm)
csnc$cs.c.g6 <- csnc$cs.g6 -  mean(csm)

## 学級規模変動差分データ列作成
csnc$cs.d12 <- csnc$cs.g2 - csnc$cs.g1 
csnc$cs.d23 <- csnc$cs.g3 - csnc$cs.g2 
csnc$cs.d34 <- csnc$cs.g4 - csnc$cs.g3 
csnc$cs.d45 <- csnc$cs.g5 - csnc$cs.g4 
csnc$cs.d56 <- csnc$cs.g6 - csnc$cs.g5 

NRTデータ

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/04_NRT/SY2018")
nrt.all <- read.csv("nrt.csv", fileEncoding = "Shift_JIS") 
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")

国語だけを取り出す

nrt.taisho <- nrt.all %>%
              dplyr::select(c("renban", "sho.sid",
                              "koku.ss.1213", #1-2年生追加
                              "koku.ss.1314", 
                              "koku.ss.1415", 
                              "koku.ss.1516",  
                              "koku.ss.1617",
                              "koku.ss.1718" #6-7年生追加
                              ))

colnames(nrt.taisho) <- c("renban", "sid",
                         "g12.koku",
                         "g23.koku",
                         "g34.koku",
                         "g45.koku",
                         "g56.koku",
                         "g67.koku"
                         )

nrt <- na.omit(nrt.taisho)

nrt <- nrt%>%
  mutate(id = row_number())

縦断データにする

# 1-2年生
nrt.12_ <- nrt[c("id", "sid", "g12.koku")]
nrt.12_$year <- 0
csnc.1 <- csnc[c("sid",  "nc.g1", "cs.g1", "cs.c.g1")]
csnc.1$cs.d01 <- 0 # ここは1年生時用の特殊処理
nrt.12 <- dplyr::inner_join(csnc.1, nrt.12_, by = "sid")
nrt.12 <- nrt.12[c( "id", "sid",  "nc.g1" ,  "cs.g1", "cs.c.g1", "cs.d01", "g12.koku", "year")]
colnames(nrt.12) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

#2-3年生
nrt.23_ <- nrt[c("id", "sid", "g23.koku")]
nrt.23_$year <- 1
csnc.2 <- csnc[c("sid",  "nc.g2", "cs.g2",  "cs.c.g2", "cs.d12")]
nrt.23 <- dplyr::inner_join(csnc.2, nrt.23_, by = "sid")
nrt.23 <- nrt.23[c( "id", "sid",  "nc.g2" ,  "cs.g2",  "cs.c.g2", "cs.d12", "g23.koku", "year")]
colnames(nrt.23) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

#3-4年生
nrt.34_ <- nrt[c("id", "sid", "g34.koku")]
nrt.34_$year <- 2
csnc.3 <- csnc[c("sid",  "nc.g3", "cs.g3",  "cs.c.g3", "cs.d23")]
nrt.34 <- dplyr::inner_join(csnc.3, nrt.34_, by = "sid")
nrt.34 <- nrt.34[c( "id", "sid",  "nc.g3" ,  "cs.g3",  "cs.c.g3", "cs.d23", "g34.koku", "year")]
colnames(nrt.34) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

#4-5年生
nrt.45_ <- nrt[c("id", "sid", "g45.koku")]
nrt.45_$year <- 3
csnc.4 <- csnc[c("sid",  "nc.g4", "cs.g4",  "cs.c.g4", "cs.d34")]
nrt.45 <- dplyr::inner_join(csnc.4, nrt.45_, by = "sid")
nrt.45 <- nrt.45[c( "id", "sid",  "nc.g4" ,  "cs.g4",  "cs.c.g4", "cs.d34", "g45.koku", "year")]
colnames(nrt.45) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

#5-6年生
nrt.56_ <- nrt[c("id", "sid", "g56.koku")]
nrt.56_$year <- 4
csnc.5 <- csnc[c("sid",  "nc.g5", "cs.g5",  "cs.c.g5", "cs.d45")]
nrt.56 <- dplyr::inner_join(csnc.5, nrt.56_, by = "sid")
nrt.56 <- nrt.56[c( "id", "sid",  "nc.g5" ,  "cs.g5",  "cs.c.g5", "cs.d45", "g56.koku", "year")]
colnames(nrt.56) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

#6-7年生
nrt.67_ <- nrt[c("id", "sid", "g67.koku")]
nrt.67_$year <- 5
csnc.6 <- csnc[c("sid",  "nc.g6", "cs.g6",  "cs.c.g6", "cs.d56")]
nrt.67 <- dplyr::inner_join(csnc.6, nrt.67_, by = "sid")
nrt.67 <- nrt.67[c( "id", "sid",  "nc.g6" ,  "cs.g6",  "cs.c.g6", "cs.d56", "g67.koku", "year")]
colnames(nrt.67) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "koku", "year")

# うぃーんがっしゃん
nrt.lt <- dplyr::bind_rows(nrt.12, nrt.23, nrt.34, nrt.45, nrt.56, nrt.67)

分析する

library(brms)
## Loading required package: Rcpp
## Loading required package: ggplot2
## Loading 'brms' package (version 2.7.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
res1 <- brm(koku ~ year + cs.c + year:cs.c + year:cs.d + (1|id) + (1|sid), 
                         data = nrt.lt,
#              prior   = NULL,
               prior   = c(set_prior("normal(0,10)", class = "b")), 
               chains  = 4,
               iter    = 10000,
               warmup  = 2000
                         )
## Compiling the C++ model
## Start sampling
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.004807 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 48.07 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 2001 / 10000 [ 20%]  (Sampling)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 438.134 seconds (Warm-up)
## Chain 1:                553.136 seconds (Sampling)
## Chain 1:                991.269 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.002159 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 21.59 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 2001 / 10000 [ 20%]  (Sampling)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 410.994 seconds (Warm-up)
## Chain 2:                530.14 seconds (Sampling)
## Chain 2:                941.134 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.001914 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 19.14 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 2001 / 10000 [ 20%]  (Sampling)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 400.478 seconds (Warm-up)
## Chain 3:                529.933 seconds (Sampling)
## Chain 3:                930.412 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.002061 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 20.61 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 2001 / 10000 [ 20%]  (Sampling)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Sampling)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Sampling)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 407.933 seconds (Warm-up)
## Chain 4:                554.396 seconds (Sampling)
## Chain 4:                962.329 seconds (Total)
## Chain 4:
print(res1, digits = 3)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: koku ~ year + cs.c + year:cs.c + year:cs.d + (1 | id) + (1 | sid) 
##    Data: nrt.lt (Number of observations: 25620) 
## Samples: 4 chains, each with iter = 10000; warmup = 2000; thin = 1;
##          total post-warmup samples = 32000
## 
## Group-Level Effects: 
## ~id (Number of levels: 4270) 
##               Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sd(Intercept)    7.661     0.089    7.490    7.838       2633 1.001
## 
## ~sid (Number of levels: 120) 
##               Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sd(Intercept)    1.531     0.196    1.164    1.937       1146 1.003
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## Intercept   53.702     0.201   53.305   54.097       3025 1.001
## year        -0.219     0.018   -0.254   -0.183      24985 1.000
## cs.c        -0.022     0.017   -0.055    0.011      10562 1.000
## year:cs.c   -0.010     0.003   -0.016   -0.003      25781 1.000
## year:cs.d   -0.001     0.005   -0.010    0.007      18529 1.000
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sigma    4.553     0.022    4.510    4.597      27172 1.000
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).