データの前処理

クラスサイズデータ

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/01_CSNC")
library(data.table)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
csnc.all <- read_excel("sho_csnc.xlsx")

学校データ整形

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")
#### 統廃合のない学校のみを対象 複式設置校を除外
csnc.taisho_ <- dplyr::filter(csnc.all, taisho.g1 == 1 &
                          togo == 0 & nonrt == 0 & fuku == 0)

学校データを数値型にする

csnc.taisho <- select(csnc.taisho_,(c("taisho", "sid.new", 
                                  "nc.g3", "csmean.g3",
                                  "nc.g4", "csmean.g4",
                                  "nc.g5", "csmean.g5",
                                  "nc.g6", "csmean.g6"
                                  )))
csnc.taisho$taisho <- as.numeric(csnc.taisho$taisho)
csnc.taisho$sid.new <- as.numeric(csnc.taisho$sid.new)

csnc.taisho$nc.g3     <- as.numeric(csnc.taisho$nc.g3)
csnc.taisho$csmean.g3 <- as.numeric(csnc.taisho$csmean.g3)
csnc.taisho$nc.g4     <- as.numeric(csnc.taisho$nc.g4)
csnc.taisho$csmean.g4 <- as.numeric(csnc.taisho$csmean.g4)
csnc.taisho$nc.g5     <- as.numeric(csnc.taisho$nc.g5)
csnc.taisho$csmean.g5 <- as.numeric(csnc.taisho$csmean.g5)
csnc.taisho$nc.g6     <- as.numeric(csnc.taisho$nc.g6)
csnc.taisho$csmean.g6 <- as.numeric(csnc.taisho$csmean.g6)

csnc.nona <- na.omit(csnc.taisho)
colnames(csnc.nona) <- c("taisho", "sid", 
                         "nc.g3", "cs.g3",
                         "nc.g4", "cs.g4",
                         "nc.g5", "cs.g5",
                         "nc.g6", "cs.g6"
                        )
csnc <- csnc.nona[,2:10]

## 学級規模を中心化する
### 各学年での平均
cs.m.g3 <- mean(csnc$cs.g3)
cs.m.g4 <- mean(csnc$cs.g4)
cs.m.g5 <- mean(csnc$cs.g5)
cs.m.g6 <- mean(csnc$cs.g6)

### 各学年の平均の平均
csm <- matrix(c(cs.m.g3, cs.m.g4, cs.m.g5, cs.m.g6), nrow=4, ncol=1)

csnc$cs.c.g3 <- csnc$cs.g3 -  mean(csm)
csnc$cs.c.g4 <- csnc$cs.g4 -  mean(csm)
csnc$cs.c.g5 <- csnc$cs.g5 -  mean(csm)
csnc$cs.c.g6 <- csnc$cs.g6 -  mean(csm)

## 学級規模変動差分データ列作成
csnc$cs.d34 <- csnc$cs.g4 - csnc$cs.g3 
csnc$cs.d45 <- csnc$cs.g5 - csnc$cs.g4 
csnc$cs.d56 <- csnc$cs.g6 - csnc$cs.g5 

NRTデータ

setwd("/Users/koyo/Dropbox/000078_CSKAKEN/04_NRT/SY2018")
nrt.all <- read.csv("nrt.csv", fileEncoding = "Shift_JIS") 
setwd("/Users/koyo/Dropbox/000078_CSKAKEN/190700_Anal")

国語だけを取り出す

縦断データにする

#3-4年生
nrt.34_ <- nrt[c("id", "sid", "g34.shak")]
nrt.34_$year <- 0
csnc.3 <- csnc[c("sid",  "nc.g3", "cs.g3",  "cs.c.g3")]
csnc.3$cs.d23 <- 0 # ここは3年生時用の特殊処理
nrt.34 <- dplyr::inner_join(csnc.3, nrt.34_, by = "sid")
nrt.34 <- nrt.34[c( "id", "sid",  "nc.g3" ,  "cs.g3",  "cs.c.g3", "cs.d23", "g34.shak", "year")]
colnames(nrt.34) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")

#4-5年生
nrt.45_ <- nrt[c("id", "sid", "g45.shak")]
nrt.45_$year <- 1
csnc.4 <- csnc[c("sid",  "nc.g4", "cs.g4",  "cs.c.g4", "cs.d34")]
nrt.45 <- dplyr::inner_join(csnc.4, nrt.45_, by = "sid")
nrt.45 <- nrt.45[c( "id", "sid",  "nc.g4" ,  "cs.g4",  "cs.c.g4", "cs.d34", "g45.shak", "year")]
colnames(nrt.45) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")

#5-6年生
nrt.56_ <- nrt[c("id", "sid", "g56.shak")]
nrt.56_$year <- 2
csnc.5 <- csnc[c("sid",  "nc.g5", "cs.g5",  "cs.c.g5", "cs.d45")]
nrt.56 <- dplyr::inner_join(csnc.5, nrt.56_, by = "sid")
nrt.56 <- nrt.56[c( "id", "sid",  "nc.g5" ,  "cs.g5",  "cs.c.g5", "cs.d45", "g56.shak", "year")]
colnames(nrt.56) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")

#6-7年生
nrt.67_ <- nrt[c("id", "sid", "g67.shak")]
nrt.67_$year <- 3
csnc.6 <- csnc[c("sid",  "nc.g6", "cs.g6",  "cs.c.g6", "cs.d56")]
nrt.67 <- dplyr::inner_join(csnc.6, nrt.67_, by = "sid")
nrt.67 <- nrt.67[c( "id", "sid",  "nc.g6" ,  "cs.g6",  "cs.c.g6", "cs.d56", "g67.shak", "year")]
colnames(nrt.67) <- c("id", "sid", "nc", "cs", "cs.c", "cs.d", "shak", "year")

# うぃーんがっしゃん
nrt.lt <- dplyr::bind_rows(nrt.34, nrt.45, nrt.56, nrt.67)

分析する

library(brms)
## Loading required package: Rcpp
## Loading required package: ggplot2
## Loading 'brms' package (version 2.7.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## Run theme_set(theme_default()) to use the default bayesplot theme.
res1 <- brm(shak ~ year + cs.c + year:cs.c + year:cs.d + (1|id) + (1|sid), 
                         data = nrt.lt,
#              prior   = NULL,
               prior   = c(set_prior("normal(0,10)", class = "b")), 
               chains  = 2,
               iter    = 1000,
               warmup  = 200
                         )
## Compiling the C++ model
## Start sampling
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.001153 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.53 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 1: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 1: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 1: Iteration: 201 / 1000 [ 20%]  (Sampling)
## Chain 1: Iteration: 300 / 1000 [ 30%]  (Sampling)
## Chain 1: Iteration: 400 / 1000 [ 40%]  (Sampling)
## Chain 1: Iteration: 500 / 1000 [ 50%]  (Sampling)
## Chain 1: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 1: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 1: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 1: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 1: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 14.4515 seconds (Warm-up)
## Chain 1:                25.5671 seconds (Sampling)
## Chain 1:                40.0186 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'fe29ec7d7d1ace277914b27a8b411d77' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.000717 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 7.17 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:   1 / 1000 [  0%]  (Warmup)
## Chain 2: Iteration: 100 / 1000 [ 10%]  (Warmup)
## Chain 2: Iteration: 200 / 1000 [ 20%]  (Warmup)
## Chain 2: Iteration: 201 / 1000 [ 20%]  (Sampling)
## Chain 2: Iteration: 300 / 1000 [ 30%]  (Sampling)
## Chain 2: Iteration: 400 / 1000 [ 40%]  (Sampling)
## Chain 2: Iteration: 500 / 1000 [ 50%]  (Sampling)
## Chain 2: Iteration: 600 / 1000 [ 60%]  (Sampling)
## Chain 2: Iteration: 700 / 1000 [ 70%]  (Sampling)
## Chain 2: Iteration: 800 / 1000 [ 80%]  (Sampling)
## Chain 2: Iteration: 900 / 1000 [ 90%]  (Sampling)
## Chain 2: Iteration: 1000 / 1000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 19.6449 seconds (Warm-up)
## Chain 2:                12.6973 seconds (Sampling)
## Chain 2:                32.3422 seconds (Total)
## Chain 2:
print(res1, digits = 3)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: shak ~ year + cs.c + year:cs.c + year:cs.d + (1 | id) + (1 | sid) 
##    Data: nrt.lt (Number of observations: 6616) 
## Samples: 2 chains, each with iter = 1000; warmup = 200; thin = 1;
##          total post-warmup samples = 1600
## 
## Group-Level Effects: 
## ~id (Number of levels: 1654) 
##               Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sd(Intercept)    9.027     0.174    8.689    9.367        267 1.021
## 
## ~sid (Number of levels: 48) 
##               Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sd(Intercept)    1.689     0.397    0.996    2.573        119 1.025
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## Intercept   52.424     0.373   51.701   53.180        351 1.004
## year         0.057     0.060   -0.066    0.173       3523 1.000
## cs.c         0.026     0.040   -0.053    0.105        520 1.003
## year:cs.c   -0.022     0.012   -0.046    0.000       2243 1.000
## year:cs.d   -0.001     0.019   -0.038    0.036       1128 1.001
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Eff.Sample  Rhat
## sigma    5.387     0.055    5.281    5.496       1728 1.001
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).