-まずは一般線形モデルを紹介
library(car)
## Loading required package: carData
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
cor(cars$speed, cars$dist)
## [1] 0.8068949
#相関係数
cor(cars$speed, cars$dist)
## [1] 0.8068949
#散布図と回帰直線をひいてみる
library(ggplot2)
ggplot(data = cars) +
aes(x=speed, y=dist) +
geom_point()+
geom_smooth(method="lm", se=T)
## `geom_smooth()` using formula 'y ~ x'
lm_cars<-lm(dist~speed, data=cars)
summary(lm_cars)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
#住宅ローンのデータ
#支払いが3ヶ月以上遅れると1
#FICO=信用スコア, ローン額/評価額, INCOME=所得, CA=カリフォルニア州かどうか
credit <- read.csv("credit.csv", header = T)
attach(credit)
head(credit)
## EVER90 FICO LTV INCOME CA
## 1 1 576 0.86 5 Other
## 2 1 678 0.90 3 California
## 3 1 693 0.80 5 Other
## 4 1 669 0.75 4 Other
## 5 1 542 0.95 4 Other
## 6 1 566 0.95 3 Other
summary(credit)
## EVER90 FICO LTV INCOME
## Min. :0.0000 Min. :461.0 Min. :0.2600 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:670.0 1st Qu.:0.7000 1st Qu.:3.000
## Median :0.0000 Median :724.0 Median :0.8000 Median :5.000
## Mean :0.3158 Mean :708.6 Mean :0.7617 Mean :4.189
## 3rd Qu.:1.0000 3rd Qu.:764.0 3rd Qu.:0.8850 3rd Qu.:5.000
## Max. :1.0000 Max. :809.0 Max. :0.9500 Max. :8.000
## CA
## Length:95
## Class :character
## Mode :character
##
##
##
#デフォルトは31.6%
#散布図と相関を出してみる
library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.0 ─
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
credit %>%
select(EVER90, FICO, LTV, INCOME) %>%
ggpairs()
fit.credit<- glm(EVER90 ~ FICO+LTV,data=credit, family=binomial)
library(jtools)
summ(fit.credit)
## MODEL INFO:
## Observations: 95
## Dependent Variable: EVER90
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(2) = 50.05, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.57
## Pseudo-R² (McFadden) = 0.42
## AIC = 74.44, BIC = 82.11
##
## Standard errors: MLE
## ------------------------------------------------
## Est. S.E. z val. p
## ----------------- ------- ------ -------- ------
## (Intercept) 15.84 4.93 3.21 0.00
## FICO -0.03 0.01 -4.41 0.00
## LTV 5.66 2.67 2.12 0.03
## ------------------------------------------------
#係数の図示
coefplot::coefplot(fit.credit, intercept = FALSE)
summ(fit.credit, exp=T)
## MODEL INFO:
## Observations: 95
## Dependent Variable: EVER90
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(2) = 50.05, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.57
## Pseudo-R² (McFadden) = 0.42
## AIC = 74.44, BIC = 82.11
##
## Standard errors: MLE
## -------------------------------------------------------------------------
## exp(Est.) 2.5% 97.5% z val. p
## ----------------- ------------ -------- ----------------- -------- ------
## (Intercept) 7565801.81 481.25 118943505784.40 3.21 0.00
## FICO 0.97 0.96 0.98 -4.41 0.00
## LTV 286.16 1.52 53973.24 2.12 0.03
## -------------------------------------------------------------------------
#結果のプロット(合ってるか微妙...)
fit <-fitted(fit.credit)
plot(FICO, fit, col="red")# 赤色にする
par(new=TRUE)
plot(FICO, EVER90)
#これも微妙
plot(LTV, fit, col="red")
par(new=TRUE)
plot(LTV, EVER90)
小暮厚之(2009)「Rによる統計データ分析入門」
#プレテスト(教育前テスト), ポストテスト(教育後テスト), 児童レベルと学校レベル(平均)
#ダウンロードデータが不調でcsvで読み込めなかった
library(readxl)
school<-read_excel("school.xlsx")
head(school)
## # A tibble: 6 x 12
## studentID `s-sID` pre1 post1 pre1.cgm pre1.cwc pre2.m post2.m schoolID time2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 47 150 -2.79 -5.49 52.5 126. 1 5.21
## 2 2 2 53 114 3.21 0.51 52.5 126. 1 5.21
## 3 3 3 46 106 -3.79 -6.49 52.5 126. 1 5.21
## 4 4 4 63 133 13.2 10.5 52.5 126. 1 5.21
## 5 5 5 54 122 4.21 1.51 52.5 126. 1 5.21
## 6 6 6 46 117 -3.79 -6.49 52.5 126. 1 5.21
## # … with 2 more variables: ratio2 <dbl>, n <dbl>
#学校数50, 各校100人, 合計5,000人
summary(school)
## studentID s-sID pre1 post1
## Min. : 1 Min. : 1.00 Min. :18.00 Min. : 76.0
## 1st Qu.:1251 1st Qu.: 25.75 1st Qu.:45.00 1st Qu.:117.0
## Median :2500 Median : 50.50 Median :50.00 Median :126.0
## Mean :2500 Mean : 50.50 Mean :49.79 Mean :126.2
## 3rd Qu.:3750 3rd Qu.: 75.25 3rd Qu.:55.00 3rd Qu.:136.0
## Max. :5000 Max. :100.00 Max. :79.00 Max. :178.0
## pre1.cgm pre1.cwc pre2.m post2.m
## Min. :-31.7900 Min. :-29.98 Min. :41.81 Min. :108.7
## 1st Qu.: -4.7900 1st Qu.: -4.84 1st Qu.:47.49 1st Qu.:121.6
## Median : 0.2100 Median : -0.04 Median :49.48 Median :126.4
## Mean : 0.0016 Mean : 0.00 Mean :49.79 Mean :126.2
## 3rd Qu.: 5.2100 3rd Qu.: 4.65 3rd Qu.:52.49 3rd Qu.:132.3
## Max. : 29.2100 Max. : 25.87 Max. :56.57 Max. :141.8
## schoolID time2 ratio2 n
## Min. : 1.0 Min. :1.960 Min. : 2.920 Min. :100
## 1st Qu.:13.0 1st Qu.:4.130 1st Qu.: 5.510 1st Qu.:100
## Median :25.5 Median :4.970 Median : 7.385 Median :100
## Mean :25.5 Mean :4.747 Mean : 7.299 Mean :100
## 3rd Qu.:38.0 3rd Qu.:5.450 3rd Qu.: 9.110 3rd Qu.:100
## Max. :50.0 Max. :6.660 Max. :12.240 Max. :100
#マルチレベルモデルの分析には「lmertest」というパッケージを使用
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
#学校ごとにランダムなモデルを推定
school.lme1<-lmer(post1~pre1+(1|schoolID), data=school)
summary(school.lme1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: post1 ~ pre1 + (1 | schoolID)
## Data: school
##
## REML criterion at convergence: 36803.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3732 -0.6917 0.0014 0.6802 3.4823
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 32.53 5.704
## Residual 88.76 9.421
## Number of obs: 5000, groups: schoolID, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.465e+01 1.258e+00 2.652e+02 59.35 <2e-16 ***
## pre1 1.034e+00 1.919e-02 4.989e+03 53.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## pre1 -0.760
#学校ごとの切片を出す
ranef(school.lme1)
## $schoolID
## (Intercept)
## 1 -2.4838974
## 2 -8.9888517
## 3 -3.4902490
## 4 7.7301739
## 5 11.0707274
## 6 3.6580230
## 7 -0.3144651
## 8 2.7748610
## 9 -3.3318334
## 10 3.0020837
## 11 -4.2072448
## 12 -3.1042079
## 13 -3.2692223
## 14 -8.7350645
## 15 6.0526189
## 16 8.3809637
## 17 -1.6775454
## 18 1.0088720
## 19 5.6422268
## 20 -4.8654238
## 21 6.5854402
## 22 1.2319427
## 23 -10.3451675
## 24 4.1503106
## 25 0.4911642
## 26 3.5475266
## 27 -9.3891104
## 28 -3.9331908
## 29 -0.3064999
## 30 -8.6672398
## 31 -2.8913835
## 32 3.9370082
## 33 10.6657220
## 34 -3.1399195
## 35 -6.9677203
## 36 -7.4698139
## 37 -10.5911701
## 38 1.9059128
## 39 5.4625017
## 40 8.3135832
## 41 -3.2092311
## 42 3.6033620
## 43 -1.2070566
## 44 6.0777793
## 45 -0.9965059
## 46 2.1478538
## 47 -2.4602616
## 48 4.9858358
## 49 -1.0365276
## 50 4.6523100
##
## with conditional variances for "schoolID"
school.lme2<-lmer(post1~(-1+pre1|schoolID), data=school)
summary(school.lme2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: post1 ~ (-1 + pre1 | schoolID)
## Data: school
##
## REML criterion at convergence: 37007.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7186 -0.6815 -0.0166 0.6822 3.5335
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID pre1 1.047 1.023
## Residual 88.599 9.413
## Number of obs: 5000, groups: schoolID, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 75.4525 0.9532 4973.2395 79.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#学校ごとの傾きを出す
ranef(school.lme2)
## $schoolID
## pre1
## 1 0.9709807
## 2 0.8018649
## 3 0.9384671
## 4 1.1780092
## 5 1.2591644
## 6 1.1046015
## 7 1.0132360
## 8 1.0720180
## 9 0.9425292
## 10 1.0768611
## 11 0.9327003
## 12 0.9287014
## 13 0.9496673
## 14 0.8252031
## 15 1.1346303
## 16 1.1768486
## 17 0.9909535
## 18 1.0318678
## 19 1.1216044
## 20 0.9157977
## 21 1.1605273
## 22 1.0492201
## 23 0.8111924
## 24 1.1037510
## 25 1.0256886
## 26 1.0855451
## 27 0.8232429
## 28 0.9375193
## 29 0.9970181
## 30 0.8319404
## 31 0.9651307
## 32 1.1060775
## 33 1.2230133
## 34 0.9545247
## 35 0.8662634
## 36 0.8604291
## 37 0.8125965
## 38 1.0499978
## 39 1.1369781
## 40 1.1991151
## 41 0.9572226
## 42 1.0915775
## 43 0.9958932
## 44 1.1316443
## 45 0.9977004
## 46 1.0633161
## 47 0.9685870
## 48 1.1197344
## 49 0.9937646
## 50 1.1217177
##
## with conditional variances for "schoolID"
school.lme3<-lmer(post1~(pre1|schoolID), data=school)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 1.2724 (tol = 0.002, component 1)
summary(school.lme3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: post1 ~ (pre1 | schoolID)
## Data: school
##
## REML criterion at convergence: 36787.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7335 -0.6788 -0.0065 0.6959 3.5852
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schoolID (Intercept) 2016.532 44.906
## pre1 1.006 1.003 -0.99
## Residual 85.031 9.221
## Number of obs: 5000, groups: schoolID, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 120.6547 0.8125 49.2200 148.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 1.2724 (tol = 0.002, component 1)
#学校ごとの切片と傾きを出す
ranef(school.lme3)
## $schoolID
## (Intercept) pre1
## 1 -46.532604 0.9969218
## 2 -54.115763 1.0133154
## 3 -37.267525 0.7742212
## 4 -72.073219 1.6818731
## 5 -44.279661 1.2342001
## 6 -53.460861 1.2816931
## 7 -46.208397 1.0322310
## 8 -47.216667 1.1071707
## 9 -13.098282 0.3339734
## 10 -48.862598 1.1421248
## 11 -63.790583 1.3237618
## 12 -22.792656 0.4121444
## 13 -43.144915 0.9101507
## 14 -37.674751 0.6730078
## 15 -48.239786 1.1878818
## 16 -21.011047 0.7093516
## 17 -63.396946 1.3592628
## 18 -26.943721 0.6675917
## 19 -19.608219 0.6268287
## 20 -49.930629 1.0147173
## 21 -68.374698 1.6058186
## 22 -56.668137 1.2875448
## 23 -72.266951 1.3607057
## 24 -38.829422 0.9711977
## 25 -36.621554 0.8655203
## 26 -43.089809 1.0460659
## 27 -39.490973 0.7151795
## 28 -48.297331 1.0009338
## 29 -3.940918 0.2028024
## 30 -47.351389 0.8802625
## 31 -52.164865 1.0965724
## 32 -47.347647 1.1501560
## 33 -52.779590 1.3572545
## 34 -44.290940 0.9380436
## 35 -39.321542 0.7494694
## 36 -40.706208 0.7733973
## 37 -75.698700 1.4209417
## 38 -27.470416 0.7387518
## 39 -56.537881 1.3622086
## 40 -50.090457 1.2959275
## 41 -44.303501 0.9416652
## 42 -46.666819 1.1183848
## 43 -56.648975 1.2369716
## 44 -19.560456 0.6164652
## 45 -39.837483 0.8989284
## 46 -48.476665 1.1266299
## 47 -48.909915 1.0433528
## 48 -63.262581 1.4494295
## 49 -44.001063 0.9692478
## 50 -45.422855 1.1239029
##
## with conditional variances for "schoolID"
尾崎幸謙・川端一光・山田剛史(2018)「Rで学ぶマルチレベルモデル入門編」