# 패키지 불러오기
library(psych)
library(tidyverse)
library(lmerTest)
library(lme4)
library(nlme)
# 데이터 불러오기
edudata <- read.csv("edudata_f_r.csv", header = T)
str(edudata)
## 'data.frame': 4479 obs. of 11 variables:
## $ year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ time : int 0 0 0 0 0 0 0 0 0 0 ...
## $ schid : int 38089774 175940177 118431311 353543557 54554244 687845273 301768251 278348620 813279850 750798009 ...
## $ region : int 3 2 3 3 3 3 2 3 3 2 ...
## $ type : int 1 3 2 2 2 3 1 1 1 2 ...
## $ man_woman : int 3 3 3 3 3 3 3 3 3 3 ...
## $ schvio_aa1: num 53.3 5 19.2 40 59.2 ...
## $ schvio_aa2: num 0 1.67 8.33 0 0 ...
## $ schvio_aa4: int 12 6 4 2 2 2 2 2 3 2 ...
## $ schvio_aa7: int 3 4 2 2 2 2 6 2 2 3 ...
## $ schvio_kk5: int 0 0 5 0 7 2 5 0 0 1 ...
# 데이터 구조 변환
edudata$region <- factor(edudata$region,
levels=c(1,2,3),
labels=c("특별/광역시","시", "그외"))
edudata$type <- factor(edudata$type,
levels=c(1,2,3),
labels=c("일반고","특성화고", "그외"))
edudata$man_woman <- factor(edudata$man_woman,
levels=c(1,2,3),
labels=c("남","여", "남여공학"))
# 데이터 탐색
# library(psych)
pairs.panels(edudata) # 상관그래프 등
***
# 더미변수 만들기
edudata <- transform(edudata,
region_1=ifelse(region=="특별/광역시",1,0),
region_2=ifelse(region=="시",1,0),
region_3=ifelse(region=="그외",1,0))
edudata <- transform(edudata,
type_1=ifelse(type=="일반고",1,0),
type_2=ifelse(type=="특성화고",1,0),
type_3=ifelse(type=="그외",1,0))
edudata <- transform(edudata,
man_woman_1=ifelse(man_woman=="남",1,0),
man_woman_2=ifelse(man_woman=="여",1,0),
man_woman_3=ifelse(man_woman=="남여공학",1,0))
str(edudata)
## 'data.frame': 4479 obs. of 20 variables:
## $ year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ time : int 0 0 0 0 0 0 0 0 0 0 ...
## $ schid : int 38089774 175940177 118431311 353543557 54554244 687845273 301768251 278348620 813279850 750798009 ...
## $ region : Factor w/ 3 levels "특별/광역시",..: 3 2 3 3 3 3 2 3 3 2 ...
## $ type : Factor w/ 3 levels "일반고","특성화고",..: 1 3 2 2 2 3 1 1 1 2 ...
## $ man_woman : Factor w/ 3 levels "남","여","남여공학": 3 3 3 3 3 3 3 3 3 3 ...
## $ schvio_aa1 : num 53.3 5 19.2 40 59.2 ...
## $ schvio_aa2 : num 0 1.67 8.33 0 0 ...
## $ schvio_aa4 : int 12 6 4 2 2 2 2 2 3 2 ...
## $ schvio_aa7 : int 3 4 2 2 2 2 6 2 2 3 ...
## $ schvio_kk5 : int 0 0 5 0 7 2 5 0 0 1 ...
## $ region_1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ region_2 : num 0 1 0 0 0 0 1 0 0 1 ...
## $ region_3 : num 1 0 1 1 1 1 0 1 1 0 ...
## $ type_1 : num 1 0 0 0 0 0 1 1 1 0 ...
## $ type_2 : num 0 0 1 1 1 0 0 0 0 1 ...
## $ type_3 : num 0 1 0 0 0 1 0 0 0 0 ...
## $ man_woman_1: num 0 0 0 0 0 0 0 0 0 0 ...
## $ man_woman_2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ man_woman_3: num 1 1 1 1 1 1 1 1 1 1 ...
# 데이터 탐색(주요 변수별)
#library(tidyverse)
#region
region_change <- edudata %>% group_by(region,year) %>% summarise(mean_k55=mean(schvio_kk5))
region_change
## # A tibble: 9 x 3
## # Groups: region [?]
## region year mean_k55
## <fct> <int> <dbl>
## 1 특별/광역시 2015 2.83
## 2 특별/광역시 2016 3.39
## 3 특별/광역시 2017 3.91
## 4 시 2015 2.94
## 5 시 2016 3.26
## 6 시 2017 4.33
## 7 그외 2015 3.02
## 8 그외 2016 2.97
## 9 그외 2017 3.61
ggplot(region_change, aes(year, mean_k55, shape=as.factor(region))) +
geom_line(stat="identity", aes(group = region)) +
geom_point(size=3) +
labs(x="연도", y="가해자수", shape="지역") +
scale_shape_manual(values=0:2, labels=c("특별/광역시","시","그외"))
#type
type_change <- edudata %>% group_by(type,year) %>%
summarise(mean_k55=mean(schvio_kk5))
type_change
## # A tibble: 9 x 3
## # Groups: type [?]
## type year mean_k55
## <fct> <int> <dbl>
## 1 일반고 2015 2.23
## 2 일반고 2016 2.58
## 3 일반고 2017 3.27
## 4 특성화고 2015 5.42
## 5 특성화고 2016 5.50
## 6 특성화고 2017 6.59
## 7 그외 2015 1.98
## 8 그외 2016 2.35
## 9 그외 2017 2.83
ggplot(type_change, aes(year, mean_k55, shape=as.factor(type))) +
geom_line(stat="identity", aes(group = type)) +
geom_point(size=3) +
labs(x="연도", y="가해자수", shape="학교유형") +
scale_shape_manual(values=0:2, labels=c("일반고","특성화고","그외"))
# man_woman
man_woman_change <- edudata %>% group_by(man_woman,year) %>%
summarise(mean_k55=mean(schvio_kk5))
man_woman_change
## # A tibble: 9 x 3
## # Groups: man_woman [?]
## man_woman year mean_k55
## <fct> <int> <dbl>
## 1 남 2015 2.78
## 2 남 2016 3.38
## 3 남 2017 3.85
## 4 여 2015 0.88
## 5 여 2016 1.13
## 6 여 2017 1.23
## 7 남여공학 2015 3.64
## 8 남여공학 2016 3.85
## 9 남여공학 2017 4.88
ggplot(man_woman_change, aes(year, mean_k55, shape=as.factor(man_woman))) +
geom_line(stat="identity", aes(group = man_woman)) +
geom_point(size=3) +
labs(x="연도", y="가해자수", shape="성별") +
scale_shape_manual(values=0:2, labels=c("남","여","남여공학"))
###전체 #일차방정식 모델일 것임
year_change <- edudata %>% group_by(year) %>%
summarise(mean_k55=mean(schvio_kk5))
ggplot(year_change, aes(year, mean_k55)) +
geom_line(stat="identity") +
geom_point(size=3) + labs(x="연도", y="가해자수")
***
### 기술통계치 - 샘플 2015년
#시간수준(연도별 가해자수)
edudata_year <- edudata %>% filter(year=="2015")
summary(edudata_year$schvio_kk5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 1.000 2.926 4.000 66.000
sd(edudata_year$schvio_kk5)
## [1] 5.068085
#학교폭력 예방교육 수준(학교)
edudata_year <- edudata %>% filter(year=="2015") # 2015년만 샘플로 실시
summary(edudata_year[,10:13])
## schvio_aa7 schvio_kk5 region_1 region_2
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.000
## 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.:0.000
## Median : 3.000 Median : 1.000 Median :0.000 Median :0.000
## Mean : 3.828 Mean : 2.926 Mean :0.351 Mean :0.347
## 3rd Qu.: 4.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :28.000 Max. :66.000 Max. :1.000 Max. :1.000
sd(edudata_year$schvio_aa1);sd(edudata_year$schvio_aa2);sd(edudata_year$schvio_aa4);sd(edudata_year$schvio_aa7)
## [1] 121.1999
## [1] 86.27673
## [1] 2.974932
## [1] 2.986426
#2수준(지역,유형,성별)
edudata_year <- edudata %>% filter(year=="2015") # 2015
summary(edudata_year[,4:6])
## region type man_woman
## 특별/광역시:524 일반고 :931 남 :274
## 시 :518 특성화고:343 여 :300
## 그외 :451 그외 :219 남여공학:919
#지역별 수(퍼센트구하기)
524/(524+518+451)*100 #특별/광역
## [1] 35.09712
518/(524+518+451)*100 #시
## [1] 34.69524
451/(524+518+451)*100 #그외
## [1] 30.20764
#학교유형별 수(퍼센트구하기)
931/(931+343+219)*100 #일반고
## [1] 62.35767
343/(931+343+219)*100 #특성화고
## [1] 22.97388
219/(931+343+219)*100 #그외
## [1] 14.66845
#남여공학별 (퍼센트구하기)
274/(274+300+919)*100 #남
## [1] 18.35231
300/(274+300+919)*100 #여
## [1] 20.09377
919/(274+300+919)*100 #공학
## [1] 61.55392
#ID에 따른 학교수(사례수)
length(unique(edudata$schid)) #1493개 학교
## [1] 1493
# 데이터 생성
#공통 ID 기준 집단평균 중심화 변환 실시 p.142 p.144
edudata <- group_by(edudata,schid) %>%
mutate(gm.schvio_aa1 = schvio_aa1-mean(schvio_aa1))
edudata <- group_by(edudata,schid) %>%
mutate(gm.schvio_aa2 = schvio_aa2-mean(schvio_aa2))
edudata <- group_by(edudata,schid) %>%
mutate(gm.schvio_aa4 = schvio_aa4-mean(schvio_aa4))
edudata <- group_by(edudata,schid) %>%
mutate(gm.schvio_aa7 = schvio_aa7-mean(schvio_aa7))
str(edudata)
## Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 4479 obs. of 24 variables:
## $ year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ time : int 0 0 0 0 0 0 0 0 0 0 ...
## $ schid : int 38089774 175940177 118431311 353543557 54554244 687845273 301768251 278348620 813279850 750798009 ...
## $ region : Factor w/ 3 levels "특별/광역시",..: 3 2 3 3 3 3 2 3 3 2 ...
## $ type : Factor w/ 3 levels "일반고","특성화고",..: 1 3 2 2 2 3 1 1 1 2 ...
## $ man_woman : Factor w/ 3 levels "남","여","남여공학": 3 3 3 3 3 3 3 3 3 3 ...
## $ schvio_aa1 : num 53.3 5 19.2 40 59.2 ...
## $ schvio_aa2 : num 0 1.67 8.33 0 0 ...
## $ schvio_aa4 : int 12 6 4 2 2 2 2 2 3 2 ...
## $ schvio_aa7 : int 3 4 2 2 2 2 6 2 2 3 ...
## $ schvio_kk5 : int 0 0 5 0 7 2 5 0 0 1 ...
## $ region_1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ region_2 : num 0 1 0 0 0 0 1 0 0 1 ...
## $ region_3 : num 1 0 1 1 1 1 0 1 1 0 ...
## $ type_1 : num 1 0 0 0 0 0 1 1 1 0 ...
## $ type_2 : num 0 0 1 1 1 0 0 0 0 1 ...
## $ type_3 : num 0 1 0 0 0 1 0 0 0 0 ...
## $ man_woman_1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ man_woman_2 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ man_woman_3 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ gm.schvio_aa1: num 31.36 -30.83 -30.56 2.22 3.72 ...
## $ gm.schvio_aa2: num -7.22 -16.39 5.56 -11.11 0 ...
## $ gm.schvio_aa4: num 7.33 1 1.33 0 0 ...
## $ gm.schvio_aa7: num 0.667 0.667 0 -1.333 0 ...
## - attr(*, "vars")= chr "schid"
## - attr(*, "labels")='data.frame': 1493 obs. of 1 variable:
## ..$ schid: int 1261219 2577568 2654555 3820542 4116387 5528988 6245079 6644493 6882957 8895141 ...
## ..- attr(*, "vars")= chr "schid"
## ..- attr(*, "labels")='data.frame': 1493 obs. of 1 variable:
## .. ..$ schid: int 1261219 2577568 2654555 3820542 4116387 5528988 6245079 6644493 6882957 8895141 ...
## .. ..- attr(*, "vars")= chr "schid"
## .. ..- attr(*, "labels")='data.frame': 1493 obs. of 1 variable:
## .. .. ..$ schid: int 1261219 2577568 2654555 3820542 4116387 5528988 6245079 6644493 6882957 8895141 ...
## .. .. ..- attr(*, "vars")= chr "schid"
## .. .. ..- attr(*, "labels")='data.frame': 1493 obs. of 1 variable:
## .. .. .. ..$ schid: int 1261219 2577568 2654555 3820542 4116387 5528988 6245079 6644493 6882957 8895141 ...
## .. .. .. ..- attr(*, "vars")= chr "schid"
## .. .. .. ..- attr(*, "drop")= logi TRUE
## .. .. ..- attr(*, "indices")=List of 1493
## .. .. .. ..$ : int 564 2057 3550
## .. .. .. ..$ : int 578 2071 3564
## .. .. .. ..$ : int 1376 2869 4362
## .. .. .. ..$ : int 489 1982 3475
## .. .. .. ..$ : int 311 1804 3297
## .. .. .. ..$ : int 1363 2856 4349
## .. .. .. ..$ : int 526 2019 3512
## .. .. .. ..$ : int 1080 2573 4066
## .. .. .. ..$ : int 854 2347 3840
## .. .. .. ..$ : int 1463 2956 4449
## .. .. .. ..$ : int 44 1537 3030
## .. .. .. ..$ : int 344 1837 3330
## .. .. .. ..$ : int 711 2204 3697
## .. .. .. ..$ : int 1265 2758 4251
## .. .. .. ..$ : int 994 2487 3980
## .. .. .. ..$ : int 1380 2873 4366
## .. .. .. ..$ : int 1281 2774 4267
## .. .. .. ..$ : int 1443 2936 4429
## .. .. .. ..$ : int 1372 2865 4358
## .. .. .. ..$ : int 325 1818 3311
## .. .. .. ..$ : int 228 1721 3214
## .. .. .. ..$ : int 267 1760 3253
## .. .. .. ..$ : int 1272 2765 4258
## .. .. .. ..$ : int 1296 2789 4282
## .. .. .. ..$ : int 315 1808 3301
## .. .. .. ..$ : int 486 1979 3472
## .. .. .. ..$ : int 658 2151 3644
## .. .. .. ..$ : int 1421 2914 4407
## .. .. .. ..$ : int 1013 2506 3999
## .. .. .. ..$ : int 1134 2627 4120
## .. .. .. ..$ : int 324 1817 3310
## .. .. .. ..$ : int 38 1531 3024
## .. .. .. ..$ : int 282 1775 3268
## .. .. .. ..$ : int 1285 2778 4271
## .. .. .. ..$ : int 340 1833 3326
## .. .. .. ..$ : int 964 2457 3950
## .. .. .. ..$ : int 805 2298 3791
## .. .. .. ..$ : int 132 1625 3118
## .. .. .. ..$ : int 1124 2617 4110
## .. .. .. ..$ : int 1027 2520 4013
## .. .. .. ..$ : int 671 2164 3657
## .. .. .. ..$ : int 791 2284 3777
## .. .. .. ..$ : int 1250 2743 4236
## .. .. .. ..$ : int 584 2077 3570
## .. .. .. ..$ : int 619 2112 3605
## .. .. .. ..$ : int 887 2380 3873
## .. .. .. ..$ : int 1477 2970 4463
## .. .. .. ..$ : int 1249 2742 4235
## .. .. .. ..$ : int 758 2251 3744
## .. .. .. ..$ : int 285 1778 3271
## .. .. .. ..$ : int 1185 2678 4171
## .. .. .. ..$ : int 1486 2979 4472
## .. .. .. ..$ : int 308 1801 3294
## .. .. .. ..$ : int 1132 2625 4118
## .. .. .. ..$ : int 1482 2975 4468
## .. .. .. ..$ : int 1377 2870 4363
## .. .. .. ..$ : int 1430 2923 4416
## .. .. .. ..$ : int 28 1521 3014
## .. .. .. ..$ : int 1420 2913 4406
## .. .. .. ..$ : int 133 1626 3119
## .. .. .. ..$ : int 1243 2736 4229
## .. .. .. ..$ : int 367 1860 3353
## .. .. .. ..$ : int 46 1539 3032
## .. .. .. ..$ : int 1404 2897 4390
## .. .. .. ..$ : int 1075 2568 4061
## .. .. .. ..$ : int 940 2433 3926
## .. .. .. ..$ : int 161 1654 3147
## .. .. .. ..$ : int 1453 2946 4439
## .. .. .. ..$ : int 225 1718 3211
## .. .. .. ..$ : int 948 2441 3934
## .. .. .. ..$ : int 641 2134 3627
## .. .. .. ..$ : int 1179 2672 4165
## .. .. .. ..$ : int 654 2147 3640
## .. .. .. ..$ : int 0 1493 2986
## .. .. .. ..$ : int 60 1553 3046
## .. .. .. ..$ : int 851 2344 3837
## .. .. .. ..$ : int 1492 2985 4478
## .. .. .. ..$ : int 673 2166 3659
## .. .. .. ..$ : int 1375 2868 4361
## .. .. .. ..$ : int 247 1740 3233
## .. .. .. ..$ : int 1449 2942 4435
## .. .. .. ..$ : int 168 1661 3154
## .. .. .. ..$ : int 1350 2843 4336
## .. .. .. ..$ : int 734 2227 3720
## .. .. .. ..$ : int 1403 2896 4389
## .. .. .. ..$ : int 45 1538 3031
## .. .. .. ..$ : int 109 1602 3095
## .. .. .. ..$ : int 602 2095 3588
## .. .. .. ..$ : int 1199 2692 4185
## .. .. .. ..$ : int 1047 2540 4033
## .. .. .. ..$ : int 314 1807 3300
## .. .. .. ..$ : int 1468 2961 4454
## .. .. .. ..$ : int 1294 2787 4280
## .. .. .. ..$ : int 594 2087 3580
## .. .. .. ..$ : int 220 1713 3206
## .. .. .. ..$ : int 723 2216 3709
## .. .. .. ..$ : int 1119 2612 4105
## .. .. .. ..$ : int 557 2050 3543
## .. .. .. ..$ : int 196 1689 3182
## .. .. .. .. [list output truncated]
## .. .. ..- attr(*, "drop")= logi TRUE
## .. .. ..- attr(*, "group_sizes")= int 3 3 3 3 3 3 3 3 3 3 ...
## .. .. ..- attr(*, "biggest_group_size")= int 3
## .. ..- attr(*, "indices")=List of 1493
## .. .. ..$ : int 564 2057 3550
## .. .. ..$ : int 578 2071 3564
## .. .. ..$ : int 1376 2869 4362
## .. .. ..$ : int 489 1982 3475
## .. .. ..$ : int 311 1804 3297
## .. .. ..$ : int 1363 2856 4349
## .. .. ..$ : int 526 2019 3512
## .. .. ..$ : int 1080 2573 4066
## .. .. ..$ : int 854 2347 3840
## .. .. ..$ : int 1463 2956 4449
## .. .. ..$ : int 44 1537 3030
## .. .. ..$ : int 344 1837 3330
## .. .. ..$ : int 711 2204 3697
## .. .. ..$ : int 1265 2758 4251
## .. .. ..$ : int 994 2487 3980
## .. .. ..$ : int 1380 2873 4366
## .. .. ..$ : int 1281 2774 4267
## .. .. ..$ : int 1443 2936 4429
## .. .. ..$ : int 1372 2865 4358
## .. .. ..$ : int 325 1818 3311
## .. .. ..$ : int 228 1721 3214
## .. .. ..$ : int 267 1760 3253
## .. .. ..$ : int 1272 2765 4258
## .. .. ..$ : int 1296 2789 4282
## .. .. ..$ : int 315 1808 3301
## .. .. ..$ : int 486 1979 3472
## .. .. ..$ : int 658 2151 3644
## .. .. ..$ : int 1421 2914 4407
## .. .. ..$ : int 1013 2506 3999
## .. .. ..$ : int 1134 2627 4120
## .. .. ..$ : int 324 1817 3310
## .. .. ..$ : int 38 1531 3024
## .. .. ..$ : int 282 1775 3268
## .. .. ..$ : int 1285 2778 4271
## .. .. ..$ : int 340 1833 3326
## .. .. ..$ : int 964 2457 3950
## .. .. ..$ : int 805 2298 3791
## .. .. ..$ : int 132 1625 3118
## .. .. ..$ : int 1124 2617 4110
## .. .. ..$ : int 1027 2520 4013
## .. .. ..$ : int 671 2164 3657
## .. .. ..$ : int 791 2284 3777
## .. .. ..$ : int 1250 2743 4236
## .. .. ..$ : int 584 2077 3570
## .. .. ..$ : int 619 2112 3605
## .. .. ..$ : int 887 2380 3873
## .. .. ..$ : int 1477 2970 4463
## .. .. ..$ : int 1249 2742 4235
## .. .. ..$ : int 758 2251 3744
## .. .. ..$ : int 285 1778 3271
## .. .. ..$ : int 1185 2678 4171
## .. .. ..$ : int 1486 2979 4472
## .. .. ..$ : int 308 1801 3294
## .. .. ..$ : int 1132 2625 4118
## .. .. ..$ : int 1482 2975 4468
## .. .. ..$ : int 1377 2870 4363
## .. .. ..$ : int 1430 2923 4416
## .. .. ..$ : int 28 1521 3014
## .. .. ..$ : int 1420 2913 4406
## .. .. ..$ : int 133 1626 3119
## .. .. ..$ : int 1243 2736 4229
## .. .. ..$ : int 367 1860 3353
## .. .. ..$ : int 46 1539 3032
## .. .. ..$ : int 1404 2897 4390
## .. .. ..$ : int 1075 2568 4061
## .. .. ..$ : int 940 2433 3926
## .. .. ..$ : int 161 1654 3147
## .. .. ..$ : int 1453 2946 4439
## .. .. ..$ : int 225 1718 3211
## .. .. ..$ : int 948 2441 3934
## .. .. ..$ : int 641 2134 3627
## .. .. ..$ : int 1179 2672 4165
## .. .. ..$ : int 654 2147 3640
## .. .. ..$ : int 0 1493 2986
## .. .. ..$ : int 60 1553 3046
## .. .. ..$ : int 851 2344 3837
## .. .. ..$ : int 1492 2985 4478
## .. .. ..$ : int 673 2166 3659
## .. .. ..$ : int 1375 2868 4361
## .. .. ..$ : int 247 1740 3233
## .. .. ..$ : int 1449 2942 4435
## .. .. ..$ : int 168 1661 3154
## .. .. ..$ : int 1350 2843 4336
## .. .. ..$ : int 734 2227 3720
## .. .. ..$ : int 1403 2896 4389
## .. .. ..$ : int 45 1538 3031
## .. .. ..$ : int 109 1602 3095
## .. .. ..$ : int 602 2095 3588
## .. .. ..$ : int 1199 2692 4185
## .. .. ..$ : int 1047 2540 4033
## .. .. ..$ : int 314 1807 3300
## .. .. ..$ : int 1468 2961 4454
## .. .. ..$ : int 1294 2787 4280
## .. .. ..$ : int 594 2087 3580
## .. .. ..$ : int 220 1713 3206
## .. .. ..$ : int 723 2216 3709
## .. .. ..$ : int 1119 2612 4105
## .. .. ..$ : int 557 2050 3543
## .. .. ..$ : int 196 1689 3182
## .. .. .. [list output truncated]
## .. ..- attr(*, "drop")= logi TRUE
## .. ..- attr(*, "group_sizes")= int 3 3 3 3 3 3 3 3 3 3 ...
## .. ..- attr(*, "biggest_group_size")= int 3
## ..- attr(*, "indices")=List of 1493
## .. ..$ : int 564 2057 3550
## .. ..$ : int 578 2071 3564
## .. ..$ : int 1376 2869 4362
## .. ..$ : int 489 1982 3475
## .. ..$ : int 311 1804 3297
## .. ..$ : int 1363 2856 4349
## .. ..$ : int 526 2019 3512
## .. ..$ : int 1080 2573 4066
## .. ..$ : int 854 2347 3840
## .. ..$ : int 1463 2956 4449
## .. ..$ : int 44 1537 3030
## .. ..$ : int 344 1837 3330
## .. ..$ : int 711 2204 3697
## .. ..$ : int 1265 2758 4251
## .. ..$ : int 994 2487 3980
## .. ..$ : int 1380 2873 4366
## .. ..$ : int 1281 2774 4267
## .. ..$ : int 1443 2936 4429
## .. ..$ : int 1372 2865 4358
## .. ..$ : int 325 1818 3311
## .. ..$ : int 228 1721 3214
## .. ..$ : int 267 1760 3253
## .. ..$ : int 1272 2765 4258
## .. ..$ : int 1296 2789 4282
## .. ..$ : int 315 1808 3301
## .. ..$ : int 486 1979 3472
## .. ..$ : int 658 2151 3644
## .. ..$ : int 1421 2914 4407
## .. ..$ : int 1013 2506 3999
## .. ..$ : int 1134 2627 4120
## .. ..$ : int 324 1817 3310
## .. ..$ : int 38 1531 3024
## .. ..$ : int 282 1775 3268
## .. ..$ : int 1285 2778 4271
## .. ..$ : int 340 1833 3326
## .. ..$ : int 964 2457 3950
## .. ..$ : int 805 2298 3791
## .. ..$ : int 132 1625 3118
## .. ..$ : int 1124 2617 4110
## .. ..$ : int 1027 2520 4013
## .. ..$ : int 671 2164 3657
## .. ..$ : int 791 2284 3777
## .. ..$ : int 1250 2743 4236
## .. ..$ : int 584 2077 3570
## .. ..$ : int 619 2112 3605
## .. ..$ : int 887 2380 3873
## .. ..$ : int 1477 2970 4463
## .. ..$ : int 1249 2742 4235
## .. ..$ : int 758 2251 3744
## .. ..$ : int 285 1778 3271
## .. ..$ : int 1185 2678 4171
## .. ..$ : int 1486 2979 4472
## .. ..$ : int 308 1801 3294
## .. ..$ : int 1132 2625 4118
## .. ..$ : int 1482 2975 4468
## .. ..$ : int 1377 2870 4363
## .. ..$ : int 1430 2923 4416
## .. ..$ : int 28 1521 3014
## .. ..$ : int 1420 2913 4406
## .. ..$ : int 133 1626 3119
## .. ..$ : int 1243 2736 4229
## .. ..$ : int 367 1860 3353
## .. ..$ : int 46 1539 3032
## .. ..$ : int 1404 2897 4390
## .. ..$ : int 1075 2568 4061
## .. ..$ : int 940 2433 3926
## .. ..$ : int 161 1654 3147
## .. ..$ : int 1453 2946 4439
## .. ..$ : int 225 1718 3211
## .. ..$ : int 948 2441 3934
## .. ..$ : int 641 2134 3627
## .. ..$ : int 1179 2672 4165
## .. ..$ : int 654 2147 3640
## .. ..$ : int 0 1493 2986
## .. ..$ : int 60 1553 3046
## .. ..$ : int 851 2344 3837
## .. ..$ : int 1492 2985 4478
## .. ..$ : int 673 2166 3659
## .. ..$ : int 1375 2868 4361
## .. ..$ : int 247 1740 3233
## .. ..$ : int 1449 2942 4435
## .. ..$ : int 168 1661 3154
## .. ..$ : int 1350 2843 4336
## .. ..$ : int 734 2227 3720
## .. ..$ : int 1403 2896 4389
## .. ..$ : int 45 1538 3031
## .. ..$ : int 109 1602 3095
## .. ..$ : int 602 2095 3588
## .. ..$ : int 1199 2692 4185
## .. ..$ : int 1047 2540 4033
## .. ..$ : int 314 1807 3300
## .. ..$ : int 1468 2961 4454
## .. ..$ : int 1294 2787 4280
## .. ..$ : int 594 2087 3580
## .. ..$ : int 220 1713 3206
## .. ..$ : int 723 2216 3709
## .. ..$ : int 1119 2612 4105
## .. ..$ : int 557 2050 3543
## .. ..$ : int 196 1689 3182
## .. .. [list output truncated]
## ..- attr(*, "drop")= logi TRUE
## ..- attr(*, "group_sizes")= int 3 3 3 3 3 3 3 3 3 3 ...
## ..- attr(*, "biggest_group_size")= int 3
## - attr(*, "indices")=List of 1493
## ..$ : int 564 2057 3550
## ..$ : int 578 2071 3564
## ..$ : int 1376 2869 4362
## ..$ : int 489 1982 3475
## ..$ : int 311 1804 3297
## ..$ : int 1363 2856 4349
## ..$ : int 526 2019 3512
## ..$ : int 1080 2573 4066
## ..$ : int 854 2347 3840
## ..$ : int 1463 2956 4449
## ..$ : int 44 1537 3030
## ..$ : int 344 1837 3330
## ..$ : int 711 2204 3697
## ..$ : int 1265 2758 4251
## ..$ : int 994 2487 3980
## ..$ : int 1380 2873 4366
## ..$ : int 1281 2774 4267
## ..$ : int 1443 2936 4429
## ..$ : int 1372 2865 4358
## ..$ : int 325 1818 3311
## ..$ : int 228 1721 3214
## ..$ : int 267 1760 3253
## ..$ : int 1272 2765 4258
## ..$ : int 1296 2789 4282
## ..$ : int 315 1808 3301
## ..$ : int 486 1979 3472
## ..$ : int 658 2151 3644
## ..$ : int 1421 2914 4407
## ..$ : int 1013 2506 3999
## ..$ : int 1134 2627 4120
## ..$ : int 324 1817 3310
## ..$ : int 38 1531 3024
## ..$ : int 282 1775 3268
## ..$ : int 1285 2778 4271
## ..$ : int 340 1833 3326
## ..$ : int 964 2457 3950
## ..$ : int 805 2298 3791
## ..$ : int 132 1625 3118
## ..$ : int 1124 2617 4110
## ..$ : int 1027 2520 4013
## ..$ : int 671 2164 3657
## ..$ : int 791 2284 3777
## ..$ : int 1250 2743 4236
## ..$ : int 584 2077 3570
## ..$ : int 619 2112 3605
## ..$ : int 887 2380 3873
## ..$ : int 1477 2970 4463
## ..$ : int 1249 2742 4235
## ..$ : int 758 2251 3744
## ..$ : int 285 1778 3271
## ..$ : int 1185 2678 4171
## ..$ : int 1486 2979 4472
## ..$ : int 308 1801 3294
## ..$ : int 1132 2625 4118
## ..$ : int 1482 2975 4468
## ..$ : int 1377 2870 4363
## ..$ : int 1430 2923 4416
## ..$ : int 28 1521 3014
## ..$ : int 1420 2913 4406
## ..$ : int 133 1626 3119
## ..$ : int 1243 2736 4229
## ..$ : int 367 1860 3353
## ..$ : int 46 1539 3032
## ..$ : int 1404 2897 4390
## ..$ : int 1075 2568 4061
## ..$ : int 940 2433 3926
## ..$ : int 161 1654 3147
## ..$ : int 1453 2946 4439
## ..$ : int 225 1718 3211
## ..$ : int 948 2441 3934
## ..$ : int 641 2134 3627
## ..$ : int 1179 2672 4165
## ..$ : int 654 2147 3640
## ..$ : int 0 1493 2986
## ..$ : int 60 1553 3046
## ..$ : int 851 2344 3837
## ..$ : int 1492 2985 4478
## ..$ : int 673 2166 3659
## ..$ : int 1375 2868 4361
## ..$ : int 247 1740 3233
## ..$ : int 1449 2942 4435
## ..$ : int 168 1661 3154
## ..$ : int 1350 2843 4336
## ..$ : int 734 2227 3720
## ..$ : int 1403 2896 4389
## ..$ : int 45 1538 3031
## ..$ : int 109 1602 3095
## ..$ : int 602 2095 3588
## ..$ : int 1199 2692 4185
## ..$ : int 1047 2540 4033
## ..$ : int 314 1807 3300
## ..$ : int 1468 2961 4454
## ..$ : int 1294 2787 4280
## ..$ : int 594 2087 3580
## ..$ : int 220 1713 3206
## ..$ : int 723 2216 3709
## ..$ : int 1119 2612 4105
## ..$ : int 557 2050 3543
## ..$ : int 196 1689 3182
## .. [list output truncated]
## - attr(*, "drop")= logi TRUE
## - attr(*, "group_sizes")= int 3 3 3 3 3 3 3 3 3 3 ...
## - attr(*, "biggest_group_size")= int 3
#시간에 따른 변화패턴: 무작위로 20명의 사례를 선정(샘플링)하였음
edudata.20 <- arrange(edudata, schid)
#str(edudata.20) # 4479 obs. of 24 variables
length.schid <- length(unique(edudata$schid)) #1493
edudata.20$myselect <- rep(sample(1:length.schid,
size=length.schid,replace=FALSE),each=3)
edudata_1.20 <- filter(edudata.20,myselect<21) %>% arrange(schid,year)
ggplot(edudata_1.20,aes(x=year,y=schvio_kk5))+
geom_line(stat='identity')+
geom_point(size=2)+
geom_smooth(se = FALSE, method = "lm")+
labs(x='시점',y='학교폭력정도')+
facet_wrap(~schid) # 결론은 일차함수 모형
***
# 분석
#다층모형 계층 설정에 따라 모형이 달라짐.
#본 연구에서는 시간변수를 1수준으로, 예방교육과 학교배경특성을 2수준으로 설정함.
# 1) 무조건 평균모형(기본모형) p.80
# 종속변수만 투입
# 학교폭력정도의 수준별 분산 분포
# 학교폭력정도에 대한 수준별 분산 분포와 그 비율을 파악하기 위해 2수준 다층모형의 무조건 모형을 추정하였다.
# library(lmerTest)
# 결과 : 랜덤효과절편 분산 15.1, 랜덤효과오차항 13.03, 고정효과절편 3.3695
edudata_model0 <- lmer(schvio_kk5~ 1+(1|schid), data=edudata)
summary(edudata_model0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: schvio_kk5 ~ 1 + (1 | schid)
## Data: edudata
##
## REML criterion at convergence: 26449.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.4423 -0.3520 -0.2086 0.2020 10.3460
##
## Random effects:
## Groups Name Variance Std.Dev.
## schid (Intercept) 15.10 3.885
## Residual 13.03 3.610
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.3695 0.1141 1492.0000 29.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 랜덤효과 항들 추출 #ICC = 15.1/(15.1+13.03) 해석 p.85
var.cov <- data.frame(VarCorr(edudata_model0))
var.cov
## grp var1 var2 vcov sdcor
## 1 schid (Intercept) <NA> 15.09573 3.885323
## 2 Residual <NA> <NA> 13.03416 3.610285
(ICC.schid <- var.cov$vcov[1]/sum(var.cov$vcov))
## [1] 0.5366439
# 1-1)무조건 성장모형
# 굳이 해야할지 고민, 왜냐면 그냥 다층모형에 집어넣어 하면 되니깐.
# 종속변수와 1수준 시간(시점)변수만 투입
# 성장모형이라고 하는 이유는 시간(시점) 변수가 투입되어서...
# 초기치(절편값 Intercept) : 2.850
# 변화율(time 값) : 0.519
# 변화율의 상관계수 time-Intercept : -0.436
edudata_model0 <- lmer(schvio_kk5~time+(time|schid), data=edudata)
summary(edudata_model0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: schvio_kk5 ~ time + (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26316.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6883 -0.3251 -0.1756 0.2024 8.8201
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 14.723 3.837
## time 2.189 1.480 -0.09
## Residual 10.581 3.253
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.850e+00 1.256e-01 1.492e+03 22.701 < 2e-16 ***
## time 5.191e-01 7.078e-02 1.492e+03 7.334 3.65e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## time -0.436
# 랜덤효과 유의도 없음
#edudata_model0 <- lme(fixed = schvio_kk5~time, random=~time|schid, data=edudata)
# edudata_model0 선택
# edudata_model01 <- lmer(schvio_kk5~1+(time|schid), data=edudata)
# AIC(edudata_model0);BIC(edudata_model0)
# AIC(edudata_model01);BIC(edudata_model01)
####### 신뢰구간 및 그림은 탐색
#Calculating 95% CI and PI
#Calculating 95% CI and PI
edudata.new <- data.frame(time = 0:2)
edudata.new$schvio_kk5 <- predict(edudata_model0, edudata.new, re.form = NA)
m.mat <- model.matrix(terms(edudata_model0), edudata.new)
edudata.new$var <- diag(m.mat %*% vcov(edudata_model0) %*% t(m.mat)) + VarCorr(edudata_model0)$schid[1]
edudata.new$pvar <- edudata.new$var + sigma(edudata_model0)^2
edudata.new$ci.lb <- with(edudata.new, schvio_kk5 - 1.96 * sqrt(var))
edudata.new$ci.ub <- with(edudata.new, schvio_kk5 + 1.96 * sqrt(var))
edudata.new$pi.lb <- with(edudata.new, schvio_kk5 - 1.96 * sqrt(pvar))
edudata.new$pi.ub <- with(edudata.new, schvio_kk5 + 1.96 * sqrt(pvar))
edudata.new
## time schvio_kk5 var pvar ci.lb ci.ub pi.lb pi.ub
## 1 0 2.850413 14.73834 25.31905 -4.674134 10.37496 -7.011922 12.71275
## 2 1 3.369502 14.73559 25.31630 -4.154344 10.89335 -6.492298 13.23130
## 3 2 3.888591 14.74287 25.32358 -3.637111 11.41429 -5.974626 13.75181
str(edudata.new)
## 'data.frame': 3 obs. of 8 variables:
## $ time : int 0 1 2
## $ schvio_kk5: num 2.85 3.37 3.89
## $ var : num 14.7 14.7 14.7
## $ pvar : num 25.3 25.3 25.3
## $ ci.lb : num -4.67 -4.15 -3.64
## $ ci.ub : num 10.4 10.9 11.4
## $ pi.lb : num -7.01 -6.49 -5.97
## $ pi.ub : num 12.7 13.2 13.8
# Plot the average values
# ylim(c(0,1,2) 사용하여 y축 조정, alpha 도 좀 더 진하게 하면 좋을 듯.
# 구간 중 예측구간은 지우고 그래프를 그리면 더 좋을 듯
ggplot(data = edudata.new, aes(x = time, y = schvio_kk5)) +
geom_line(data = edudata, alpha = .05, aes(group = schid)) +
geom_errorbar(width = .02, colour = 'red', aes(x = time - .02, ymax = ci.ub, ymin = ci.lb)) +
#95% 신뢰구간(빨강)
geom_line(colour = 'red', linetype = 'dashed', aes(x = time-.02)) +
geom_point(size = 3.5, colour = 'red', fill = 'white', aes(x = time - .02)) +
geom_errorbar(width = .02, colour = 'blue', aes(x = time + .02, ymax = pi.ub, ymin = pi.lb)) +
#95% 예측구간(파랑)
geom_line(colour = 'blue', linetype = 'dashed', aes(x = time + .02)) +
geom_point(size = 3.5, colour = 'blue', fill = 'white', aes(x = time + .02)) +
theme_bw()
***
#######################################################
#### 2) 조건모형
#우선 모델 결정을 위해 3가지 모형을 테스트 해보았다.
#To choose the best model
#m0 <- lmer(schvio_kk5 ~ time + (time | schid), data=edudata)
m0 <- lmer(schvio_kk5 ~ 1 + (time | schid), data=edudata)
m1 <- lmer(schvio_kk5 ~ (gm.schvio_aa1+gm.schvio_aa2+gm.schvio_aa4+gm.schvio_aa7+region_1+region_2+region_3+type_1+type_2+type_3+man_woman_1+man_woman_2+man_woman_3)+
(time| schid), data=edudata)
m11 <- lmer(schvio_kk5 ~ time+(gm.schvio_aa1+gm.schvio_aa2+gm.schvio_aa4+gm.schvio_aa7+region_1+region_2+region_3+type_1+type_2+type_3+man_woman_1+man_woman_2+man_woman_3)+
(time| schid), data=edudata)
m2 <- lmer(schvio_kk5 ~ (region_1+region_2+region_3+type_1+type_2+type_3+man_woman_1+man_woman_2+man_woman_3)+
(gm.schvio_aa1 | schid) + (gm.schvio_aa2 | schid)+ (gm.schvio_aa4 | schid)+ (gm.schvio_aa7 | schid) + (time| schid), data=edudata)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 1
## negative eigenvalues
## Warning: Model failed to converge with 1 negative eigenvalue: -4.7e+00
summary(m0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: schvio_kk5 ~ 1 + (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26366.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5895 -0.3392 -0.1716 0.1927 8.5401
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 14.881 3.858
## time 2.453 1.566 -0.12
## Residual 10.581 3.253
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.252 0.113 1492.000 28.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: schvio_kk5 ~ (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26133
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6383 -0.3621 -0.1294 0.1918 8.7264
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 12.218 3.495
## time 2.416 1.554 -0.19
## Residual 10.644 3.263
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.996e+00 3.225e-01 1.548e+03 6.188 7.77e-10 ***
## gm.schvio_aa1 -2.306e-04 8.019e-04 2.973e+03 -0.288 0.773672
## gm.schvio_aa2 -4.219e-04 1.221e-03 2.918e+03 -0.345 0.729822
## gm.schvio_aa4 -3.835e-02 3.955e-02 2.981e+03 -0.970 0.332335
## gm.schvio_aa7 2.002e-02 4.008e-02 2.943e+03 0.500 0.617456
## region_1 9.303e-01 2.693e-01 1.526e+03 3.455 0.000566 ***
## region_2 8.520e-01 2.614e-01 1.535e+03 3.259 0.001142 **
## type_1 7.944e-01 3.034e-01 1.555e+03 2.619 0.008914 **
## type_2 3.740e+00 3.472e-01 1.568e+03 10.772 < 2e-16 ***
## man_woman_1 -4.428e-01 2.853e-01 1.551e+03 -1.552 0.120844
## man_woman_2 -3.061e+00 2.745e-01 1.527e+03 -11.151 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) gm.s_1 gm.s_2 gm.s_4 gm.s_7 regn_1 regn_2 type_1 type_2
## gm.schvio_1 0.001
## gm.schvio_2 -0.005 -0.110
## gm.schvio_4 -0.009 -0.087 -0.031
## gm.schvio_7 -0.001 -0.032 -0.021 -0.323
## region_1 -0.472 -0.001 -0.001 0.008 -0.005
## region_2 -0.414 -0.002 -0.004 0.007 0.000 0.557
## type_1 -0.772 -0.001 0.004 0.000 -0.003 0.129 0.017
## type_2 -0.702 -0.002 0.004 -0.001 -0.002 0.100 0.045 0.701
## man_woman_1 -0.094 0.001 -0.004 -0.001 0.003 -0.242 -0.161 -0.034 0.083
## man_woman_2 0.000 0.001 0.000 -0.002 -0.001 -0.233 -0.152 -0.150 -0.070
## mn_w_1
## gm.schvio_1
## gm.schvio_2
## gm.schvio_4
## gm.schvio_7
## region_1
## region_2
## type_1
## type_2
## man_woman_1
## man_woman_2 0.283
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m11)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## schvio_kk5 ~ time + (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26083.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7157 -0.3607 -0.1371 0.1920 8.9998
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 12.040 3.470
## time 2.179 1.476 -0.16
## Residual 10.622 3.259
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.558e+00 3.278e-01 1.645e+03 4.754 2.17e-06 ***
## time 5.353e-01 7.278e-02 1.559e+03 7.355 3.07e-13 ***
## gm.schvio_aa1 -2.210e-04 7.947e-04 2.969e+03 -0.278 0.78094
## gm.schvio_aa2 4.201e-04 1.215e-03 2.910e+03 0.346 0.72955
## gm.schvio_aa4 1.239e-03 3.959e-02 2.980e+03 0.031 0.97504
## gm.schvio_aa7 4.831e-02 3.989e-02 2.936e+03 1.211 0.22604
## region_1 9.198e-01 2.692e-01 1.526e+03 3.417 0.00065 ***
## region_2 8.436e-01 2.613e-01 1.536e+03 3.228 0.00127 **
## type_1 7.911e-01 3.032e-01 1.557e+03 2.609 0.00917 **
## type_2 3.741e+00 3.470e-01 1.570e+03 10.780 < 2e-16 ***
## man_woman_1 -4.352e-01 2.852e-01 1.553e+03 -1.526 0.12723
## man_woman_2 -3.054e+00 2.744e-01 1.528e+03 -11.131 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time gm.s_1 gm.s_2 gm.s_4 gm.s_7 regn_1 regn_2 type_1
## time -0.181
## gm.schvio_1 0.001 0.000
## gm.schvio_2 -0.022 0.096 -0.109
## gm.schvio_4 -0.034 0.139 -0.086 -0.018
## gm.schvio_7 -0.019 0.100 -0.032 -0.011 -0.305
## region_1 -0.463 -0.004 -0.001 -0.002 0.007 -0.005
## region_2 -0.407 -0.003 -0.002 -0.005 0.006 0.000 0.557
## type_1 -0.759 -0.001 -0.001 0.004 0.000 -0.003 0.129 0.017
## type_2 -0.691 0.001 -0.002 0.004 -0.001 -0.002 0.100 0.045 0.701
## man_woman_1 -0.093 0.003 0.001 -0.003 -0.001 0.003 -0.242 -0.161 -0.034
## man_woman_2 -0.001 0.002 0.001 0.001 -0.002 -0.001 -0.233 -0.152 -0.150
## type_2 mn_w_1
## time
## gm.schvio_1
## gm.schvio_2
## gm.schvio_4
## gm.schvio_7
## region_1
## region_2
## type_1
## type_2
## man_woman_1 0.083
## man_woman_2 -0.070 0.283
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: schvio_kk5 ~ (region_1 + region_2 + region_3 + type_1 + type_2 +
## type_3 + man_woman_1 + man_woman_2 + man_woman_3) + (gm.schvio_aa1 |
## schid) + (gm.schvio_aa2 | schid) + (gm.schvio_aa4 | schid) +
## (gm.schvio_aa7 | schid) + (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26095.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6435 -0.3621 -0.1263 0.1914 8.6655
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 1.417e+00 1.1901746
## gm.schvio_aa1 4.245e-07 0.0006516 -1.00
## schid.1 (Intercept) 3.515e+00 1.8747649
## gm.schvio_aa2 1.174e-05 0.0034268 1.00
## schid.2 (Intercept) 3.857e+00 1.9638989
## gm.schvio_aa4 9.593e-03 0.0979417 -1.00
## schid.3 (Intercept) 2.005e-01 0.4478169
## gm.schvio_aa7 4.251e-04 0.0206184 1.00
## schid.4 (Intercept) 3.312e+00 1.8199723
## time 2.408e+00 1.5518281 -0.37
## Residual 1.057e+01 3.2513053
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.0040 0.3220 1545.5556 6.224 6.23e-10 ***
## region_1 0.9408 0.2688 1520.1825 3.500 0.000479 ***
## region_2 0.8471 0.2610 1531.5453 3.245 0.001200 **
## type_1 0.7796 0.3030 1554.3505 2.573 0.010163 *
## type_2 3.7136 0.3467 1566.0385 10.712 < 2e-16 ***
## man_woman_1 -0.4231 0.2849 1548.4139 -1.485 0.137639
## man_woman_2 -3.0623 0.2740 1523.3644 -11.174 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) regn_1 regn_2 type_1 type_2 mn_w_1
## region_1 -0.472
## region_2 -0.414 0.557
## type_1 -0.772 0.129 0.017
## type_2 -0.702 0.100 0.045 0.701
## man_woman_1 -0.094 -0.242 -0.161 -0.034 0.083
## man_woman_2 0.000 -0.233 -0.152 -0.151 -0.070 0.283
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
## convergence code: 1
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#모델 비교
anova(m0, m1, m11, m2) #m11 모델이 낫다.
## Warning in optwrap(optimizer, devfun, x@theta, lower = x@lower, calc.derivs
## = TRUE, : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Data: edudata
## Models:
## m0: schvio_kk5 ~ 1 + (time | schid)
## m1: schvio_kk5 ~ (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## m1: gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## m1: type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## m1: (time | schid)
## m11: schvio_kk5 ~ time + (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## m11: gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## m11: type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## m11: (time | schid)
## m2: schvio_kk5 ~ (region_1 + region_2 + region_3 + type_1 + type_2 +
## m2: type_3 + man_woman_1 + man_woman_2 + man_woman_3) + (gm.schvio_aa1 |
## m2: schid) + (gm.schvio_aa2 | schid) + (gm.schvio_aa4 | schid) +
## m2: (gm.schvio_aa7 | schid) + (time | schid)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 5 26374 26406 -13182 26364
## m1 15 26122 26218 -13046 26092 271.858 10 < 2.2e-16 ***
## m11 16 26070 26173 -13019 26038 53.372 1 2.76e-13 ***
## m2 23 26133 26280 -13044 26087 0.000 7 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#AIC BIC
#m11 값이 가장 작어 모델이 가장 낫다.
#위에 anova도 참고할 것
#따라서 m1 으로 모형 선택하여 분석
AIC(m0);BIC(m0)
## [1] 26376.16
## [1] 26408.2
AIC(m1);BIC(m1)
## [1] 26163.03
## [1] 26259.13
AIC(m11);BIC(m11)
## [1] 26115.14
## [1] 26217.65
AIC(m2);BIC(m2)
## [1] 26141.53
## [1] 26288.89
#########################################################
## 초기치 절편: 2.017
## 변화율 절편 : 1.558
## 초기치
m11_a <- lmer(schvio_kk5 ~ 1+(gm.schvio_aa1+gm.schvio_aa2+gm.schvio_aa4+gm.schvio_aa7+region_1+region_2+region_3+type_1+type_2+type_3+man_woman_1+man_woman_2+man_woman_3)+
(1| schid), data=edudata)
## 변화율
# time 대신 1 를 넣어야 할까? 하지만 앞에 모델 평가에서 time을 넣는 것이 지표가 좋음
m11_b <- lmer(schvio_kk5 ~ time+(gm.schvio_aa1+gm.schvio_aa2+gm.schvio_aa4+gm.schvio_aa7+region_1+region_2+region_3+type_1+type_2+type_3+man_woman_1+man_woman_2+man_woman_3)+
(time| schid), data=edudata)
summary(m11_a)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## schvio_kk5 ~ 1 + (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## (1 | schid)
## Data: edudata
##
## REML criterion at convergence: 26207.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.4232 -0.3993 -0.1504 0.1979 10.3501
##
## Random effects:
## Groups Name Variance Std.Dev.
## schid (Intercept) 11.80 3.435
## Residual 13.06 3.614
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.017e+00 3.246e-01 1.553e+03 6.214 6.63e-10 ***
## gm.schvio_aa1 -7.905e-05 8.026e-04 2.982e+03 -0.098 0.921543
## gm.schvio_aa2 -6.871e-04 1.212e-03 2.982e+03 -0.567 0.570669
## gm.schvio_aa4 -5.250e-02 3.975e-02 2.982e+03 -1.321 0.186688
## gm.schvio_aa7 5.060e-03 3.987e-02 2.982e+03 0.127 0.899034
## region_1 9.967e-01 2.712e-01 1.529e+03 3.675 0.000246 ***
## region_2 9.417e-01 2.633e-01 1.539e+03 3.576 0.000360 ***
## type_1 8.421e-01 3.054e-01 1.561e+03 2.758 0.005888 **
## type_2 3.802e+00 3.495e-01 1.575e+03 10.880 < 2e-16 ***
## man_woman_1 -4.775e-01 2.872e-01 1.557e+03 -1.662 0.096617 .
## man_woman_2 -3.166e+00 2.764e-01 1.530e+03 -11.454 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) gm.s_1 gm.s_2 gm.s_4 gm.s_7 regn_1 regn_2 type_1 type_2
## gm.schvio_1 0.002
## gm.schvio_2 -0.002 -0.106
## gm.schvio_4 -0.002 -0.084 -0.036
## gm.schvio_7 0.003 -0.037 -0.027 -0.335
## region_1 -0.472 0.000 0.000 0.005 -0.002
## region_2 -0.414 0.000 0.000 0.006 -0.001 0.557
## type_1 -0.772 -0.002 0.002 0.000 -0.003 0.128 0.016
## type_2 -0.702 -0.001 0.001 -0.002 -0.002 0.099 0.044 0.701
## man_woman_1 -0.094 -0.001 0.001 -0.004 0.002 -0.242 -0.161 -0.034 0.083
## man_woman_2 0.000 0.000 0.001 -0.002 0.002 -0.232 -0.151 -0.151 -0.070
## mn_w_1
## gm.schvio_1
## gm.schvio_2
## gm.schvio_4
## gm.schvio_7
## region_1
## region_2
## type_1
## type_2
## man_woman_1
## man_woman_2 0.282
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(m11_b)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## schvio_kk5 ~ time + (gm.schvio_aa1 + gm.schvio_aa2 + gm.schvio_aa4 +
## gm.schvio_aa7 + region_1 + region_2 + region_3 + type_1 +
## type_2 + type_3 + man_woman_1 + man_woman_2 + man_woman_3) +
## (time | schid)
## Data: edudata
##
## REML criterion at convergence: 26083.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7157 -0.3607 -0.1371 0.1920 8.9998
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schid (Intercept) 12.040 3.470
## time 2.179 1.476 -0.16
## Residual 10.622 3.259
## Number of obs: 4479, groups: schid, 1493
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.558e+00 3.278e-01 1.645e+03 4.754 2.17e-06 ***
## time 5.353e-01 7.278e-02 1.559e+03 7.355 3.07e-13 ***
## gm.schvio_aa1 -2.210e-04 7.947e-04 2.969e+03 -0.278 0.78094
## gm.schvio_aa2 4.201e-04 1.215e-03 2.910e+03 0.346 0.72955
## gm.schvio_aa4 1.239e-03 3.959e-02 2.980e+03 0.031 0.97504
## gm.schvio_aa7 4.831e-02 3.989e-02 2.936e+03 1.211 0.22604
## region_1 9.198e-01 2.692e-01 1.526e+03 3.417 0.00065 ***
## region_2 8.436e-01 2.613e-01 1.536e+03 3.228 0.00127 **
## type_1 7.911e-01 3.032e-01 1.557e+03 2.609 0.00917 **
## type_2 3.741e+00 3.470e-01 1.570e+03 10.780 < 2e-16 ***
## man_woman_1 -4.352e-01 2.852e-01 1.553e+03 -1.526 0.12723
## man_woman_2 -3.054e+00 2.744e-01 1.528e+03 -11.131 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time gm.s_1 gm.s_2 gm.s_4 gm.s_7 regn_1 regn_2 type_1
## time -0.181
## gm.schvio_1 0.001 0.000
## gm.schvio_2 -0.022 0.096 -0.109
## gm.schvio_4 -0.034 0.139 -0.086 -0.018
## gm.schvio_7 -0.019 0.100 -0.032 -0.011 -0.305
## region_1 -0.463 -0.004 -0.001 -0.002 0.007 -0.005
## region_2 -0.407 -0.003 -0.002 -0.005 0.006 0.000 0.557
## type_1 -0.759 -0.001 -0.001 0.004 0.000 -0.003 0.129 0.017
## type_2 -0.691 0.001 -0.002 0.004 -0.001 -0.002 0.100 0.045 0.701
## man_woman_1 -0.093 0.003 0.001 -0.003 -0.001 0.003 -0.242 -0.161 -0.034
## man_woman_2 -0.001 0.002 0.001 0.001 -0.002 -0.001 -0.233 -0.152 -0.150
## type_2 mn_w_1
## time
## gm.schvio_1
## gm.schvio_2
## gm.schvio_4
## gm.schvio_7
## region_1
## region_2
## type_1
## type_2
## man_woman_1 0.083
## man_woman_2 -0.070 0.283
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients