– sex(M=nam,F=nữ) – extroversion (extrov) hướng ngoại, giá trị 1-10 – teacher experience (texp) kinh nghiệm của thầy cô (số năm 2-25)
pop = read.csv("~/Desktop/R GS Tuấn 2020/TDTU Datasets for 2020 Workshop/popular.csv", header = T)
head(pop)
## pupil class extrov sex texp popular popteach Zextrav Zsex Ztexp
## 1 1 1 5 F 24 6.3 6 -0.1703149 0.9888125 1.486153
## 2 2 1 7 M 24 4.9 5 1.4140098 -1.0108084 1.486153
## 3 3 1 4 F 24 5.3 6 -0.9624772 0.9888125 1.486153
## 4 4 1 3 F 24 4.7 5 -1.7546396 0.9888125 1.486153
## 5 5 1 5 F 24 6.0 6 -0.1703149 0.9888125 1.486153
## 6 6 1 4 M 24 4.7 5 -0.9624772 -1.0108084 1.486153
## Zpopular Zpopteach Cextrav Ctexp Csex
## 1 0.8850133 0.66905609 -0.215 9.737 0.5
## 2 -0.1276291 -0.04308451 1.785 9.737 -0.5
## 3 0.1616973 0.66905609 -1.215 9.737 0.5
## 4 -0.2722923 -0.04308451 -2.215 9.737 0.5
## 5 0.6680185 0.66905609 -0.215 9.737 0.5
## 6 -0.2722923 -0.04308451 -1.215 9.737 -0.5
names(pop)
## [1] "pupil" "class" "extrov" "sex" "texp" "popular"
## [7] "popteach" "Zextrav" "Zsex" "Ztexp" "Zpopular" "Zpopteach"
## [13] "Cextrav" "Ctexp" "Csex"
library(ggplot2)
ggplot(data=pop, aes(x = factor(class), fill=class)) + geom_bar() + coord_flip() + theme(legend.position="none")
# Phân tích sơ khởi bằng biểu đồ # Dùng các package
library(ggplot2); library(gridExtra)
p1 = ggplot(data=pop, aes(popular)) + geom_histogram(fill="blue", col="white")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Effect of sex
p2 = ggplot(data=pop, aes(x=sex, y=popular, col=sex)) + geom_boxplot() + geom_jitter(alpha=0.1) + theme(legend.position="none")
p2
# Effect of extroversion
p3 = ggplot(data=pop, aes(x=factor(extrov), y=popular, col=factor(extrov))) + geom_boxplot() + geom_jitter(alpha=0.1) + theme(legend.position="none")
p3
# Association with Texp
p4 = ggplot(data=pop, aes(x=texp, y=popular, col=sex)) + geom_point() + geom_smooth()
p4
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# xếp các biểu đồ chung 1 trang
grid.arrange(p1, p2, p3, p4, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Mục tiêu: đánh giá ảnh hưởng hay tác động của các yếu tố đa tầng - Đánh giá ảnh hưởng của các yếu tố giới tính (sex), texp, extrov đến điểm popular - Ước tính các nguồn phương sai # Mô hình 1: ảnh hưởng của class (lớp học) # Package lme4, lmerTest - Tác giả Douglas Bates (lme4) - Hàm chính là lmer lmer(Y ~ fixed effects | random effects, data=xxx, REML=T/F) - Fixed effects và random effects cách biệt bởi dấu | # Triển khai qua lme4
library(lme4); library(lmerTest)
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
pop$class = as.factor(pop$class)
m1 = lmer(popular ~ 1 + (1 |
class), data=pop)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + (1 | class)
## Data: pop
##
## REML criterion at convergence: 6330.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5655 -0.6975 0.0020 0.6758 3.3175
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.7021 0.8379
## Residual 1.2218 1.1053
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.07786 0.08739 98.90973 58.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 = lmer(popular ~ 1 + Cextrav
+ (1 | class), data=pop)
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + Cextrav + (1 | class)
## Data: pop
##
## REML criterion at convergence: 5832.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0644 -0.7267 0.0165 0.7088 3.3587
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.8406 0.9168
## Residual 0.9304 0.9646
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.078e+00 9.421e-02 9.830e+01 53.90 <2e-16 ***
## Cextrav 4.863e-01 2.015e-02 1.965e+03 24.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Cextrav 0.000
m3 = lmer(popular ~ 1 + Cextrav
+ (1 + Cextrav | class),
data=pop)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + Cextrav + (1 + Cextrav | class)
## Data: pop
##
## REML criterion at convergence: 5779.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1961 -0.7291 0.0146 0.6816 3.2217
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## class (Intercept) 0.89178 0.9443
## Cextrav 0.02599 0.1612 -0.88
## Residual 0.89492 0.9460
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.03127 0.09702 97.07723 51.86 <2e-16 ***
## Cextrav 0.49286 0.02546 89.69832 19.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Cextrav -0.552
m4 = lmer(popular ~ 1 + Cextrav
+ Csex + (1 + Cextrav + Csex |
class), data=pop)
## boundary (singular) fit: see ?isSingular
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + Cextrav + Csex + (1 + Cextrav + Csex | class)
## Data: pop
##
## REML criterion at convergence: 4870.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.01902 -0.64955 -0.01055 0.67099 3.11761
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## class (Intercept) 0.673941 0.82094
## Cextrav 0.029847 0.17276 -0.73
## Csex 0.005365 0.07325 -0.65 -0.03
## Residual 0.552883 0.74356
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.02051 0.08411 95.86769 59.69 <2e-16 ***
## Cextrav 0.44300 0.02343 91.03920 18.91 <2e-16 ***
## Csex 1.24483 0.03728 504.13792 33.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cextrv
## Cextrav -0.533
## Csex -0.127 -0.065
## convergence code: 0
## boundary (singular) fit: see ?isSingular
m5 = lmer(popular ~ 1 + Cextrav
+ Csex + Ctexp + (1 + Cextrav +
Csex | class), data=pop)
## boundary (singular) fit: see ?isSingular
summary(m5)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + Cextrav + Csex + Ctexp + (1 + Cextrav + Csex |
## class)
## Data: pop
##
## REML criterion at convergence: 4833.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1642 -0.6554 -0.0246 0.6711 2.9571
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## class (Intercept) 0.284590 0.53347
## Cextrav 0.034741 0.18639 -0.09
## Csex 0.002404 0.04903 -0.98 -0.09
## Residual 0.551435 0.74259
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.022e+00 5.645e-02 9.507e+01 88.96 <2e-16 ***
## Cextrav 4.529e-01 2.465e-02 9.620e+01 18.38 <2e-16 ***
## Csex 1.251e+00 3.694e-02 9.860e+02 33.86 <2e-16 ***
## Ctexp 8.951e-02 8.618e-03 1.013e+02 10.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cextrv Csex
## Cextrav -0.060
## Csex -0.126 -0.066
## Ctexp -0.024 0.089 -0.039
## convergence code: 0
## boundary (singular) fit: see ?isSingular
m5 = lmer(popular ~ 1 + Cextrav + Csex + Ctexp + (1 + Cextrav +
Csex | class), data=pop)
## boundary (singular) fit: see ?isSingular
library(sjPlot)
## #refugeeswelcome
plot_model(m5, show.values=TRUE, show.p=TRUE)
# Mô hình 6: ảnh hưởng của yếu tốTexp, extrov và sex
m6 = lmer(popular ~ 1 + Cextrav + Csex + Ctexp + Cextrav*Ctexp +
Csex*Ctexp + (1 + Cextrav + Csex | class), data=pop)
summary(m6)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + Cextrav + Csex + Ctexp + Cextrav * Ctexp + Csex *
## Ctexp + (1 + Cextrav + Csex | class)
## Data: pop
##
## REML criterion at convergence: 4786.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.12083 -0.64848 -0.01953 0.68701 3.05063
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## class (Intercept) 0.286818 0.53555
## Cextrav 0.005584 0.07473 -0.09
## Csex 0.004137 0.06432 -0.85 -0.45
## Residual 0.552079 0.74302
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.991462 0.056718 99.879784 88.005 < 2e-16 ***
## Cextrav 0.450533 0.017465 82.668382 25.796 < 2e-16 ***
## Csex 1.240357 0.036858 572.273755 33.652 < 2e-16 ***
## Ctexp 0.097171 0.008705 103.174528 11.163 < 2e-16 ***
## Cextrav:Ctexp -0.024704 0.002574 71.879585 -9.597 1.66e-14 ***
## Csex:Ctexp -0.001773 0.005924 617.089874 -0.299 0.765
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cextrv Csex Ctexp Cxtr:C
## Cextrav -0.037
## Csex -0.140 -0.113
## Ctexp -0.021 0.120 -0.038
## Cextrv:Ctxp 0.120 0.015 0.032 -0.119
## Csex:Ctexp -0.040 0.030 -0.015 -0.145 -0.142
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.