week1

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(lme4)

## 載入需要的套件：Matrix

library(lmerTest)

## 
## 載入套件：'lmerTest'

## 下列物件被遮斷自 'package:lme4':
## 
##     lmer

## 下列物件被遮斷自 'package:stats':
## 
##     step

library(performance)
library(ggplot2)

## Import the data
library(haven)
hsb1 <- read_sav("HSB/hsb1.sav")
View(hsb1)

## First six rows
head(hsb1)

## # A tibble: 6 × 5
##   schoolid minority                 female        ses mathach
##   <chr>    <dbl+lbl>                <dbl+lbl>   <dbl>   <dbl>
## 1 1224     0 [non-minority student] 1 [female] -1.53     5.88
## 2 1224     0 [non-minority student] 1 [female] -0.588   19.7 
## 3 1224     0 [non-minority student] 0 [male]   -0.528   20.3 
## 4 1224     0 [non-minority student] 0 [male]   -0.668    8.78
## 5 1224     0 [non-minority student] 0 [male]   -0.158   17.9 
## 6 1224     0 [non-minority student] 0 [male]    0.022    4.58

## Column means
sub.dat <- hsb1[, c("ses", "mathach")]
head(sub.dat)  # just to confirm I have the right variables

## # A tibble: 6 × 2
##      ses mathach
##    <dbl>   <dbl>
## 1 -1.53     5.88
## 2 -0.588   19.7 
## 3 -0.528   20.3 
## 4 -0.668    8.78
## 5 -0.158   17.9 
## 6  0.022    4.58

colMeans(sub.dat)

##          ses      mathach 
## 1.433542e-04 1.274785e+01

##Descriptive stat
summary(sub.dat)

##       ses               mathach      
##  Min.   :-3.758000   Min.   :-2.832  
##  1st Qu.:-0.538000   1st Qu.: 7.275  
##  Median : 0.002000   Median :13.131  
##  Mean   : 0.000143   Mean   :12.748  
##  3rd Qu.: 0.602000   3rd Qu.:18.317  
##  Max.   : 2.692000   Max.   :24.993

## Correlation matrices
cor(sub.dat)  # correlation matrix

##               ses   mathach
## ses     1.0000000 0.3607556
## mathach 0.3607556 1.0000000

## Creating new variables
sub.dat$sesXmath <- sub.dat$ses * sub.dat$mathach
head(sub.dat)

## # A tibble: 6 × 3
##      ses mathach sesXmath
##    <dbl>   <dbl>    <dbl>
## 1 -1.53     5.88   -8.98 
## 2 -0.588   19.7   -11.6  
## 3 -0.528   20.3   -10.7  
## 4 -0.668    8.78   -5.87 
## 5 -0.158   17.9    -2.83 
## 6  0.022    4.58    0.101

# Graph shows positive relationship between SES and math achievement 
ggplot(data = hsb1, aes(x = ses, y = mathach)) +  geom_point(shape = 1) + theme_bw()

# Regression with lm using the full dataset without considering nesting structure
# Coefficients, standard errors, and t-test results
fit.ses <- lm(mathach ~ ses, data = hsb1)
summary(fit.ses)

## 
## Call:
## lm(formula = mathach ~ ses, data = hsb1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.4382  -4.7580   0.2334   5.0649  15.9007 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.74740    0.07569  168.42   <2e-16 ***
## ses          3.18387    0.09712   32.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.416 on 7183 degrees of freedom
## Multiple R-squared:  0.1301, Adjusted R-squared:   0.13 
## F-statistic:  1075 on 1 and 7183 DF,  p-value: < 2.2e-16

coef(summary(fit.ses))

##             Estimate Std. Error   t value      Pr(>|t|)
## (Intercept) 12.74740 0.07568644 168.42379  0.000000e+00
## ses          3.18387 0.09712093  32.78253 8.714539e-220

sigma(fit.ses)^2 # error variance

## [1] 41.15882

summary(fit.ses)$r.squared # r-squared

## [1] 0.1301446

#Your First MLM: Random effects ANOVA
re_ANOVA <- lmer(mathach ~ (1|schoolid), data = hsb1)
summary(re_ANOVA)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mathach ~ (1 | schoolid)
##    Data: hsb1
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolid (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  schoolid, 160
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  12.6370     0.2444 156.6473   51.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

icc(re_ANOVA)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.180
##   Unadjusted ICC: 0.180

week1

2023-09-19

R Markdown