資料整理

載入所需要套件和資料

library(heplots)
library(reshape2)
library(ggplot2)
library(plotly)
library(plyr)
library(moments)
library(nlme)

前36筆是男生,後28筆是女生 給ID 給gender

data(VocabGrowth)
LG <- VocabGrowth
LG$id <- c(1:64)
LG$gender <- factor(c(rep(1,36),rep(2,28)))
head(LG)
##   grade8 grade9 grade10 grade11 id gender
## 1   1.75   2.60    3.76    3.68  1      1
## 2   0.90   2.47    2.44    3.43  2      1
## 3   0.80   0.93    0.40    2.27  3      1
## 4   2.42   4.15    4.56    4.21  4      1
## 5  -1.31  -1.31   -0.66   -2.22  5      1
## 6  -1.56   1.67    0.18    2.33  6      1

更動資料排列形狀

LGl <- melt(LG, variable.name ='gradename',  value.name = 'score',id=c('id','gender'))

依照gradename重新創一個數字型態的年級變項

LGl$grade <- as.numeric(substr(LGl$gradename, 6, 7))
head(LGl)
##   id gender gradename score grade
## 1  1      1    grade8  1.75     8
## 2  2      1    grade8  0.90     8
## 3  3      1    grade8  0.80     8
## 4  4      1    grade8  2.42     8
## 5  5      1    grade8 -1.31     8
## 6  6      1    grade8 -1.56     8

看四個時間點的平均數與變異數(以年級和性別為依據)

aggregate(score ~ grade+gender, data = LGl, mean)
##   grade gender    score
## 1     8      1 1.141111
## 2     9      1 2.375000
## 3    10      1 2.880278
## 4    11      1 3.536389
## 5     8      2 1.132857
## 6     9      2 2.756071
## 7    10      2 3.127143
## 8    11      2 3.381786
aggregate(score ~ grade+gender, data = LGl, sd)
##   grade gender    score
## 1     8      1 1.984037
## 2     9      1 2.348258
## 3    10      1 2.397270
## 4    11      1 2.192743
## 5     8      2 1.798256
## 6     9      2 1.705784
## 7    10      2 1.868049
## 8    11      2 1.553820

給性別名字

LGl$gender <- mapvalues(LGl$gender,
                         from = c(2,1),
                          to = c("female", "male"))

繪製不同性別四個時間點的變化

男女生均值剖面圖

pd<-position_dodge(width = .2)
p1<-ggplot(data = LGl, aes(x = gradename, y = score,group=gender, shape=gender,colour=gender,size=1.5)) +
    stat_summary(fun.data = 'mean_cl_boot', size = 3, position = pd) +
    stat_summary(fun.y = mean, geom = 'line', aes(group = gender), position = pd,size = 1.2) +
    guides(shape = guide_legend(title = '', reverse = TRUE)) +
    labs(x = '年級', y = '字彙分數') 
ggplotly(p1)

將性別分開分析

LGl_M <- subset(LGl, gender == "male")
LGl_F <- subset(LGl, gender == "female")

峰度偏態、機率密度圖(男生)

aggregate(score ~ grade, data = LGl_M, skewness)
##   grade     score
## 1     8 1.0361765
## 2     9 0.9958847
## 3    10 0.7435270
## 4    11 0.5666287
aggregate(score ~ grade, data = LGl_M, kurtosis)
##   grade    score
## 1     8 5.843873
## 2     9 4.092944
## 3    10 3.690133
## 4    11 5.210051
p2<- ggplot(LGl_M, aes(score, fill = gradename)) +
     geom_density(alpha = 0.2)+ xlim(-5, 15)+
     xlab('字彙分數')
ggplotly(p2)

峰度偏態、機率密度圖(女生)

aggregate(score ~ grade, data = LGl_F, skewness)
##   grade     score
## 1     8 0.3455779
## 2     9 0.2889140
## 3    10 1.4372096
## 4    11 0.7776863
aggregate(score ~ grade, data = LGl_F, kurtosis)
##   grade    score
## 1     8 3.394654
## 2     9 2.750384
## 3    10 5.527719
## 4    11 3.703205
p3<- ggplot(LGl_F, aes(score, fill = gradename)) +
  geom_density(alpha = 0.2)+ xlim(-5, 15)+
  xlab('字彙分數')
ggplotly(p3)

迴歸線

個別資料,畫上迴歸線並配上區間(男生)

p4<-ggplot(data = LGl_M, aes(x = grade, y = score)) +
    geom_line(aes(group = id)) +
    stat_summary(fun.data = 'mean_cl_boot') +
    stat_smooth() +
    labs(x = '年級', y = '字彙分數')
ggplotly(p4)

個別資料,畫上迴歸線並配上區間(女生)

p5<-ggplot(data = LGl_F, aes(x = grade, y = score)) +
  geom_line(aes(group = id)) +
  stat_summary(fun.data = 'mean_cl_boot') +
  stat_smooth() +
  labs(x = '年級', y = '字彙分數')
ggplotly(p5)

個別迴歸線(男生)

p6<-ggplot(data = LGl_M, aes(x = grade, y = score)) +
    stat_smooth(aes(group = id), method = 'lm', se = F, color = "#3399ff",size=0.8,alpha = 0.5) +
    geom_point(color ="#ff8080" ,alpha = 0.4,size=2.5) +
    labs(x = '年級', y = '字彙分數')
ggplotly(p6)

個別迴歸線(女生)

p6<-ggplot(data = LGl_F, aes(x = grade, y = score)) +
  stat_smooth(aes(group = id), method = 'lm', se = F, color = "#3399ff",size=0.8,alpha = 0.5) +
  geom_point(color ="#ff8080" ,alpha = 0.4,size=2.5) +
  labs(x = '年級', y = '字彙分數')
ggplotly(p6)

性別均值置中

LGl_M$grade_c <- scale(LGl_M$grade, scale = F)
LGl_F$grade_c <- scale(LGl_F$grade, scale = F)

將每個人迴歸線的截距、斜率記下來

m1 <- lmList(score ~ grade_c | id, data = LGl_M)
f1 <- lmList(score ~ grade_c | id, data = LGl_F)

看一下離群值(男生)

p7<-dataEllipse(coef(m1)[, 1], coef(m1)[, 2],  pch = 19,levels=c(.68, .95),
            col = 'black', id.n=2, xlab = '截距估計值', 
            ylab = '斜率估計值')

看一下離群值(女生)

p8<-dataEllipse(coef(f1)[, 1], coef(f1)[, 2],  pch = 19,levels=c(.68, .95),
                col = 'black', id.n=2, xlab = '截距估計值', 
                ylab = '斜率估計值')

(未完)